Compare commits

..

1 Commits

Author SHA1 Message Date
Anton Blanchard 986881f258 Add a patch to route the NIA out to GPIOs 5 years ago

@ -1,109 +0,0 @@
name: 'test'

on:
push:
pull_request:
schedule:
- cron: '0 0 * * 5'
workflow_dispatch:

jobs:

build:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
backend:
- llvm
- gcc
container: ghdl/vunit:${{ matrix.backend }}
steps:
- uses: actions/checkout@v2
- run: make GNATMAKE='gnatmake -j'$(nproc)

py:
needs: [build]
runs-on: ubuntu-latest
container: ghdl/vunit:llvm
steps:
- uses: actions/checkout@v2
- run: |
apt update
apt install -y python3-pexpect
make -j$(nproc) test_micropython test_micropython_long

test:
needs: [build]
strategy:
fail-fast: false
max-parallel: 3
matrix:
task: [
"tests_console",
"{1..99}",
"{100..199}",
"{200..299}",
"{300..399}",
"{400..499}",
"{500..599}",
"{600..699}",
"{700..799}",
"{800..899}",
"{900..999}",
]
runs-on: ubuntu-latest
container: ghdl/vunit:llvm
steps:
- uses: actions/checkout@v2
- run: bash -c "make -j$(nproc) ${{ matrix.task }}"

VUnit:
needs: [build]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: docker://ghdl/vunit:llvm
with:
args: python3 ./run.py -p10

symbiflow:
strategy:
fail-fast: false
max-parallel: 2
matrix:
task: [ ECP5-EVN, ORANGE-CRAB, ORANGE-CRAB-0.21 ]
runs-on: ubuntu-latest
env:
DOCKER: 1
FPGA_TARGET: ${{matrix.task}}
steps:
- uses: actions/checkout@v2
- run: make microwatt.json
- run: make microwatt.bit
- run: make microwatt.svf
- uses: actions/upload-artifact@v2
with:
name: ${{matrix.task}}-bitstream
path: microwatt.svf

# test building verilog target from yosys/nextpnr
verilog:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- run: make DOCKER=1 microwatt.v

verilator:
runs-on: ubuntu-latest
env:
DOCKER: 1
FPGA_TARGET: verilator
RAM_INIT_FILE: micropython/firmware.hex
MEMORY_SIZE: 524288
steps:
- uses: actions/checkout@v2
- run: |
sudo apt update
sudo apt install -y python3-pexpect
make -j$(nproc) test_micropython_verilator test_micropython_verilator_long

19
.gitignore vendored

@ -2,16 +2,9 @@
*~
*.cf
*.s
*.bit
*_out.config
microwatt.json
microwatt.svf
*_tb
main_ram.bin
tests/*/*.bin
tests/*/*.hex
tests/*/*.elf
TAGS
litedram/build/*
liteeth/build/*
obj_dir/*
core_tb
fetch_tb
loadstore_tb
soc_reset_tb
simple_ram_behavioural_tb
simple_ram_behavioural.bin

@ -0,0 +1,8 @@
language: minimal
install: skip

services: docker

before_install: docker pull ghdl/vunit:llvm

script: docker run --rm -t -v `pwd`:/build -w /build ghdl/vunit:llvm bash -c "apt update && apt install -y python3-pexpect && make GNATMAKE='gnatmake -j'$(nproc) && if [ -n \"$TRAVIS_FULL_CHECK\" ] ; then make -j$(nproc) check; else make -j$(nproc) check_light ; fi"

@ -1,343 +1,82 @@
GHDL ?= ghdl
GHDL=ghdl
GHDLFLAGS=--std=08
CFLAGS=-O3 -Wall
# Need to investigate why yosys is hitting verilator warnings, and eventually turn on -Wall
VERILATOR_FLAGS=-O3 -Wno-fatal -Wno-CASEOVERLAP -Wno-UNOPTFLAT #--trace
# It takes forever to build with optimisation, so disable by default
#VERILATOR_CFLAGS=-O3
CFLAGS=-O2 -Wall

# some yosys builds have ghdl plugin built in, otherwise need "-m ghdl"
GHDLSYNTH ?= $(shell ($(YOSYS) -H | grep -q ghdl) || echo -m ghdl)
YOSYS ?= yosys
NEXTPNR ?= nextpnr-ecp5
ECPPACK ?= ecppack
ECPPROG ?= ecpprog
OPENOCD ?= openocd
VUNITRUN ?= python3 ./run.py
VERILATOR ?= verilator
DFUUTIL ?= dfu-util
DFUSUFFIX ?= dfu-suffix

# We need a version of GHDL built with either the LLVM or gcc backend.
# Fedora provides this, but other distros may not. Another option is to use
# the Docker image.
DOCKER ?= 0
PODMAN ?= 0

ifeq ($(DOCKER), 1)
DOCKERBIN=docker
USE_DOCKER=1
endif

ifeq ($(PODMAN), 1)
DOCKERBIN=podman
USE_DOCKER=1
endif

ifeq ($(USE_DOCKER), 1)
PWD = $(shell pwd)
DOCKERARGS = run --rm -v $(PWD):/src:z -w /src
GHDL = $(DOCKERBIN) $(DOCKERARGS) ghdl/ghdl:buster-llvm-7 ghdl
CC = $(DOCKERBIN) $(DOCKERARGS) ghdl/ghdl:buster-llvm-7 gcc
GHDLSYNTH = -m ghdl
YOSYS = $(DOCKERBIN) $(DOCKERARGS) hdlc/ghdl:yosys yosys
NEXTPNR = $(DOCKERBIN) $(DOCKERARGS) hdlc/nextpnr:ecp5 nextpnr-ecp5
ECPPACK = $(DOCKERBIN) $(DOCKERARGS) hdlc/prjtrellis ecppack
OPENOCD = $(DOCKERBIN) $(DOCKERARGS) --device /dev/bus/usb hdlc/prog openocd
VUNITRUN = $(DOCKERBIN) $(DOCKERARGS) ghdl/vunit:llvm python3 ./run.py
VERILATOR = $(DOCKERBIN) $(DOCKERARGS) verilator/verilator:latest
endif

VUNITARGS += -p10

all = core_tb icache_tb dcache_tb dmi_dtm_tb \
wishbone_bram_tb soc_reset_tb
all = core_tb simple_ram_behavioural_tb soc_reset_tb
# XXX
# loadstore_tb fetch_tb

all: $(all)

core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \
decode1.vhdl helpers.vhdl insn_helpers.vhdl \
control.vhdl decode2.vhdl register_file.vhdl \
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
logical.vhdl countbits.vhdl multiply.vhdl divider.vhdl execute1.vhdl \
loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl core_debug.vhdl \
core.vhdl fpu.vhdl pmu.vhdl

soc_files = wishbone_arbiter.vhdl wishbone_bram_wrapper.vhdl sync_fifo.vhdl \
wishbone_debug_master.vhdl xics.vhdl syscon.vhdl gpio.vhdl soc.vhdl \
spi_rxtx.vhdl spi_flash_ctrl.vhdl

uart_files = $(wildcard uart16550/*.v)

soc_sim_files = $(core_files) $(soc_files) sim_console.vhdl sim_pp_uart.vhdl sim_bram_helpers.vhdl \
sim_bram.vhdl sim_jtag_socket.vhdl sim_jtag.vhdl dmi_dtm_xilinx.vhdl \
sim_16550_uart.vhdl \
foreign_random.vhdl glibc_random.vhdl glibc_random_helpers.vhdl

soc_sim_c_files = sim_vhpi_c.c sim_bram_helpers_c.c sim_console_c.c \
sim_jtag_socket_c.c

soc_sim_obj_files=$(soc_sim_c_files:.c=.o)
comma := ,
soc_sim_link=$(patsubst %,-Wl$(comma)%,$(soc_sim_obj_files))

unisim_dir = sim-unisim
unisim_lib = $(unisim_dir)/unisim-obj08.cf
unisim_lib_files = $(unisim_dir)/BSCANE2.vhdl $(unisim_dir)/BUFG.vhdl \
$(unisim_dir)/unisim_vcomponents.vhdl
$(unisim_lib): $(unisim_lib_files)
$(GHDL) -i --std=08 --work=unisim --workdir=$(unisim_dir) $^
GHDLFLAGS += -P$(unisim_dir)

soc_tbs = core_tb icache_tb dcache_tb dmi_dtm_tb wishbone_bram_tb
soc_flash_tbs = core_flash_tb
soc_dram_tbs = dram_tb core_dram_tb

ifneq ($(FLASH_MODEL_PATH),)
fmf_dir = $(FLASH_MODEL_PATH)/fmf
fmf_lib = $(fmf_dir)/fmf-obj08.cf
fmf_lib_files = $(wildcard $(fmf_dir)/*.vhd)
GHDLFLAGS += -P$(fmf_dir)
$(fmf_lib): $(fmf_lib_files)
$(GHDL) -i --std=08 --work=fmf --workdir=$(fmf_dir) $^

flash_model_files=$(FLASH_MODEL_PATH)/s25fl128s.vhd
flash_model_files: $(fmf_lib)
else
flash_model_files=sim_no_flash.vhdl
fmf_lib=
endif

$(soc_flash_tbs): %: $(soc_sim_files) $(soc_sim_obj_files) $(unisim_lib) $(fmf_lib) $(flash_model_files) %.vhdl
$(GHDL) -c $(GHDLFLAGS) $(soc_sim_link) $(soc_sim_files) $(flash_model_files) $@.vhdl $(unisim_files) -e $@

$(soc_tbs): %: $(soc_sim_files) $(soc_sim_obj_files) $(unisim_lib) %.vhdl
$(GHDL) -c $(GHDLFLAGS) $(soc_sim_link) $(soc_sim_files) $@.vhdl -e $@

soc_reset_tb: fpga/soc_reset_tb.vhdl fpga/soc_reset.vhdl
$(GHDL) -c $(GHDLFLAGS) fpga/soc_reset_tb.vhdl fpga/soc_reset.vhdl -e $@

# LiteDRAM sim
VERILATOR_ROOT=$(shell verilator -getenv VERILATOR_ROOT 2>/dev/null)
ifeq (, $(VERILATOR_ROOT))
$(soc_dram_tbs):
$(error "Verilator is required to make this target !")
else

verilated_dram: litedram/generated/sim/litedram_core.v
verilator $(VERILATOR_FLAGS) -CFLAGS $(VERILATOR_CFLAGS) -Wno-fatal --cc $<
make -C obj_dir -f ../litedram/extras/sim_dram_verilate.mk VERILATOR_ROOT=$(VERILATOR_ROOT)

SIM_DRAM_CFLAGS = -I. -Iobj_dir -Ilitedram/generated/sim -I$(VERILATOR_ROOT)/include -I$(VERILATOR_ROOT)/include/vltstd
SIM_DRAM_CFLAGS += -DVM_COVERAGE=0 -DVM_SC=0 -DVM_TRACE=1 -DVL_PRINTF=printf -faligned-new
sim_litedram_c.o: litedram/extras/sim_litedram_c.cpp verilated_dram
$(CC) $(CPPFLAGS) $(SIM_DRAM_CFLAGS) $(CFLAGS) -c $< -o $@

soc_dram_files = $(core_files) $(soc_files) litedram/extras/litedram-wrapper-l2.vhdl litedram/generated/sim/litedram-initmem.vhdl
soc_dram_sim_files = $(soc_sim_files) litedram/extras/sim_litedram.vhdl
soc_dram_sim_obj_files = $(soc_sim_obj_files) sim_litedram_c.o
dram_link_files=-Wl,obj_dir/Vlitedram_core__ALL.a -Wl,obj_dir/verilated.o -Wl,obj_dir/verilated_vcd_c.o -Wl,-lstdc++
soc_dram_sim_link=$(patsubst %,-Wl$(comma)%,$(soc_dram_sim_obj_files)) $(dram_link_files)

$(soc_dram_tbs): %: $(soc_dram_files) $(soc_dram_sim_files) $(soc_dram_sim_obj_files) $(flash_model_files) $(unisim_lib) $(fmf_lib) %.vhdl
$(GHDL) -c $(GHDLFLAGS) $(soc_dram_sim_link) $(soc_dram_files) $(soc_dram_sim_files) $(flash_model_files) $@.vhdl -e $@
endif

# Hello world
MEMORY_SIZE ?=8192
RAM_INIT_FILE ?=hello_world/hello_world.hex

# Micropython
#MEMORY_SIZE=393216
#RAM_INIT_FILE=micropython/firmware.hex

FPGA_TARGET ?= ORANGE-CRAB-0.21

# FIXME: icache RAMs aren't being inferred as block RAMs on ECP5
# with yosys, so make it smaller for now as a workaround.
ICACHE_NUM_LINES=4

clkgen=fpga/clk_gen_ecp5.vhd
toplevel=fpga/top-generic.vhdl
dmi_dtm=dmi_dtm_dummy.vhdl
LITEDRAM_GHDL_ARG=

# OrangeCrab with ECP85 (original v0.0 with UM5G-85 chip)
ifeq ($(FPGA_TARGET), ORANGE-CRAB)
RESET_LOW=true
CLK_INPUT=48000000
CLK_FREQUENCY=48000000
LPF=constraints/orange-crab.lpf
PACKAGE=CSFBGA285
NEXTPNR_FLAGS=--um5g-85k --freq 48
OPENOCD_JTAG_CONFIG=openocd/olimex-arm-usb-tiny-h.cfg
OPENOCD_DEVICE_CONFIG=openocd/LFE5UM5G-85F.cfg
ECP_FLASH_OFFSET=0x80000
endif

# OrangeCrab with ECP85 (v0.21)
ifeq ($(FPGA_TARGET), ORANGE-CRAB-0.21)
RESET_LOW=true
CLK_INPUT=48000000
CLK_FREQUENCY=48000000
LPF=constraints/orange-crab-0.2.lpf
PACKAGE=CSFBGA285
NEXTPNR_FLAGS=--85k --speed 8 --freq 48 --timing-allow-fail --ignore-loops
OPENOCD_JTAG_CONFIG=openocd/olimex-arm-usb-tiny-h.cfg
OPENOCD_DEVICE_CONFIG=openocd/LFE5U-85F.cfg
DFU_VENDOR=1209
DFU_PRODUCT=5af0
ECP_FLASH_OFFSET=0x80000
toplevel=fpga/top-orangecrab0.2.vhdl
litedram_target=orangecrab-85-0.2
soc_extra_v += litesdcard/generated/lattice/litesdcard_core.v
dmi_dtm=dmi_dtm_ecp5.vhdl
endif

# ECP5-EVN
ifeq ($(FPGA_TARGET), ECP5-EVN)
RESET_LOW=true
CLK_INPUT=12000000
CLK_FREQUENCY=40000000
LPF=constraints/ecp5-evn.lpf
PACKAGE=CABGA381
NEXTPNR_FLAGS=--um5g-85k --freq 40
OPENOCD_JTAG_CONFIG=openocd/ecp5-evn.cfg
OPENOCD_DEVICE_CONFIG=openocd/LFE5UM5G-85F.cfg
endif

ifneq ($(litedram_target),)
soc_extra_synth += litedram/extras/litedram-wrapper-l2.vhdl \
litedram/generated/$(litedram_target)/litedram-initmem.vhdl
soc_extra_v += litedram/generated/$(litedram_target)/litedram_core.v
LITEDRAM_GHDL_ARG=-gUSE_LITEDRAM=true
endif

GHDL_IMAGE_GENERICS=-gMEMORY_SIZE=$(MEMORY_SIZE) -gRAM_INIT_FILE=$(RAM_INIT_FILE) \
-gRESET_LOW=$(RESET_LOW) -gCLK_INPUT=$(CLK_INPUT) -gCLK_FREQUENCY=$(CLK_FREQUENCY) -gICACHE_NUM_LINES=$(ICACHE_NUM_LINES) \
$(LITEDRAM_GHDL_ARG)


ifeq ($(FPGA_TARGET), verilator)
RESET_LOW=true
CLK_INPUT=50000000
CLK_FREQUENCY=50000000
clkgen=fpga/clk_gen_bypass.vhd
endif

fpga_files = fpga/soc_reset.vhdl \
fpga/pp_fifo.vhd fpga/pp_soc_uart.vhd fpga/main_bram.vhdl \
nonrandom.vhdl

synth_files = $(core_files) $(soc_files) $(soc_extra_synth) $(fpga_files) $(clkgen) $(toplevel) $(dmi_dtm)

microwatt.json: $(synth_files) $(RAM_INIT_FILE)
$(YOSYS) $(GHDLSYNTH) -p "ghdl --std=08 --no-formal $(GHDL_IMAGE_GENERICS) $(synth_files) -e toplevel; read_verilog $(uart_files) $(soc_extra_v); synth_ecp5 -abc9 -nowidelut -json $@ $(SYNTH_ECP5_FLAGS)"

microwatt.v: $(synth_files) $(RAM_INIT_FILE)
$(YOSYS) $(GHDLSYNTH) -p "ghdl --std=08 --no-formal $(GHDL_IMAGE_GENERICS) $(synth_files) -e toplevel; write_verilog $@"

microwatt-verilator: microwatt.v verilator/microwatt-verilator.cpp verilator/uart-verilator.c
$(VERILATOR) $(VERILATOR_FLAGS) -CFLAGS "$(VERILATOR_CFLAGS) -DCLK_FREQUENCY=$(CLK_FREQUENCY)" -Iuart16550 --assert --cc --exe --build $^ -o $@ -top-module toplevel
@cp -f obj_dir/microwatt-verilator microwatt-verilator

microwatt_out.config: microwatt.json $(LPF)
$(NEXTPNR) --json $< --lpf $(LPF) --textcfg $@.tmp $(NEXTPNR_FLAGS) --package $(PACKAGE)
mv -f $@.tmp $@

microwatt.bit: microwatt_out.config
$(ECPPACK) --compress --freq 38.8 --svf microwatt.svf $< $@

microwatt.svf: microwatt.bit

prog: microwatt.svf
$(OPENOCD) -f $(OPENOCD_JTAG_CONFIG) -f $(OPENOCD_DEVICE_CONFIG) -c "transport select jtag; init; svf $<; exit"

microwatt.dfu: microwatt.bit
cp $< $@.tmp
$(DFUSUFFIX) -v $(DFU_VENDOR) -p $(DFU_PRODUCT) -a $@.tmp
mv $@.tmp $@

dfuprog: microwatt.dfu
$(DFUUTIL) -a 0 -D $<

ecpprog: microwatt.bit
$(ECPPROG) -S $<

ecpflash: microwatt.bit
test -n "$(ECP_FLASH_OFFSET)" || (echo Error: No ECP_FLASH_OFFSET defined for target; exit 1)
$(ECPPROG) -o $(ECP_FLASH_OFFSET) $<
%.o : %.vhdl
$(GHDL) -a $(GHDLFLAGS) $<

common.o: decode_types.o
core_tb.o: common.o core.o soc.o
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o execute2.o loadstore1.o loadstore2.o multiply.o writeback.o
cr_file.o: common.o
crhelpers.o: common.o
decode1.o: common.o decode_types.o
decode2.o: decode_types.o common.o helpers.o insn_helpers.o
decode_types.o:
execute1.o: decode_types.o common.o helpers.o crhelpers.o ppc_fx_insns.o sim_console.o
execute2.o: common.o crhelpers.o ppc_fx_insns.o
fetch1.o: common.o
fetch2.o: common.o wishbone_types.o
glibc_random_helpers.o:
glibc_random.o: glibc_random_helpers.o
helpers.o:
icache.o: common.o wishbone_types.o
insn_helpers.o:
loadstore1.o: common.o
loadstore2.o: common.o helpers.o wishbone_types.o
multiply_tb.o: common.o glibc_random.o ppc_fx_insns.o multiply.o
multiply.o: common.o decode_types.o ppc_fx_insns.o crhelpers.o
ppc_fx_insns.o: helpers.o
register_file.o: common.o
sim_console.o:
simple_ram_behavioural_helpers.o:
simple_ram_behavioural_tb.o: wishbone_types.o simple_ram_behavioural.o
simple_ram_behavioural.o: wishbone_types.o simple_ram_behavioural_helpers.o
sim_uart.o: wishbone_types.o sim_console.o
soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o simple_ram_behavioural.o
wishbone_arbiter.o: wishbone_types.o
wishbone_types.o:
writeback.o: common.o
fpga/soc_reset_tb.o: fpga/soc_reset.o

soc_reset_tb: fpga/soc_reset_tb.o fpga/soc_reset.o
$(GHDL) -e $(GHDLFLAGS) soc_reset_tb

core_tb: core_tb.o simple_ram_behavioural_helpers_c.o sim_console_c.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o -Wl,sim_console_c.o $@

fetch_tb: fetch_tb.o
$(GHDL) -e $(GHDLFLAGS) $@

loadstore_tb: loadstore_tb.o
$(GHDL) -e $(GHDLFLAGS) $@

simple_ram_tb: simple_ram_tb.o
$(GHDL) -e $(GHDLFLAGS) $@

simple_ram_behavioural_tb: simple_ram_behavioural_helpers_c.o simple_ram_behavioural_tb.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@

tests = $(sort $(patsubst tests/%.out,%,$(wildcard tests/*.out)))
tests_console = $(sort $(patsubst tests/%.console_out,%,$(wildcard tests/*.console_out)))

tests_console: $(tests_console)

check_vunit:
$(VUNITRUN) $(VUNITARGS)
check: $(tests) test_micropython test_micropython_long

check: $(tests) tests_console test_micropython test_micropython_long tests_unit

check_light: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 test_micropython test_micropython_long tests_console tests_unit
check_light: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 test_micropython test_micropython_long

$(tests): core_tb
@./scripts/run_test.sh $@

$(tests_console): core_tb
@./scripts/run_test_console.sh $@

test_micropython: core_tb
@./scripts/test_micropython.py

test_micropython_verilator: microwatt-verilator
@./scripts/test_micropython_verilator.py

test_micropython_long: core_tb
@./scripts/test_micropython_long.py

test_micropython_verilator_long: microwatt-verilator
@./scripts/test_micropython_verilator_long.py

tests_soc_tb = $(patsubst %_tb,%_tb_test,$(soc_tbs))

%_test: %
./$< --assert-level=error > /dev/null

tests_soc: $(tests_soc_tb)

# FIXME SOC tests have bit rotted, so disable for now
#tests_unit: tests_soc

TAGS:
find . -name '*.vhdl' | xargs ./scripts/vhdltags

.PHONY: TAGS

_clean:
rm -f *.o *.cf $(all)
rm -f fpga/*.o fpga/*.cf
rm -f sim-unisim/*.o sim-unisim/*.cf
rm -f litedram/extras/*.o
rm -f TAGS
rm -f scripts/mw_debug/*.o
rm -f scripts/mw_debug/mw_debug
rm -f microwatt.bin microwatt.json microwatt.svf microwatt_out.config
rm -f microwatt.v microwatt-verilator
rm -rf obj_dir/

clean: _clean
make -f scripts/mw_debug/Makefile clean
make -f hello_world/Makefile clean

distclean: _clean
rm -f *~ fpga/*~ lib/*~ console/*~ include/*~
rm -rf litedram/build
rm -f litedram/extras/*~
rm -f litedram/gen-src/*~
rm -f litedram/gen-src/sdram_init/*~
make -f scripts/mw_debug/Makefile distclean
make -f hello_world/Makefile distclean

.PHONY: all prog check check_light clean distclean
.PRECIOUS: microwatt.json microwatt_out.config microwatt.bit
clean:
rm -f *.o work-*cf $(all)

@ -1,7 +1,3 @@
<p align="center">
<img src="media/microwatt-title.png" alt="Microwatt">
</p>

# Microwatt

A tiny Open POWER ISA softcore written in VHDL 2008. It aims to be simple and easy
@ -12,62 +8,32 @@ to understand.
<img src="http://neuling.org/microwatt-micropython.gif" alt="MicroPython running on Microwatt"/>
</p>

You can try out Microwatt/Micropython without hardware by using the ghdl simulator. If you want to build directly for a hardware target board, see below.

- Build micropython. If you aren't building on a ppc64le box you
will need a cross compiler. If it isn't available on your distro
grab the powerpc64le-power8 toolchain from https://toolchains.bootlin.com.
You may need to set the CROSS_COMPILE environment variable
to the prefix used for your cross compilers. The default is
powerpc64le-linux-gnu-.
grab the powerpc64le-power8 toolchain from https://toolchains.bootlin.com

```
git clone https://github.com/micropython/micropython.git
git clone https://github.com/mikey/micropython
cd micropython
git checkout powerpc
cd ports/powerpc
make -j$(nproc)
cd ../../../
```

A prebuilt micropython image is also available in the micropython/ directory.

- Microwatt uses ghdl for simulation. Either install this from your
distro or build it. Microwatt requires ghdl to be built with the LLVM
or gcc backend, which not all distros do (Fedora does, Debian/Ubuntu
appears not to). ghdl with the LLVM backend is likely easier to build.

If building ghdl from scratch is too much for you, the microwatt Makefile
supports using Docker or Podman.

- Next build microwatt:
distro or build it. Next build microwatt:

```
git clone https://github.com/antonblanchard/microwatt
cd microwatt
make
```

To build using Docker:
```
make DOCKER=1
```

and to build using Podman:

```
make PODMAN=1
```

- Link in the micropython image:

```
ln -s ../micropython/ports/powerpc/build/firmware.bin main_ram.bin
```

Or if you were using the pre-built image:

```
ln -s micropython/firmware.bin main_ram.bin
ln -s ../micropython/ports/powerpc/build/firmware.bin simple_ram_behavioural.bin
```

- Now run microwatt, sending debug output to /dev/null:
@ -91,30 +57,20 @@ source /opt/Xilinx/Vivado/2019.1/settings64.sh
```
pip3 install --user -U fusesoc
```
Fedora users can get FuseSoC package via
```
sudo dnf copr enable sharkcz/danny
sudo dnf install fusesoc
```

- If this is your first time using fusesoc, initialize fusesoc.
This is needed to be able to pull down fusesoc library components referenced
by microwatt. Run
- Create a working directory and point FuseSoC at microwatt:

```
fusesoc init
fusesoc fetch uart16550
fusesoc library add microwatt /path/to/microwatt
mkdir microwatt-fusesoc
cd microwatt-fusesoc
fusesoc library add microwatt /path/to/microwatt/
```

- Build using FuseSoC. For hello world (Replace nexys_video with your FPGA board such as --target=arty_a7-100):
You may wish to ensure you have [installed Digilent Board files](https://reference.digilentinc.com/vivado/installing-vivado/start#installing_digilent_board_files)
or appropriate files for your board first.
- Build using FuseSoC. For hello world (Replace nexys_video with your FPGA board):

```
fusesoc run --target=nexys_video microwatt --memory_size=16384 --ram_init_file=/path/to/microwatt/fpga/hello_world.hex
fusesoc run --target=nexys_video microwatt --memory_size=8192 --ram_init_file=/path/to/microwatt/fpga/hello_world.hex
```
You should then be able to see output via the serial port of the board (/dev/ttyUSB1, 115200 for example assuming standard clock speeds). There is a known bug where initial output may not be sent - try the reset button (not the programming button) on your board if you don't see anything.

- To build micropython (currently requires 1MB of BRAM eg an Artix-7 A200):

@ -122,68 +78,6 @@ You should then be able to see output via the serial port of the board (/dev/tty
fusesoc run --target=nexys_video microwatt
```

## Linux on Microwatt

Mainline Linux supports Microwatt as of v5.14. The Arty A7 is the best tested
platform, but it's also been tested on the OrangeCrab and ButterStick.

1. Use buildroot to create a userspace

A small change is required to glibc in order to support the VMX/AltiVec-less
Microwatt, as float128 support is mandatory and in GCC this requires
VSX/AltiVec. This change is included in Joel's buildroot fork, along with a
defconfig:
```
git clone -b microwatt https://github.com/shenki/buildroot
cd buildroot
make ppc64le_microwatt_defconfig
make
```

The output is `output/images/rootfs.cpio`.

2. Build the Linux kernel
```
git clone https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
cd linux
make ARCH=powerpc microwatt_defconfig
make ARCH=powerpc CROSS_COMPILE=powerpc64le-linux-gnu- \
CONFIG_INITRAMFS_SOURCE=/buildroot/output/images/rootfs.cpio -j`nproc`
```

The output is `arch/powerpc/boot/dtbImage.microwatt.elf`.

3. Build gateware using FuseSoC

First configure FuseSoC as above.
```
fusesoc run --build --target=arty_a7-100 microwatt --no_bram --memory_size=0
```

The output is `build/microwatt_0/arty_a7-100-vivado/microwatt_0.bit`.

4. Program the flash

This operation will overwrite the contents of your flash.

For the Arty A7 A100, set `FLASH_ADDRESS` to `0x400000` and pass `-f a100`.

For the Arty A7 A35, set `FLASH_ADDRESS` to `0x300000` and pass `-f a35`.
```
microwatt/openocd/flash-arty -f a100 build/microwatt_0/arty_a7-100-vivado/microwatt_0.bit
microwatt/openocd/flash-arty -f a100 dtbImage.microwatt.elf -t bin -a $FLASH_ADDRESS
```

5. Connect to the second USB TTY device exposed by the FPGA

```
minicom -D /dev/ttyUSB1
```

The gateware has firmware that will look at `FLASH_ADDRESS` and attempt to
parse an ELF there, loading it to the address specified in the ELF header
and jumping to it.

## Testing

- A simple test suite containing random execution test cases and a couple of
@ -195,5 +89,9 @@ make -j$(nproc) check

## Issues

- There are a few instructions still to be implemented:
- Vector/VMX/VSX
This is functional, but very simple. We still have quite a lot to do:

- Need to implement a simple non pipelined divide
- There are a few instructions still to be implemented
- Need to add caches and bypassing (in progress)
- Need to add supervisor state (in progress)

@ -1,85 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;

-- cache_ram: clocked RAM with one read port and one byte-enabled write port.
--
-- Generics:
--   ROW_BITS : width of the read/write address; the RAM has 2**ROW_BITS rows.
--   WIDTH    : width of each row in bits; wr_sel carries WIDTH/8 bits, one
--              per byte lane.
--   TRACE    : when true, every write with a non-zero wr_sel and every
--              enabled read is logged with "report".
--   ADD_BUF  : when true, an extra clocked register is placed between the
--              internal read register and rd_data.
--
-- Ports:
--   clk      : clock; all state changes happen on its rising edge.
--   rd_en    : read enable; the read register is only updated when '1'.
--   rd_addr  : row address for the read.
--   rd_data  : read result (registered once, or twice when ADD_BUF is true).
--   wr_sel   : per-byte write enables; bit i covers bits 8*i+7 downto 8*i.
--   wr_addr  : row address for the write.
--   wr_data  : data to write; only the selected bytes are stored.
entity cache_ram is
generic(
ROW_BITS : integer := 16;
WIDTH : integer := 64;
TRACE : boolean := false;
ADD_BUF : boolean := false
);

port(
clk : in std_logic;
rd_en : in std_logic;
rd_addr : in std_logic_vector(ROW_BITS - 1 downto 0);
rd_data : out std_logic_vector(WIDTH - 1 downto 0);
wr_sel : in std_logic_vector(WIDTH/8 - 1 downto 0);
wr_addr : in std_logic_vector(ROW_BITS - 1 downto 0);
wr_data : in std_logic_vector(WIDTH - 1 downto 0)
);

end cache_ram;

architecture rtl of cache_ram is
-- Number of rows in the RAM.
constant SIZE : integer := 2**ROW_BITS;

type ram_type is array (0 to SIZE - 1) of std_logic_vector(WIDTH - 1 downto 0);
signal ram : ram_type;
-- Synthesis attribute; presumably a hint asking the tool to map this array
-- onto block RAM -- confirm against the target toolchain's documentation.
attribute ram_style : string;
attribute ram_style of ram : signal is "block";

-- First-stage read register, loaded when rd_en = '1'.
signal rd_data0 : std_logic_vector(WIDTH - 1 downto 0);

begin
-- Main clocked process: optional write tracing, byte-wise write, then read.
process(clk)
-- Low and high bit positions of the byte lane currently being processed.
variable lbit : integer range 0 to WIDTH - 1;
variable mbit : integer range 0 to WIDTH - 1;
-- Write row index decoded from wr_addr.
variable widx : integer range 0 to SIZE - 1;
-- All-zero select value, used to detect "no byte lane selected".
constant sel0 : std_logic_vector(WIDTH/8 - 1 downto 0)
:= (others => '0');
begin
if rising_edge(clk) then
if TRACE then
-- Only log writes that actually select at least one byte.
if wr_sel /= sel0 then
report "write a:" & to_hstring(wr_addr) &
" sel:" & to_hstring(wr_sel) &
" dat:" & to_hstring(wr_data);
end if;
end if;
-- Walk the byte lanes and store each byte whose select bit is set.
for i in 0 to WIDTH/8-1 loop
lbit := i * 8;
mbit := lbit + 7;
widx := to_integer(unsigned(wr_addr));
if wr_sel(i) = '1' then
ram(widx)(mbit downto lbit) <= wr_data(mbit downto lbit);
end if;
end loop;
-- Registered read: rd_data0 keeps its previous value when rd_en is '0'.
if rd_en = '1' then
rd_data0 <= ram(to_integer(unsigned(rd_addr)));
if TRACE then
report "read a:" & to_hstring(rd_addr) &
" dat:" & to_hstring(ram(to_integer(unsigned(rd_addr))));
end if;
end if;
end if;
end process;

-- With ADD_BUF, rd_data is a second register stage fed from rd_data0.
buf: if ADD_BUF generate
begin
process(clk)
begin
if rising_edge(clk) then
rd_data <= rd_data0;
end if;
end process;
end generate;

-- Without ADD_BUF, rd_data is driven directly from rd_data0.
nobuf: if not ADD_BUF generate
begin
rd_data <= rd_data0;
end generate;

end;

@ -1,792 +1,196 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.utils.all;
use work.decode_types.all;

package common is
-- Processor Version Number
constant PVR_MICROWATT : std_ulogic_vector(31 downto 0) := x"00630000";

-- MSR bit numbers
constant MSR_SF : integer := (63 - 0); -- Sixty-Four bit mode
constant MSR_EE : integer := (63 - 48); -- External interrupt Enable
constant MSR_PR : integer := (63 - 49); -- PRoblem state
constant MSR_FP : integer := (63 - 50); -- Floating Point available
constant MSR_FE0 : integer := (63 - 52); -- Floating Exception mode
constant MSR_SE : integer := (63 - 53); -- Single-step bit of TE field
constant MSR_BE : integer := (63 - 54); -- Branch trace bit of TE field
constant MSR_FE1 : integer := (63 - 55); -- Floating Exception mode
constant MSR_IR : integer := (63 - 58); -- Instruction Relocation
constant MSR_DR : integer := (63 - 59); -- Data Relocation
constant MSR_PMM : integer := (63 - 61); -- Performance Monitor Mark
constant MSR_RI : integer := (63 - 62); -- Recoverable Interrupt
constant MSR_LE : integer := (63 - 63); -- Little Endian

-- SPR numbers
subtype spr_num_t is integer range 0 to 1023;

function decode_spr_num(insn: std_ulogic_vector(31 downto 0)) return spr_num_t;

constant SPR_XER : spr_num_t := 1;
constant SPR_LR : spr_num_t := 8;
constant SPR_CTR : spr_num_t := 9;
constant SPR_TAR : spr_num_t := 815;
constant SPR_DSISR : spr_num_t := 18;
constant SPR_DAR : spr_num_t := 19;
constant SPR_TB : spr_num_t := 268;
constant SPR_TBU : spr_num_t := 269;
constant SPR_DEC : spr_num_t := 22;
constant SPR_SRR0 : spr_num_t := 26;
constant SPR_SRR1 : spr_num_t := 27;
constant SPR_CFAR : spr_num_t := 28;
constant SPR_HSRR0 : spr_num_t := 314;
constant SPR_HSRR1 : spr_num_t := 315;
constant SPR_SPRG0 : spr_num_t := 272;
constant SPR_SPRG1 : spr_num_t := 273;
constant SPR_SPRG2 : spr_num_t := 274;
constant SPR_SPRG3 : spr_num_t := 275;
constant SPR_SPRG3U : spr_num_t := 259;
constant SPR_HSPRG0 : spr_num_t := 304;
constant SPR_HSPRG1 : spr_num_t := 305;
constant SPR_PID : spr_num_t := 48;
constant SPR_PTCR : spr_num_t := 464;
constant SPR_PVR : spr_num_t := 287;

-- PMU registers
constant SPR_UPMC1 : spr_num_t := 771;
constant SPR_UPMC2 : spr_num_t := 772;
constant SPR_UPMC3 : spr_num_t := 773;
constant SPR_UPMC4 : spr_num_t := 774;
constant SPR_UPMC5 : spr_num_t := 775;
constant SPR_UPMC6 : spr_num_t := 776;
constant SPR_UMMCR0 : spr_num_t := 779;
constant SPR_UMMCR1 : spr_num_t := 782;
constant SPR_UMMCR2 : spr_num_t := 769;
constant SPR_UMMCRA : spr_num_t := 770;
constant SPR_USIER : spr_num_t := 768;
constant SPR_USIAR : spr_num_t := 780;
constant SPR_USDAR : spr_num_t := 781;
constant SPR_PMC1 : spr_num_t := 787;
constant SPR_PMC2 : spr_num_t := 788;
constant SPR_PMC3 : spr_num_t := 789;
constant SPR_PMC4 : spr_num_t := 790;
constant SPR_PMC5 : spr_num_t := 791;
constant SPR_PMC6 : spr_num_t := 792;
constant SPR_MMCR0 : spr_num_t := 795;
constant SPR_MMCR1 : spr_num_t := 798;
constant SPR_MMCR2 : spr_num_t := 785;
constant SPR_MMCRA : spr_num_t := 786;
constant SPR_SIER : spr_num_t := 784;
constant SPR_SIAR : spr_num_t := 796;
constant SPR_SDAR : spr_num_t := 797;

-- GPR indices in the register file (GPR only)
subtype gpr_index_t is std_ulogic_vector(4 downto 0);

-- Extended GPR index (can hold an SPR or a FPR)
subtype gspr_index_t is std_ulogic_vector(6 downto 0);

-- FPR indices
subtype fpr_index_t is std_ulogic_vector(4 downto 0);

-- Some SPRs are stored in the register file, they use the magic
-- GPR numbers above 31.
--
-- The function fast_spr_num() returns the corresponding fast
-- pseudo-GPR number for a given SPR number. The result MSB
-- indicates if this is indeed a fast SPR. If clear, then
-- the SPR is not stored in the GPR file.
--
-- FPRs are also stored in the register file, using GSPR
-- numbers from 64 to 95.
--
function fast_spr_num(spr: spr_num_t) return gspr_index_t;

-- Indices conversion functions
function gspr_to_gpr(i: gspr_index_t) return gpr_index_t;
function gpr_to_gspr(i: gpr_index_t) return gspr_index_t;
function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t;
function is_fast_spr(s: gspr_index_t) return std_ulogic;
function fpr_to_gspr(f: fpr_index_t) return gspr_index_t;

-- The XER is split: the common bits (CA, OV, SO, OV32 and CA32) are
-- in the CR file as a kind of CR extension (with a separate write
-- control). The rest is stored as a fast SPR.
type xer_common_t is record
ca : std_ulogic;
ca32 : std_ulogic;
ov : std_ulogic;
ov32 : std_ulogic;
so : std_ulogic;
end record;
constant xerc_init : xer_common_t := (others => '0');

-- FPSCR bit numbers
constant FPSCR_FX : integer := 63 - 32;
constant FPSCR_FEX : integer := 63 - 33;
constant FPSCR_VX : integer := 63 - 34;
constant FPSCR_OX : integer := 63 - 35;
constant FPSCR_UX : integer := 63 - 36;
constant FPSCR_ZX : integer := 63 - 37;
constant FPSCR_XX : integer := 63 - 38;
constant FPSCR_VXSNAN : integer := 63 - 39;
constant FPSCR_VXISI : integer := 63 - 40;
constant FPSCR_VXIDI : integer := 63 - 41;
constant FPSCR_VXZDZ : integer := 63 - 42;
constant FPSCR_VXIMZ : integer := 63 - 43;
constant FPSCR_VXVC : integer := 63 - 44;
constant FPSCR_FR : integer := 63 - 45;
constant FPSCR_FI : integer := 63 - 46;
constant FPSCR_C : integer := 63 - 47;
constant FPSCR_FL : integer := 63 - 48;
constant FPSCR_FG : integer := 63 - 49;
constant FPSCR_FE : integer := 63 - 50;
constant FPSCR_FU : integer := 63 - 51;
constant FPSCR_VXSOFT : integer := 63 - 53;
constant FPSCR_VXSQRT : integer := 63 - 54;
constant FPSCR_VXCVI : integer := 63 - 55;
constant FPSCR_VE : integer := 63 - 56;
constant FPSCR_OE : integer := 63 - 57;
constant FPSCR_UE : integer := 63 - 58;
constant FPSCR_ZE : integer := 63 - 59;
constant FPSCR_XE : integer := 63 - 60;
constant FPSCR_NI : integer := 63 - 61;
constant FPSCR_RN : integer := 63 - 63;

-- Real addresses
-- REAL_ADDR_BITS is the number of real address bits that we store
constant REAL_ADDR_BITS : positive := 56;
subtype real_addr_t is std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0);
function addr_to_real(addr: std_ulogic_vector(63 downto 0)) return real_addr_t;

-- Used for tracking instruction completion and pending register writes
constant TAG_COUNT : positive := 4;
constant TAG_NUMBER_BITS : natural := log2(TAG_COUNT);
subtype tag_number_t is integer range 0 to TAG_COUNT - 1;
subtype tag_index_t is unsigned(TAG_NUMBER_BITS - 1 downto 0);
type instr_tag_t is record
tag : tag_number_t;
valid : std_ulogic;
end record;
constant instr_tag_init : instr_tag_t := (tag => 0, valid => '0');
function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean;

subtype intr_vector_t is integer range 0 to 16#fff#;

-- For now, fixed 16 sources, make this either a parametric
-- package of some sort or an unconstrained array.
-- Record carrying an interrupt source number and a priority; going by the
-- type name it is passed from the ICS to the ICP (the users of this type
-- are not visible here -- confirm).
type ics_to_icp_t is record
-- Level interrupts only, ICS just keeps presenting the
-- highest priority interrupt. Once handling edge, something
-- smarter involving handshake & reject support will be needed
-- 4-bit source number (enough to index the 16 sources noted above).
src : std_ulogic_vector(3 downto 0);
-- 8-bit priority value for that source.
pri : std_ulogic_vector(7 downto 0);
end record;

-- This needs to die...
-- Bundle of four 64-bit values: tb, dec, msr and cfar.
-- NOTE(review): another ctrl_t with different fields is declared further down
-- in this excerpt; confirm which revision is intended.
type ctrl_t is record
tb: std_ulogic_vector(63 downto 0);
dec: std_ulogic_vector(63 downto 0);
msr: std_ulogic_vector(63 downto 0);
cfar: std_ulogic_vector(63 downto 0);
end record;

-- Record named for the fetch1 -> icache direction: a request flag, several
-- single-bit mode and prediction flags, and a 64-bit nia.
type Fetch1ToIcacheType is record
req: std_ulogic;
virt_mode : std_ulogic;
priv_mode : std_ulogic;
big_endian : std_ulogic;
stop_mark: std_ulogic;
predicted : std_ulogic;
pred_ntaken : std_ulogic;
nia: std_ulogic_vector(63 downto 0);
end record;

-- Record named for the icache -> decode1 direction: status flags, the 64-bit
-- nia and a 32-bit instruction word.
type IcacheToDecode1Type is record
valid: std_ulogic;
stop_mark: std_ulogic;
fetch_failed: std_ulogic;
nia: std_ulogic_vector(63 downto 0);
insn: std_ulogic_vector(31 downto 0);
big_endian: std_ulogic;
next_predicted: std_ulogic;
next_pred_ntaken: std_ulogic;
end record;

-- Two single-bit icache event flags. PMUEventType in this package has fields
-- with the same two names.
type IcacheEventType is record
icache_miss : std_ulogic;
itlb_miss_resolved : std_ulogic;
end record;

-- Record named for the decode1 -> decode2 direction: the instruction word and
-- its nia, three register indices, the decode ROM entry and a few flags.
type Decode1ToDecode2Type is record
valid: std_ulogic;
stop_mark : std_ulogic;
nia: std_ulogic_vector(63 downto 0);
insn: std_ulogic_vector(31 downto 0);
ispr1: gspr_index_t; -- (G)SPR used for branch condition (CTR) or mfspr
ispr2: gspr_index_t; -- (G)SPR used for branch target (CTR, LR, TAR)
ispro: gspr_index_t; -- (G)SPR written with LR or CTR
decode: decode_rom_t;
br_pred: std_ulogic; -- Branch was predicted to be taken
big_endian: std_ulogic;
end record;
-- Default value: every flag '0', every vector all zeros, decode set to
-- decode_rom_init.
constant Decode1ToDecode2Init : Decode1ToDecode2Type :=
(valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'),
ispr1 => (others => '0'), ispr2 => (others => '0'), ispro => (others => '0'),
decode => decode_rom_init, br_pred => '0', big_endian => '0');

-- Record named for the decode1 -> fetch1 direction: a redirect flag and a
-- 64-bit redirect_nia.
type Decode1ToFetch1Type is record
redirect : std_ulogic;
redirect_nia : std_ulogic_vector(63 downto 0);
end record;

-- A 64-bit data value paired with an instruction tag.
type bypass_data_t is record
tag : instr_tag_t;
data : std_ulogic_vector(63 downto 0);
end record;
-- Default value: tag set to instr_tag_init, data all zeros.
constant bypass_data_init : bypass_data_t := (tag => instr_tag_init, data => (others => '0'));

-- Same shape as bypass_data_t but with a 32-bit data field.
type cr_bypass_data_t is record
tag : instr_tag_t;
data : std_ulogic_vector(31 downto 0);
end record;
-- Default value: tag set to instr_tag_init, data all zeros.
constant cr_bypass_data_init : cr_bypass_data_t := (tag => instr_tag_init, data => (others => '0'));

-- Record named for the decode2 -> execute1 direction: the decoded operation,
-- its tag, register indices, three 64-bit operands, CR/XER inputs and the
-- per-instruction control flags.
type Decode2ToExecute1Type is record
valid: std_ulogic;
unit : unit_t;
fac : facility_t;
insn_type: insn_type_t;
nia: std_ulogic_vector(63 downto 0);
instr_tag : instr_tag_t;
write_reg: gspr_index_t;
write_reg_enable: std_ulogic;
read_reg1: gspr_index_t;
read_reg2: gspr_index_t;
read_data1: std_ulogic_vector(63 downto 0);
read_data2: std_ulogic_vector(63 downto 0);
read_data3: std_ulogic_vector(63 downto 0);
cr: std_ulogic_vector(31 downto 0);
xerc: xer_common_t;
lr: std_ulogic;
br_abs: std_ulogic;
rc: std_ulogic;
oe: std_ulogic;
invert_a: std_ulogic;
addm1 : std_ulogic;
invert_out: std_ulogic;
input_carry: carry_in_t;
output_carry: std_ulogic;
input_cr: std_ulogic;
output_cr: std_ulogic;
output_xer: std_ulogic;
is_32bit: std_ulogic;
is_signed: std_ulogic;
insn: std_ulogic_vector(31 downto 0);
data_len: std_ulogic_vector(3 downto 0);
byte_reverse : std_ulogic;
sign_extend : std_ulogic; -- do we need to sign extend?
update : std_ulogic; -- is this an update instruction?
reserve : std_ulogic; -- set for larx/stcx
br_pred : std_ulogic;
result_sel : std_ulogic_vector(2 downto 0); -- select source of result
sub_select : std_ulogic_vector(2 downto 0); -- sub-result selection
repeat : std_ulogic; -- set if instruction is cracked into two ops
second : std_ulogic; -- set if this is the second op
end record;
-- Default value: not valid, unit and fac NONE, insn_type OP_ILLEGAL,
-- input_carry ZERO, xerc set to xerc_init, all other flags and vectors zero.
-- write_reg, read_reg1 and read_reg2 are not named explicitly; they are
-- zeroed by the trailing "others" association.
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
write_reg_enable => '0',
lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0',
output_cr => '0', output_xer => '0',
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0',
byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'),
read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'),
cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'),
result_sel => "000", sub_select => "000",
repeat => '0', second => '0', others => (others => '0'));

-- Multiplier input: two 64-bit operands, a 128-bit addend and control flags.
type MultiplyInputType is record
valid: std_ulogic;
data1: std_ulogic_vector(63 downto 0);
data2: std_ulogic_vector(63 downto 0);
addend: std_ulogic_vector(127 downto 0);
is_32bit: std_ulogic;
not_result: std_ulogic;
end record;
-- Default value: flags '0'; data1, data2 and addend are zeroed by "others".
constant MultiplyInputInit : MultiplyInputType := (valid => '0',
is_32bit => '0', not_result => '0',
others => (others => '0'));

-- Multiplier output: a 128-bit result with valid and overflow flags.
type MultiplyOutputType is record
valid: std_ulogic;
result: std_ulogic_vector(127 downto 0);
overflow : std_ulogic;
end record;
-- Default value: flags '0', result zeroed by "others".
constant MultiplyOutputInit : MultiplyOutputType := (valid => '0', overflow => '0',
others => (others => '0'));

-- Record named for the execute1 -> divider direction: 64-bit dividend and
-- divisor plus flags selecting the kind of divide.
type Execute1ToDividerType is record
valid: std_ulogic;
dividend: std_ulogic_vector(63 downto 0);
divisor: std_ulogic_vector(63 downto 0);
is_signed: std_ulogic;
is_32bit: std_ulogic;
is_extended: std_ulogic;
is_modulus: std_ulogic;
neg_result: std_ulogic;
end record;
-- Default value: all flags '0'; dividend and divisor are zeroed by "others".
constant Execute1ToDividerInit: Execute1ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0',
is_extended => '0', is_modulus => '0',
neg_result => '0', others => (others => '0'));

-- One single-bit flag per event; every field is std_ulogic.
type PMUEventType is record
no_instr_avail : std_ulogic;
dispatch : std_ulogic;
ext_interrupt : std_ulogic;
instr_complete : std_ulogic;
fp_complete : std_ulogic;
ld_complete : std_ulogic;
st_complete : std_ulogic;
br_taken_complete : std_ulogic;
br_mispredict : std_ulogic;
ipref_discard : std_ulogic;
itlb_miss : std_ulogic;
itlb_miss_resolved : std_ulogic;
icache_miss : std_ulogic;
dc_miss_resolved : std_ulogic;
dc_load_miss : std_ulogic;
dc_ld_miss_resolved : std_ulogic;
dc_store_miss : std_ulogic;
dtlb_miss : std_ulogic;
dtlb_miss_resolved : std_ulogic;
ld_miss_nocache : std_ulogic;
ld_fill_nocache : std_ulogic;
end record;
-- Default value: every event flag '0'.
constant PMUEventInit : PMUEventType := (others => '0');

-- Record named for the execute1 -> PMU direction: SPR access strobes with a
-- 5-bit SPR number and 64-bit value, MSR/timebase bits, two 64-bit addresses
-- and a PMUEventType of event flags.
type Execute1ToPMUType is record
mfspr : std_ulogic;
mtspr : std_ulogic;
spr_num : std_ulogic_vector(4 downto 0);
spr_val : std_ulogic_vector(63 downto 0);
tbbits : std_ulogic_vector(3 downto 0); -- event bits from timebase
pmm_msr : std_ulogic; -- PMM bit from MSR
pr_msr : std_ulogic; -- PR bit from MSR
run : std_ulogic;
nia : std_ulogic_vector(63 downto 0);
addr : std_ulogic_vector(63 downto 0);
addr_v : std_ulogic;
occur : PMUEventType;
end record;

-- Record named for the PMU -> execute1 direction: a 64-bit SPR value and a
-- single-bit intr flag.
type PMUToExecute1Type is record
spr_val : std_ulogic_vector(63 downto 0);
intr : std_ulogic;
end record;

-- Three read ports, each an enable flag plus a gspr_index_t register index.
type Decode2ToRegisterFileType is record
read1_enable : std_ulogic;
read1_reg : gspr_index_t;
read2_enable : std_ulogic;
read2_reg : gspr_index_t;
read3_enable : std_ulogic;
read3_reg : gspr_index_t;
end record;

-- Three 64-bit read-data values, one per read port.
type RegisterFileToDecode2Type is record
read1_data : std_ulogic_vector(63 downto 0);
read2_data : std_ulogic_vector(63 downto 0);
read3_data : std_ulogic_vector(63 downto 0);
end record;

-- Record named for the decode2 -> CR file direction: a single read flag.
type Decode2ToCrFileType is record
read : std_ulogic;
end record;

-- Record named for the CR file -> decode2 direction: 32 bits of CR data and an
-- xer_common_t value.
type CrFileToDecode2Type is record
read_cr_data : std_ulogic_vector(31 downto 0);
read_xerc_data : xer_common_t;
end record;

-- Record named for the execute1 -> loadstore1 direction: the operation, its
-- tag, two 64-bit address operands, store data and access-mode flags.
type Execute1ToLoadstore1Type is record
valid : std_ulogic;
op : insn_type_t; -- what ld/st or m[tf]spr or TLB op to do
nia : std_ulogic_vector(63 downto 0);
insn : std_ulogic_vector(31 downto 0);
instr_tag : instr_tag_t;
addr1 : std_ulogic_vector(63 downto 0);
addr2 : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- data to write, unused for read
write_reg : gspr_index_t;
length : std_ulogic_vector(3 downto 0);
ci : std_ulogic; -- cache-inhibited load/store
byte_reverse : std_ulogic;
sign_extend : std_ulogic; -- do we need to sign extend?
update : std_ulogic; -- is this an update instruction?
xerc : xer_common_t;
reserve : std_ulogic; -- set for larx/stcx.
rc : std_ulogic; -- set for stcx.
virt_mode : std_ulogic; -- do translation through TLB
priv_mode : std_ulogic; -- privileged mode (MSR[PR] = 0)
mode_32bit : std_ulogic; -- trim addresses to 32 bits
is_32bit : std_ulogic;
repeat : std_ulogic;
second : std_ulogic;
msr : std_ulogic_vector(63 downto 0);
end record;
-- Default value: not valid, op OP_ILLEGAL, instr_tag set to instr_tag_init,
-- xerc set to xerc_init, every other flag '0' and every vector all zeros.
-- Each field is named explicitly (no "others" association).
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type :=
(valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init,
reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0',
nia => (others => '0'), insn => (others => '0'),
instr_tag => instr_tag_init,
addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
write_reg => (others => '0'),
length => (others => '0'),
mode_32bit => '0', is_32bit => '0',
repeat => '0', second => '0',
msr => (others => '0'));

-- Record named for the loadstore1 -> execute1 direction: three single-bit
-- status flags.
type Loadstore1ToExecute1Type is record
busy : std_ulogic;
in_progress : std_ulogic;
interrupt : std_ulogic;
end record;

-- Record named for the loadstore1 -> dcache direction: request flags, a
-- 64-bit address, 64-bit data and an 8-bit byte select.
type Loadstore1ToDcacheType is record
valid : std_ulogic;
hold : std_ulogic;
load : std_ulogic; -- is this a load
dcbz : std_ulogic;
nc : std_ulogic;
reserve : std_ulogic;
atomic : std_ulogic; -- part of a multi-transfer atomic op
atomic_last : std_ulogic;
virt_mode : std_ulogic;
priv_mode : std_ulogic;
addr : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- valid the cycle after .valid = 1
byte_sel : std_ulogic_vector(7 downto 0);
end record;

-- Record named for the dcache -> loadstore1 direction: 64-bit data plus
-- completion and error flags.
type DcacheToLoadstore1Type is record
valid : std_ulogic;
data : std_ulogic_vector(63 downto 0);
store_done : std_ulogic;
error : std_ulogic;
cache_paradox : std_ulogic;
end record;

-- Five single-bit dcache/dtlb event flags.
type DcacheEventType is record
load_miss : std_ulogic;
store_miss : std_ulogic;
dcache_refill : std_ulogic;
dtlb_miss : std_ulogic;
dtlb_miss_resolved : std_ulogic;
end record;

-- Record named for the loadstore1 -> MMU direction: operation flags, a 10-bit
-- SPR number, a 64-bit address and a 64-bit rs value.
type Loadstore1ToMmuType is record
valid : std_ulogic;
tlbie : std_ulogic;
slbia : std_ulogic;
mtspr : std_ulogic;
iside : std_ulogic;
load : std_ulogic;
priv : std_ulogic;
sprn : std_ulogic_vector(9 downto 0);
addr : std_ulogic_vector(63 downto 0);
rs : std_ulogic_vector(63 downto 0);
end record;

-- Record named for the MMU -> loadstore1 direction: a done flag, a set of
-- error flags and a 64-bit SPR value.
type MmuToLoadstore1Type is record
done : std_ulogic;
err : std_ulogic;
invalid : std_ulogic;
badtree : std_ulogic;
segerr : std_ulogic;
perm_error : std_ulogic;
rc_error : std_ulogic;
sprval : std_ulogic_vector(63 downto 0);
end record;

-- Record named for the MMU -> dcache direction: operation flags plus a 64-bit
-- address and a 64-bit pte.
type MmuToDcacheType is record
valid : std_ulogic;
tlbie : std_ulogic;
doall : std_ulogic;
tlbld : std_ulogic;
addr : std_ulogic_vector(63 downto 0);
pte : std_ulogic_vector(63 downto 0);
end record;

-- Record named for the dcache -> MMU direction: stall, done and err flags and
-- 64 bits of data.
type DcacheToMmuType is record
stall : std_ulogic;
done : std_ulogic;
err : std_ulogic;
data : std_ulogic_vector(63 downto 0);
end record;

-- Record named for the MMU -> icache direction. It has the same fields as
-- MmuToDcacheType except that there is no valid flag.
type MmuToIcacheType is record
tlbld : std_ulogic;
tlbie : std_ulogic;
doall : std_ulogic;
addr : std_ulogic_vector(63 downto 0);
pte : std_ulogic_vector(63 downto 0);
end record;

-- Record named for the loadstore1 -> writeback direction: a register write
-- (enable, index, 64-bit data), status flags and interrupt information
-- (vector, 64-bit srr0, 16-bit srr1).
type Loadstore1ToWritebackType is record
valid : std_ulogic;
instr_tag : instr_tag_t;
write_enable: std_ulogic;
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
xerc : xer_common_t;
rc : std_ulogic;
store_done : std_ulogic;
interrupt : std_ulogic;
intr_vec : intr_vector_t;
srr0: std_ulogic_vector(63 downto 0);
srr1: std_ulogic_vector(15 downto 0);
end record;
-- Default value: all flags '0', vectors zero, intr_vec 0, instr_tag set to
-- instr_tag_init and xerc set to xerc_init.
constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType :=
(valid => '0', instr_tag => instr_tag_init, write_enable => '0',
write_reg => (others => '0'), write_data => (others => '0'),
xerc => xerc_init, rc => '0', store_done => '0',
interrupt => '0', intr_vec => 0,
srr0 => (others => '0'), srr1 => (others => '0'));

-- Three single-bit event flags reported under the loadstore1 name.
type Loadstore1EventType is record
load_complete : std_ulogic;
store_complete : std_ulogic;
itlb_miss : std_ulogic;
end record;

-- Record named for the execute1 -> writeback direction: register, CR and XER
-- write information, interrupt information, and redirect/branch information.
type Execute1ToWritebackType is record
valid: std_ulogic;
instr_tag : instr_tag_t;
rc : std_ulogic;
mode_32bit : std_ulogic;
write_enable : std_ulogic;
write_reg: gspr_index_t;
write_data: std_ulogic_vector(63 downto 0);
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
interrupt : std_ulogic;
intr_vec : intr_vector_t;
redirect: std_ulogic;
redir_mode: std_ulogic_vector(3 downto 0);
last_nia: std_ulogic_vector(63 downto 0);
br_offset: std_ulogic_vector(63 downto 0);
br_last: std_ulogic;
br_taken: std_ulogic;
abs_br: std_ulogic;
srr1: std_ulogic_vector(15 downto 0);
msr: std_ulogic_vector(63 downto 0);
end record;
-- Default value: all flags '0', vectors zero, intr_vec 0, instr_tag set to
-- instr_tag_init and xerc set to xerc_init. Every field is named explicitly.
constant Execute1ToWritebackInit : Execute1ToWritebackType :=
(valid => '0', instr_tag => instr_tag_init, rc => '0', mode_32bit => '0',
write_enable => '0', write_cr_enable => '0',
write_xerc_enable => '0', xerc => xerc_init,
write_data => (others => '0'), write_cr_mask => (others => '0'),
write_cr_data => (others => '0'), write_reg => (others => '0'),
interrupt => '0', intr_vec => 0, redirect => '0', redir_mode => "0000",
last_nia => (others => '0'), br_offset => (others => '0'),
br_last => '0', br_taken => '0', abs_br => '0',
srr1 => (others => '0'), msr => (others => '0'));

-- Record named for the execute1 -> FPU direction: the operation and its tag,
-- three 64-bit operands (fra, frb, frc), a target register index and flags.
type Execute1ToFPUType is record
valid : std_ulogic;
op : insn_type_t;
nia : std_ulogic_vector(63 downto 0);
itag : instr_tag_t;
insn : std_ulogic_vector(31 downto 0);
single : std_ulogic;
fe_mode : std_ulogic_vector(1 downto 0);
fra : std_ulogic_vector(63 downto 0);
frb : std_ulogic_vector(63 downto 0);
frc : std_ulogic_vector(63 downto 0);
frt : gspr_index_t;
rc : std_ulogic;
out_cr : std_ulogic;
end record;
-- Default value: not valid, op OP_ILLEGAL, itag set to instr_tag_init,
-- fe_mode "00", every other flag '0' and every vector all zeros.
constant Execute1ToFPUInit : Execute1ToFPUType := (valid => '0', op => OP_ILLEGAL, nia => (others => '0'),
itag => instr_tag_init,
insn => (others => '0'), fe_mode => "00", rc => '0',
fra => (others => '0'), frb => (others => '0'),
frc => (others => '0'), frt => (others => '0'),
single => '0', out_cr => '0');

-- Record named for the FPU -> execute1 direction: busy and exception flags.
type FPUToExecute1Type is record
busy : std_ulogic;
exception : std_ulogic;
end record;
-- Default value: both flags '0'.
constant FPUToExecute1Init : FPUToExecute1Type := (others => '0');

-- Record named for the FPU -> writeback direction: register and CR write
-- information plus interrupt information (vector, srr0, srr1).
type FPUToWritebackType is record
valid : std_ulogic;
interrupt : std_ulogic;
instr_tag : instr_tag_t;
write_enable : std_ulogic;
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
intr_vec : intr_vector_t;
srr0 : std_ulogic_vector(63 downto 0);
srr1 : std_ulogic_vector(15 downto 0);
end record;
-- Default value: flags '0', intr_vec 0, instr_tag set to instr_tag_init;
-- write_data and srr0 are zeroed by the trailing "others" association.
constant FPUToWritebackInit : FPUToWritebackType :=
(valid => '0', interrupt => '0', instr_tag => instr_tag_init,
write_enable => '0', write_reg => (others => '0'),
write_cr_enable => '0', write_cr_mask => (others => '0'),
write_cr_data => (others => '0'),
intr_vec => 0, srr1 => (others => '0'),
others => (others => '0'));

-- Record named for the divider -> execute1 direction: a 64-bit result with
-- valid and overflow flags.
type DividerToExecute1Type is record
valid: std_ulogic;
write_reg_data: std_ulogic_vector(63 downto 0);
overflow : std_ulogic;
end record;
-- Default value: flags '0', write_reg_data zeroed by "others".
constant DividerToExecute1Init : DividerToExecute1Type := (valid => '0', overflow => '0',
others => (others => '0'));

-- Record named for the writeback -> fetch1 direction: a redirect flag with
-- mode flags and a 64-bit redirect_nia, plus branch information (br_nia,
-- br_last, br_taken).
type WritebackToFetch1Type is record
redirect: std_ulogic;
virt_mode: std_ulogic;
priv_mode: std_ulogic;
big_endian: std_ulogic;
mode_32bit: std_ulogic;
redirect_nia: std_ulogic_vector(63 downto 0);
br_nia : std_ulogic_vector(63 downto 0);
br_last : std_ulogic;
br_taken : std_ulogic;
end record;
-- Default value: every flag '0', both addresses all zeros.
constant WritebackToFetch1Init : WritebackToFetch1Type :=
(redirect => '0', virt_mode => '0', priv_mode => '0', big_endian => '0',
mode_32bit => '0', redirect_nia => (others => '0'),
br_last => '0', br_taken => '0', br_nia => (others => '0'));

-- Register-file write port: a gspr_index_t register index, 64-bit data and an
-- enable flag.
type WritebackToRegisterFileType is record
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
write_enable : std_ulogic;
end record;
-- Default value: write disabled, data zero; write_reg zeroed by "others".
constant WritebackToRegisterFileInit : WritebackToRegisterFileType :=
(write_enable => '0', write_data => (others => '0'), others => (others => '0'));

-- CR-file write port: a CR write (enable, 8-bit mask, 32-bit data) and an XER
-- common-bits write (enable, xer_common_t data).
type WritebackToCrFileType is record
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
write_xerc_enable : std_ulogic;
write_xerc_data : xer_common_t;
end record;
-- Default value: both enables '0', mask and data zero, write_xerc_data set to
-- xerc_init.
constant WritebackToCrFileInit : WritebackToCrFileType := (write_cr_enable => '0', write_xerc_enable => '0',
write_xerc_data => xerc_init,
write_cr_mask => (others => '0'),
write_cr_data => (others => '0'));

-- Two single-bit completion event flags. PMUEventType in this package has
-- fields with the same two names.
type WritebackEventType is record
instr_complete : std_ulogic;
fp_complete : std_ulogic;
end record;

-- Bundle of three 64-bit values (lr, ctr, tb) and a carry bit.
-- NOTE(review): ctrl_t is also declared earlier in this excerpt with different
-- fields; two declarations of one name cannot coexist in a package, so confirm
-- which revision is intended.
type ctrl_t is record
lr: std_ulogic_vector(63 downto 0);
ctr: std_ulogic_vector(63 downto 0);
tb: std_ulogic_vector(63 downto 0);
carry: std_ulogic;
end record;

-- Record named for the fetch1 -> fetch2 direction: only the 64-bit nia.
type Fetch1ToFetch2Type is record
nia: std_ulogic_vector(63 downto 0);
end record;

-- Record named for the fetch2 -> decode1 direction: a valid flag, the 64-bit
-- nia and a 32-bit instruction word.
type Fetch2ToDecode1Type is record
valid: std_ulogic;
nia: std_ulogic_vector(63 downto 0);
insn: std_ulogic_vector(31 downto 0);
end record;
-- Default value: not valid; nia and insn zeroed by "others".
constant Fetch2ToDecode1Init : Fetch2ToDecode1Type := (valid => '0', others => (others => '0'));

-- Record named for the decode1 -> decode2 direction: valid flag, nia,
-- instruction word and decode ROM entry.
-- NOTE(review): this type name and Decode1ToDecode2Init are also declared
-- earlier in this excerpt with more fields; confirm which revision is intended.
type Decode1ToDecode2Type is record
valid: std_ulogic;
nia: std_ulogic_vector(63 downto 0);
insn: std_ulogic_vector(31 downto 0);
decode: decode_rom_t;
end record;
-- Default value: not valid, decode set to decode_rom_init, vectors zero.
constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', decode => decode_rom_init, others => (others => '0'));

-- Record named for the fetch2 -> icache direction: a request flag and a
-- 64-bit address.
type Fetch2ToIcacheType is record
req: std_ulogic;
addr: std_ulogic_vector(63 downto 0);
end record;

-- Record named for the icache -> fetch2 direction: an ack flag and a 32-bit
-- instruction word.
type IcacheToFetch2Type is record
ack: std_ulogic;
insn: std_ulogic_vector(31 downto 0);
end record;

-- Record named for the decode2 -> execute1 direction. Register numbers here
-- are plain 5-bit vectors, and three constant fields (8, 6 and 5 bits wide)
-- are carried next to the two 64-bit operands.
-- NOTE(review): this type name and Decode2ToExecute1Init are also declared
-- earlier in this excerpt with a different field list; confirm which revision
-- is intended.
type Decode2ToExecute1Type is record
valid: std_ulogic;
insn_type: insn_type_t;
nia: std_ulogic_vector(63 downto 0);
write_reg: std_ulogic_vector(4 downto 0);
read_reg1: std_ulogic_vector(4 downto 0);
read_reg2: std_ulogic_vector(4 downto 0);
read_data1: std_ulogic_vector(63 downto 0);
read_data2: std_ulogic_vector(63 downto 0);
const1: std_ulogic_vector(7 downto 0);
const2: std_ulogic_vector(5 downto 0);
const3: std_ulogic_vector(4 downto 0);
cr: std_ulogic_vector(31 downto 0);
lr: std_ulogic;
rc: std_ulogic;
input_carry: std_ulogic;
output_carry: std_ulogic;
input_cr: std_ulogic;
output_cr: std_ulogic;
end record;
-- Default value: not valid, insn_type OP_ILLEGAL, flags '0'; every vector
-- field is zeroed by the trailing "others" association.
constant Decode2ToExecute1Init : Decode2ToExecute1Type := (valid => '0', insn_type => OP_ILLEGAL, lr => '0', rc => '0', input_carry => '0', output_carry => '0', input_cr => '0', output_cr => '0', others => (others => '0'));

-- Record named for the decode2 -> multiply direction. Note that data1 and
-- data2 are 65 bits wide (64 downto 0), one bit more than a 64-bit register.
type Decode2ToMultiplyType is record
valid: std_ulogic;
insn_type: insn_type_t;
write_reg: std_ulogic_vector(4 downto 0);
data1: std_ulogic_vector(64 downto 0);
data2: std_ulogic_vector(64 downto 0);
rc: std_ulogic;
end record;
-- Default value: not valid, insn_type OP_ILLEGAL, rc '0', vectors zero.
constant Decode2ToMultiplyInit : Decode2ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0', others => (others => '0'));

-- Three read ports, each an enable flag plus a 5-bit register number.
-- NOTE(review): this type name is also declared earlier in this excerpt using
-- gspr_index_t indices; confirm which revision is intended.
type Decode2ToRegisterFileType is record
read1_enable : std_ulogic;
read1_reg : std_ulogic_vector(4 downto 0);
read2_enable : std_ulogic;
read2_reg : std_ulogic_vector(4 downto 0);
read3_enable : std_ulogic;
read3_reg : std_ulogic_vector(4 downto 0);
end record;

-- Three 64-bit read-data values, one per read port.
-- NOTE(review): an identical declaration of this type appears earlier in this
-- excerpt; confirm that only one is meant to remain.
type RegisterFileToDecode2Type is record
read1_data : std_ulogic_vector(63 downto 0);
read2_data : std_ulogic_vector(63 downto 0);
read3_data : std_ulogic_vector(63 downto 0);
end record;

-- Record named for the decode2 -> CR file direction: a single read flag.
-- NOTE(review): an identical declaration of this type appears earlier in this
-- excerpt; confirm that only one is meant to remain.
type Decode2ToCrFileType is record
read : std_ulogic;
end record;

-- Record named for the CR file -> decode2 direction: 32 bits of CR data.
-- NOTE(review): this type name is also declared earlier in this excerpt with
-- an extra read_xerc_data field; confirm which revision is intended.
type CrFileToDecode2Type is record
read_cr_data : std_ulogic_vector(31 downto 0);
end record;

-- Record named for the execute1 -> fetch1 direction: a redirect flag and a
-- 64-bit redirect_nia.
type Execute1ToFetch1Type is record
redirect: std_ulogic;
redirect_nia: std_ulogic_vector(63 downto 0);
end record;
-- Default value: no redirect, address zero. Unlike the other defaults in this
-- package, the constant name ends in "TypeInit" rather than "Init".
constant Execute1ToFetch1TypeInit : Execute1ToFetch1Type := (redirect => '0', others => (others => '0'));

-- Record named for the decode2 -> loadstore1 direction: a load/store request
-- with two 64-bit address operands, store data and formatting flags.
type Decode2ToLoadstore1Type is record
valid : std_ulogic;
load : std_ulogic; -- is this a load or store
addr1 : std_ulogic_vector(63 downto 0);
addr2 : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- data to write, unused for read
write_reg : std_ulogic_vector(4 downto 0); -- read data goes to this register
length : std_ulogic_vector(3 downto 0);
byte_reverse : std_ulogic;
sign_extend : std_ulogic; -- do we need to sign extend?
update : std_ulogic; -- is this an update instruction?
update_reg : std_ulogic_vector(4 downto 0); -- if so, the register to update
end record;
-- Default value: all flags '0'; every vector field zeroed by "others".
constant Decode2ToLoadstore1Init : Decode2ToLoadstore1Type := (valid => '0', load => '0', byte_reverse => '0', sign_extend => '0', update => '0', others => (others => '0'));

-- Record named for the loadstore1 -> loadstore2 direction. It has the same
-- fields as Decode2ToLoadstore1Type except that addr1/addr2 are replaced by a
-- single 64-bit addr. No init constant is declared for it here.
type Loadstore1ToLoadstore2Type is record
valid : std_ulogic;
load : std_ulogic;
addr : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0);
write_reg : std_ulogic_vector(4 downto 0);
length : std_ulogic_vector(3 downto 0);
byte_reverse : std_ulogic;
sign_extend : std_ulogic;
update : std_ulogic;
update_reg : std_ulogic_vector(4 downto 0);
end record;

-- Record named for the loadstore2 -> writeback direction: a register write
-- (enable, 5-bit register number, 64-bit data) with a valid flag.
type Loadstore2ToWritebackType is record
valid : std_ulogic;
write_enable: std_ulogic;
write_reg : std_ulogic_vector(4 downto 0);
write_data : std_ulogic_vector(63 downto 0);
end record;
-- Default value: flags '0'; write_reg and write_data zeroed by "others".
constant Loadstore2ToWritebackInit : Loadstore2ToWritebackType := (valid => '0', write_enable => '0', others => (others => '0'));

-- Record named for the execute1 -> execute2 direction: a register write, a CR
-- write (enable, 8-bit mask, 32-bit data) and an rc flag.
type Execute1ToExecute2Type is record
valid: std_ulogic;
write_enable : std_ulogic;
write_reg: std_ulogic_vector(4 downto 0);
write_data: std_ulogic_vector(63 downto 0);
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
rc : std_ulogic;
end record;
-- Default value: all flags '0'; every vector field zeroed by "others".
constant Execute1ToExecute2Init : Execute1ToExecute2Type := (valid => '0', write_enable => '0', write_cr_enable => '0', rc => '0', others => (others => '0'));

-- Record named for the execute2 -> writeback direction. It has the same
-- fields as Execute1ToExecute2Type minus the rc flag.
type Execute2ToWritebackType is record
valid: std_ulogic;
write_enable : std_ulogic;
write_reg: std_ulogic_vector(4 downto 0);
write_data: std_ulogic_vector(63 downto 0);
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
end record;
-- Default value: all flags '0'; every vector field zeroed by "others".
constant Execute2ToWritebackInit : Execute2ToWritebackType := (valid => '0', write_enable => '0', write_cr_enable => '0', others => (others => '0'));

-- Record named for the multiply -> writeback direction: a register write and
-- a CR write. The register-write fields use the names write_reg_enable,
-- write_reg_nr and write_reg_data, unlike the write_enable/write_reg/
-- write_data names in the neighbouring writeback records.
type MultiplyToWritebackType is record
valid: std_ulogic;

write_reg_enable : std_ulogic;
write_reg_nr: std_ulogic_vector(4 downto 0);
write_reg_data: std_ulogic_vector(63 downto 0);
write_cr_enable: std_ulogic;
write_cr_mask: std_ulogic_vector(7 downto 0);
write_cr_data: std_ulogic_vector(31 downto 0);
end record;
-- Default value: all flags '0'; every vector field zeroed by "others".
constant MultiplyToWritebackInit : MultiplyToWritebackType := (valid => '0', write_reg_enable => '0', write_cr_enable => '0', others => (others => '0'));

-- Register-file write port: a 5-bit register number, 64-bit data and an
-- enable flag.
-- NOTE(review): this type name and WritebackToRegisterFileInit are also
-- declared earlier in this excerpt using gspr_index_t; confirm which revision
-- is intended.
type WritebackToRegisterFileType is record
write_reg : std_ulogic_vector(4 downto 0);
write_data : std_ulogic_vector(63 downto 0);
write_enable : std_ulogic;
end record;
-- Default value: write disabled; write_reg and write_data zeroed by "others".
constant WritebackToRegisterFileInit : WritebackToRegisterFileType := (write_enable => '0', others => (others => '0'));

-- CR-file write port: enable, 8-bit mask and 32-bit data.
-- NOTE(review): this type name and WritebackToCrFileInit are also declared
-- earlier in this excerpt with additional XER fields; confirm which revision
-- is intended.
type WritebackToCrFileType is record
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
end record;
-- Default value: write disabled; mask and data zeroed by "others".
constant WritebackToCrFileInit : WritebackToCrFileType := (write_cr_enable => '0', others => (others => '0'));

-- Would prefer not to expose this outside the register file, but ghdl
-- doesn't support external names
-- Array of 64-bit registers.
-- NOTE(review): the range 0 to 32 gives 33 entries, while the 5-bit register
-- numbers used by the records just above can only address 0 to 31 -- confirm
-- whether the upper bound should be 31.
type regfile is array(0 to 32) of std_ulogic_vector(63 downto 0);
end common;

package body common is
-- Extract the 10-bit SPR number from an instruction word.
-- The two 5-bit halves are taken in swapped order: insn(15 downto 11) forms
-- the upper five bits of the result and insn(20 downto 16) the lower five.
function decode_spr_num(insn: std_ulogic_vector(31 downto 0)) return spr_num_t is
    variable upper_half : unsigned(4 downto 0);
    variable lower_half : unsigned(4 downto 0);
    variable sprn       : unsigned(9 downto 0);
begin
    upper_half := unsigned(insn(15 downto 11));
    lower_half := unsigned(insn(20 downto 16));
    sprn := upper_half & lower_half;
    return to_integer(sprn);
end;
-- Map an SPR number to a register-file index.
-- SPRs listed in the case statement yield "01" followed by a 5-bit slot
-- number; every other SPR yields "0000000".
function fast_spr_num(spr: spr_num_t) return gspr_index_t is
    variable slot       : integer range 0 to 31;
    variable recognised : boolean := true;
    -- slot_bits is a deliberate temporary: the original code records that
    -- it was introduced as a workaround for VCS compilation, where
    -- simulation failed with a subtype constraint mismatch error
    -- (see GitHub PR #173).
    variable slot_bits  : std_ulogic_vector(4 downto 0);
begin
    case spr is
        when SPR_LR                 => slot := 0;  -- N.B. decode2 relies on this specific value
        when SPR_CTR                => slot := 1;  -- N.B. decode2 relies on this specific value
        when SPR_SRR0               => slot := 2;
        when SPR_SRR1               => slot := 3;
        when SPR_HSRR0              => slot := 4;
        when SPR_HSRR1              => slot := 5;
        when SPR_SPRG0              => slot := 6;
        when SPR_SPRG1              => slot := 7;
        when SPR_SPRG2              => slot := 8;
        when SPR_SPRG3 | SPR_SPRG3U => slot := 9;
        when SPR_HSPRG0             => slot := 10;
        when SPR_HSPRG1             => slot := 11;
        when SPR_XER                => slot := 12;
        when SPR_TAR                => slot := 13;
        when others =>
            slot := 0;
            recognised := false;
    end case;

    -- Unlisted SPRs get the all-zero index.
    if not recognised then
        return "0000000";
    end if;

    slot_bits := std_ulogic_vector(to_unsigned(slot, 5));
    return "01" & slot_bits;
end;

-- Return the low five bits of a gspr index as a gpr index.
function gspr_to_gpr(i: gspr_index_t) return gpr_index_t is
    variable gpr : gpr_index_t;
begin
    gpr := i(4 downto 0);
    return gpr;
end;

-- Widen a gpr index to a gspr index by prefixing the two bits "00".
function gpr_to_gspr(i: gpr_index_t) return gspr_index_t is
    variable gspr : gspr_index_t;
begin
    gspr := "00" & i;
    return gspr;
end;

-- Choose between an SPR index and a GPR number.
-- When bit 5 of s is '1', s is returned unchanged; otherwise the result is
-- g prefixed with "00" (the same value gpr_to_gspr produces).
function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t is
    variable from_gpr : gspr_index_t;
begin
    if s(5) = '1' then
        return s;
    end if;
    from_gpr := "00" & g;
    return from_gpr;
end;

-- Return bit 5 of the index. fast_spr_num sets that bit ("01" prefix) for the
-- SPRs it recognises.
function is_fast_spr(s: gspr_index_t) return std_ulogic is
    variable fast_bit : std_ulogic;
begin
    fast_bit := s(5);
    return fast_bit;
end;

-- Widen an fpr index to a gspr index by prefixing the two bits "10".
function fpr_to_gspr(f: fpr_index_t) return gspr_index_t is
    variable gspr : gspr_index_t;
begin
    gspr := "10" & f;
    return gspr;
end;

-- Compare two instruction tags.
-- Returns true only when both tags are valid and their tag numbers are equal;
-- an invalid tag on either side never matches.
function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean is
begin
    if tag1.valid /= '1' or tag2.valid /= '1' then
        return false;
    end if;
    return tag1.tag = tag2.tag;
end;

-- Truncate a 64-bit address to a real address by keeping its low
-- REAL_ADDR_BITS bits, which is exactly the index range of real_addr_t.
function addr_to_real(addr: std_ulogic_vector(63 downto 0)) return real_addr_t is
    variable ra : real_addr_t;
begin
    ra := addr(REAL_ADDR_BITS - 1 downto 0);
    return ra;
end;
end common;

@ -1,19 +0,0 @@
// Clock input: site A10, LVCMOS33.
LOCATE COMP "ext_clk" SITE "A10";
IOBUF PORT "ext_clk" IO_TYPE=LVCMOS33;

// Reset input: site P4, LVCMOS33.
// The IOBUF line must name the same port as the LOCATE line; it previously
// named "rst", so the I/O type was never applied to the located "ext_rst".
LOCATE COMP "ext_rst" SITE "P4";
IOBUF PORT "ext_rst" IO_TYPE=LVCMOS33;

// UART 0: transmit on P3, receive on P2, both LVCMOS33.
LOCATE COMP "uart0_txd" SITE "P3";
LOCATE COMP "uart0_rxd" SITE "P2";

IOBUF PORT "uart0_txd" IO_TYPE=LVCMOS33;
IOBUF PORT "uart0_rxd" IO_TYPE=LVCMOS33;

// Three LEDs. These use LVCMOS25, unlike the LVCMOS33 pins above.
LOCATE COMP "led_a" SITE "A13";
LOCATE COMP "led_b" SITE "A12";
LOCATE COMP "led_c" SITE "B19";

IOBUF PORT "led_a" IO_TYPE=LVCMOS25;
IOBUF PORT "led_b" IO_TYPE=LVCMOS25;
IOBUF PORT "led_c" IO_TYPE=LVCMOS25;

@ -1,225 +0,0 @@
// Clock input: site A9, LVCMOS33.
LOCATE COMP "ext_clk" SITE "A9";
IOBUF PORT "ext_clk" IO_TYPE=LVCMOS33;

// Disabled alternative: reset on J2 (io_13) with a pull-up. J2 is assigned to
// pin_gpio_13 further down, so enabling this would put two ports on one site.
// LOCATE COMP "ext_rst_n" SITE "J2"; // io_13
// IOBUF PORT "ext_rst_n" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;

// user_button as reset
// Active choice: ext_rst_n on J17, using the SSTL135_I I/O type.
LOCATE COMP "ext_rst_n" SITE "J17";
IOBUF PORT "ext_rst_n" IO_TYPE=SSTL135_I;

// USB pins: d_p on N1, d_n on M2, pullup on N2; all LVCMOS33.
LOCATE COMP "usb_d_p" SITE "N1";
LOCATE COMP "usb_d_n" SITE "M2";
LOCATE COMP "usb_pullup" SITE "N2";

IOBUF PORT "usb_d_p" IO_TYPE=LVCMOS33;
IOBUF PORT "usb_d_n" IO_TYPE=LVCMOS33;
IOBUF PORT "usb_pullup" IO_TYPE=LVCMOS33;

// LED 0 colour channels: green on M3, red on K4, blue on J3; all LVCMOS33.
LOCATE COMP "led0_g" SITE "M3";
LOCATE COMP "led0_r" SITE "K4";
LOCATE COMP "led0_b" SITE "J3";

// One IOBUF per located port. The second line previously repeated "led0_g",
// which left "led0_r" without an I/O type.
IOBUF PORT "led0_g" IO_TYPE=LVCMOS33;
IOBUF PORT "led0_r" IO_TYPE=LVCMOS33;
IOBUF PORT "led0_b" IO_TYPE=LVCMOS33;

// discontinuous gpio numbers, match orangecrab litex platform
// (pin_gpio_4, pin_gpio_7 and pin_gpio_8 are not defined in this list)
LOCATE COMP "pin_gpio_0" SITE "N17"; // tx
LOCATE COMP "pin_gpio_1" SITE "M18"; // rx
LOCATE COMP "pin_gpio_2" SITE "C10"; // sda
LOCATE COMP "pin_gpio_3" SITE "C9"; // scl
//
LOCATE COMP "pin_gpio_5" SITE "B10"; // io_5
LOCATE COMP "pin_gpio_6" SITE "B9"; // ...
//
LOCATE COMP "pin_gpio_9" SITE "C8"; //
LOCATE COMP "pin_gpio_10" SITE "B8"; //
LOCATE COMP "pin_gpio_11" SITE "A8"; //
LOCATE COMP "pin_gpio_12" SITE "H2"; //
LOCATE COMP "pin_gpio_13" SITE "J2"; // io_13
LOCATE COMP "pin_gpio_14" SITE "N15"; // miso
LOCATE COMP "pin_gpio_15" SITE "R17"; // sck
LOCATE COMP "pin_gpio_16" SITE "N16"; // mosi

// Six pins named pin_io_a0 .. pin_io_a5.
LOCATE COMP "pin_io_a0" SITE "L4";
LOCATE COMP "pin_io_a1" SITE "N3";
LOCATE COMP "pin_io_a2" SITE "N4";
LOCATE COMP "pin_io_a3" SITE "H4";
LOCATE COMP "pin_io_a4" SITE "G4";
LOCATE COMP "pin_io_a5" SITE "T17";

// Every gpio and io_a pin located above is configured as LVCMOS33.
IOBUF PORT "pin_gpio_0" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_gpio_1" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_gpio_2" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_gpio_3" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_gpio_5" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_gpio_6" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_gpio_9" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_gpio_10" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_gpio_11" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_gpio_12" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_gpio_13" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_gpio_14" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_gpio_15" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_gpio_16" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_io_a0" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_io_a1" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_io_a2" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_io_a3" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_io_a4" IO_TYPE=LVCMOS33;
IOBUF PORT "pin_io_a5" IO_TYPE=LVCMOS33;

// DDR address bus ddram_a[0..15]: site assignments, then I/O settings.
LOCATE COMP "ddram_a[0]" SITE "C4";
LOCATE COMP "ddram_a[1]" SITE "D2";
LOCATE COMP "ddram_a[2]" SITE "D3";
LOCATE COMP "ddram_a[3]" SITE "A3";
LOCATE COMP "ddram_a[4]" SITE "A4";
LOCATE COMP "ddram_a[5]" SITE "D4";
LOCATE COMP "ddram_a[6]" SITE "C3";
LOCATE COMP "ddram_a[7]" SITE "B2";
LOCATE COMP "ddram_a[8]" SITE "B1";
LOCATE COMP "ddram_a[9]" SITE "D1";
LOCATE COMP "ddram_a[10]" SITE "A7";
LOCATE COMP "ddram_a[11]" SITE "C2";
LOCATE COMP "ddram_a[12]" SITE "B6";
LOCATE COMP "ddram_a[13]" SITE "C1";
LOCATE COMP "ddram_a[14]" SITE "A2";
LOCATE COMP "ddram_a[15]" SITE "C7";
// All sixteen address pins use SSTL135_I with fast slew rate.
IOBUF PORT "ddram_a[0]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_a[1]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_a[2]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_a[3]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_a[4]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_a[5]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_a[6]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_a[7]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_a[8]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_a[9]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_a[10]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_a[11]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_a[12]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_a[13]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_a[14]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_a[15]" IO_TYPE=SSTL135_I SLEWRATE=FAST;

// DDR bank address (ddram_ba[0..2]), data mask (ddram_dm[0..1]) and the
// cas_n / cs_n / ras_n / we_n control pins: site assignments.
LOCATE COMP "ddram_ba[0]" SITE "D6";
LOCATE COMP "ddram_ba[1]" SITE "B7";
LOCATE COMP "ddram_ba[2]" SITE "A6";
LOCATE COMP "ddram_cas_n" SITE "D13";
LOCATE COMP "ddram_cs_n" SITE "A12";
LOCATE COMP "ddram_dm[0]" SITE "D16";
LOCATE COMP "ddram_dm[1]" SITE "G16";
LOCATE COMP "ddram_ras_n" SITE "C12";
LOCATE COMP "ddram_we_n" SITE "B12";
// Same I/O settings as the address bus: SSTL135_I with fast slew rate.
IOBUF PORT "ddram_ba[0]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_ba[1]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_ba[2]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_cas_n" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_cs_n" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_dm[0]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_dm[1]" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_ras_n" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_we_n" IO_TYPE=SSTL135_I SLEWRATE=FAST;

// 16 ddram data pins, dq[0] .. dq[15].
// from litex platform, termination disabled to reduce heat
LOCATE COMP "ddram_dq[0]" SITE "C17";
LOCATE COMP "ddram_dq[1]" SITE "D15";
LOCATE COMP "ddram_dq[2]" SITE "B17";
LOCATE COMP "ddram_dq[3]" SITE "C16";
LOCATE COMP "ddram_dq[4]" SITE "A15";
LOCATE COMP "ddram_dq[5]" SITE "B13";
LOCATE COMP "ddram_dq[6]" SITE "A17";
LOCATE COMP "ddram_dq[7]" SITE "A13";
LOCATE COMP "ddram_dq[8]" SITE "F17";
LOCATE COMP "ddram_dq[9]" SITE "F16";
LOCATE COMP "ddram_dq[10]" SITE "G15";
LOCATE COMP "ddram_dq[11]" SITE "F15";
LOCATE COMP "ddram_dq[12]" SITE "J16";
LOCATE COMP "ddram_dq[13]" SITE "C18";
LOCATE COMP "ddram_dq[14]" SITE "H16";
LOCATE COMP "ddram_dq[15]" SITE "F18";
// Same SSTL135_I / fast-slew setting on every data pin, with TERMINATION=OFF
// (the heat trade-off mentioned above).
IOBUF PORT "ddram_dq[0]" IO_TYPE=SSTL135_I SLEWRATE=FAST TERMINATION=OFF;
IOBUF PORT "ddram_dq[1]" IO_TYPE=SSTL135_I SLEWRATE=FAST TERMINATION=OFF;
IOBUF PORT "ddram_dq[2]" IO_TYPE=SSTL135_I SLEWRATE=FAST TERMINATION=OFF;
IOBUF PORT "ddram_dq[3]" IO_TYPE=SSTL135_I SLEWRATE=FAST TERMINATION=OFF;
IOBUF PORT "ddram_dq[4]" IO_TYPE=SSTL135_I SLEWRATE=FAST TERMINATION=OFF;
IOBUF PORT "ddram_dq[5]" IO_TYPE=SSTL135_I SLEWRATE=FAST TERMINATION=OFF;
IOBUF PORT "ddram_dq[6]" IO_TYPE=SSTL135_I SLEWRATE=FAST TERMINATION=OFF;
IOBUF PORT "ddram_dq[7]" IO_TYPE=SSTL135_I SLEWRATE=FAST TERMINATION=OFF;
IOBUF PORT "ddram_dq[8]" IO_TYPE=SSTL135_I SLEWRATE=FAST TERMINATION=OFF;
IOBUF PORT "ddram_dq[9]" IO_TYPE=SSTL135_I SLEWRATE=FAST TERMINATION=OFF;
IOBUF PORT "ddram_dq[10]" IO_TYPE=SSTL135_I SLEWRATE=FAST TERMINATION=OFF;
IOBUF PORT "ddram_dq[11]" IO_TYPE=SSTL135_I SLEWRATE=FAST TERMINATION=OFF;
IOBUF PORT "ddram_dq[12]" IO_TYPE=SSTL135_I SLEWRATE=FAST TERMINATION=OFF;
IOBUF PORT "ddram_dq[13]" IO_TYPE=SSTL135_I SLEWRATE=FAST TERMINATION=OFF;
IOBUF PORT "ddram_dq[14]" IO_TYPE=SSTL135_I SLEWRATE=FAST TERMINATION=OFF;
IOBUF PORT "ddram_dq[15]" IO_TYPE=SSTL135_I SLEWRATE=FAST TERMINATION=OFF;

// ddram data strobes: two p/n pairs.
// NOTE(review): presumably one pair per 8 data bits - confirm against the memory controller.
LOCATE COMP "ddram_dqs_n[0]" SITE "A16";
LOCATE COMP "ddram_dqs_n[1]" SITE "H17";
LOCATE COMP "ddram_dqs_p[0]" SITE "B15";
LOCATE COMP "ddram_dqs_p[1]" SITE "G18";
// Differential SSTL135D_I, fast slew, DIFFRESISTOR=100, TERMINATION=OFF on both halves of each pair.
IOBUF PORT "ddram_dqs_n[0]" IO_TYPE=SSTL135D_I SLEWRATE=FAST DIFFRESISTOR=100 TERMINATION=OFF;
IOBUF PORT "ddram_dqs_n[1]" IO_TYPE=SSTL135D_I SLEWRATE=FAST DIFFRESISTOR=100 TERMINATION=OFF;
IOBUF PORT "ddram_dqs_p[0]" IO_TYPE=SSTL135D_I SLEWRATE=FAST DIFFRESISTOR=100 TERMINATION=OFF;
IOBUF PORT "ddram_dqs_p[1]" IO_TYPE=SSTL135D_I SLEWRATE=FAST DIFFRESISTOR=100 TERMINATION=OFF;

// ddram clock pair (p/n), differential SSTL135D_I with fast slew.
// Unlike the dqs pairs, no DIFFRESISTOR/TERMINATION option is set here.
LOCATE COMP "ddram_clk_p" SITE "J18";
LOCATE COMP "ddram_clk_n" SITE "K18";
IOBUF PORT "ddram_clk_p" IO_TYPE=SSTL135D_I SLEWRATE=FAST;
IOBUF PORT "ddram_clk_n" IO_TYPE=SSTL135D_I SLEWRATE=FAST;

// Remaining ddram control pins (cke, odt, reset_n): single-ended SSTL135_I, fast slew.
LOCATE COMP "ddram_cke" SITE "D18";
LOCATE COMP "ddram_odt" SITE "C13";
LOCATE COMP "ddram_reset_n" SITE "L18";
IOBUF PORT "ddram_cke" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_odt" IO_TYPE=SSTL135_I SLEWRATE=FAST;
IOBUF PORT "ddram_reset_n" IO_TYPE=SSTL135_I SLEWRATE=FAST;

// Ports named ddram_vccio[*] and ddram_gnd[*], placed on ordinary I/O balls.
// NOTE(review): presumably the top level drives these to constant high/low so the
// pads act as extra supply/ground connections - confirm against the design.
LOCATE COMP "ddram_vccio[0]" SITE "K16";
LOCATE COMP "ddram_vccio[1]" SITE "D17";
LOCATE COMP "ddram_vccio[2]" SITE "K15";
LOCATE COMP "ddram_vccio[3]" SITE "K17";
LOCATE COMP "ddram_vccio[4]" SITE "B18";
LOCATE COMP "ddram_vccio[5]" SITE "C6";
LOCATE COMP "ddram_gnd[0]" SITE "L15";
LOCATE COMP "ddram_gnd[1]" SITE "L16";
// These use SSTL135_II, whereas the signal pins above use SSTL135_I.
IOBUF PORT "ddram_vccio[0]" IO_TYPE=SSTL135_II SLEWRATE=FAST;
IOBUF PORT "ddram_vccio[1]" IO_TYPE=SSTL135_II SLEWRATE=FAST;
IOBUF PORT "ddram_vccio[2]" IO_TYPE=SSTL135_II SLEWRATE=FAST;
IOBUF PORT "ddram_vccio[3]" IO_TYPE=SSTL135_II SLEWRATE=FAST;
IOBUF PORT "ddram_vccio[4]" IO_TYPE=SSTL135_II SLEWRATE=FAST;
IOBUF PORT "ddram_vccio[5]" IO_TYPE=SSTL135_II SLEWRATE=FAST;
IOBUF PORT "ddram_gnd[0]" IO_TYPE=SSTL135_II SLEWRATE=FAST;
IOBUF PORT "ddram_gnd[1]" IO_TYPE=SSTL135_II SLEWRATE=FAST;

// SPI flash. The clock has no pin constraint because USRMCLK is used for it
// instead; the entries that would constrain the pin are kept for reference only.
// LOCATE COMP "spi_flash_clk" SITE "U16";
// IOBUF PORT "spi_flash_clk" IO_TYPE=LVCMOS33;

// Ball assignments for the remaining flash signals.
LOCATE COMP "spi_flash_cs_n" SITE "U17";
LOCATE COMP "spi_flash_mosi" SITE "U18";
LOCATE COMP "spi_flash_miso" SITE "T18";
LOCATE COMP "spi_flash_wp_n" SITE "R18";
LOCATE COMP "spi_flash_hold_n" SITE "N18";

// All flash signals are LVCMOS33.
IOBUF PORT "spi_flash_cs_n" IO_TYPE=LVCMOS33;
IOBUF PORT "spi_flash_mosi" IO_TYPE=LVCMOS33;
IOBUF PORT "spi_flash_miso" IO_TYPE=LVCMOS33;
IOBUF PORT "spi_flash_wp_n" IO_TYPE=LVCMOS33;
IOBUF PORT "spi_flash_hold_n" IO_TYPE=LVCMOS33;

// SD card interface, one LOCATE/IOBUF pair per signal. All pins are LVCMOS33.
// Data and command lines: fast slew with pull-ups.
LOCATE COMP "sdcard_data[0]" SITE "J1";
IOBUF PORT "sdcard_data[0]" IO_TYPE=LVCMOS33 SLEWRATE=FAST PULLMODE=UP;
LOCATE COMP "sdcard_data[1]" SITE "K3";
IOBUF PORT "sdcard_data[1]" IO_TYPE=LVCMOS33 SLEWRATE=FAST PULLMODE=UP;
LOCATE COMP "sdcard_data[2]" SITE "L3";
IOBUF PORT "sdcard_data[2]" IO_TYPE=LVCMOS33 SLEWRATE=FAST PULLMODE=UP;
LOCATE COMP "sdcard_data[3]" SITE "M1";
IOBUF PORT "sdcard_data[3]" IO_TYPE=LVCMOS33 SLEWRATE=FAST PULLMODE=UP;
LOCATE COMP "sdcard_cmd" SITE "K2";
IOBUF PORT "sdcard_cmd" IO_TYPE=LVCMOS33 SLEWRATE=FAST PULLMODE=UP;

// Clock: fast slew, no pull mode specified.
LOCATE COMP "sdcard_clk" SITE "K1";
IOBUF PORT "sdcard_clk" IO_TYPE=LVCMOS33 SLEWRATE=FAST;

// Card detect: I/O standard only.
LOCATE COMP "sdcard_cd" SITE "L1";
IOBUF PORT "sdcard_cd" IO_TYPE=LVCMOS33;

@ -1,19 +0,0 @@
// Board-level pin constraints, one LOCATE/IOBUF pair per signal.

// External clock and reset are LVCMOS33; reset also gets a pull-up and DRIVE=4.
LOCATE COMP "ext_clk" SITE "A9";
IOBUF PORT "ext_clk" IO_TYPE=LVCMOS33;
LOCATE COMP "ext_rst" SITE "J2";
IOBUF PORT "ext_rst" PULLMODE=UP IO_TYPE=LVCMOS33 DRIVE=4;

// UART0 (LVCMOS25).
LOCATE COMP "uart0_txd" SITE "N17";
IOBUF PORT "uart0_txd" IO_TYPE=LVCMOS25;
LOCATE COMP "uart0_rxd" SITE "M18";
IOBUF PORT "uart0_rxd" IO_TYPE=LVCMOS25;

// LEDs (LVCMOS25).
LOCATE COMP "led_a" SITE "V17";
IOBUF PORT "led_a" IO_TYPE=LVCMOS25;
LOCATE COMP "led_b" SITE "T17";
IOBUF PORT "led_b" IO_TYPE=LVCMOS25;
LOCATE COMP "led_c" SITE "J3";
IOBUF PORT "led_c" IO_TYPE=LVCMOS25;

@ -1,328 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;

library work;
use work.common.all;

-- Issue control: tracks in-flight instructions by tag, detects GPR and CR
-- read-after-write hazards against them, and decides whether the instruction
-- presented on valid_in may be issued (valid_out) or must wait (stall_out).
entity control is
generic (
-- When true, a hazard whose producer tag matches execute_next_tag /
-- execute_next_cr_tag is turned into a bypass instead of a stall.
EX1_BYPASS : boolean := true;
-- Issue stalls once the outstanding count reaches PIPELINE_DEPTH + 1.
PIPELINE_DEPTH : natural := 3
);
port (
clk : in std_ulogic;
rst : in std_ulogic;

-- Tag of the instruction completing this cycle; frees its tag entry and
-- decrements the outstanding count.
complete_in : in instr_tag_t;
-- An instruction is being presented for issue.
valid_in : in std_ulogic;
-- NOTE(review): repeated and busy_in are not referenced by architecture rtl.
repeated : in std_ulogic;
-- Clears all pending writes and zeroes the outstanding count.
flush_in : in std_ulogic;
busy_in : in std_ulogic;
-- Keeps the tag from being allocated and the outstanding count from
-- incrementing; also ORed into stall_out.
deferred : in std_ulogic;
-- The presented instruction must be alone in the pipeline.
sgl_pipe_in : in std_ulogic;
-- Debugger stop request; stopped_out asserts once nothing is outstanding.
stop_mark_in : in std_ulogic;

-- Destination register of the presented instruction.
gpr_write_valid_in : in std_ulogic;
gpr_write_in : in gspr_index_t;

-- Source operands A, B and C of the presented instruction.
gpr_a_read_valid_in : in std_ulogic;
gpr_a_read_in : in gspr_index_t;

gpr_b_read_valid_in : in std_ulogic;
gpr_b_read_in : in gspr_index_t;

gpr_c_read_valid_in : in std_ulogic;
gpr_c_read_in : in gspr_index_t;

-- Tags compared against the producers of the source operands / CR to
-- decide whether a bypass is possible.
execute_next_tag : in instr_tag_t;
execute_next_cr_tag : in instr_tag_t;

-- The presented instruction reads / writes the CR.
cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic;

valid_out : out std_ulogic;
stall_out : out std_ulogic;
stopped_out : out std_ulogic;

-- Per-operand bypass selects (asserted only when EX1_BYPASS is true).
gpr_bypass_a : out std_ulogic;
gpr_bypass_b : out std_ulogic;
gpr_bypass_c : out std_ulogic;
cr_bypass : out std_ulogic;

-- Tag assigned to the instruction being issued this cycle.
instr_tag_out : out instr_tag_t
);
end entity control;

architecture rtl of control is
-- Issue state machine used for single-pipe (sgl_pipe_in) instructions:
-- wait for older instructions to drain, then wait for the instruction itself.
type state_type is (IDLE, WAIT_FOR_PREV_TO_COMPLETE, WAIT_FOR_CURR_TO_COMPLETE);

type reg_internal_type is record
state : state_type;
-- Number of issued-but-not-completed instructions. The declared range is one
-- wider on each side than the 0 .. PIPELINE_DEPTH+1 window asserted in control0.
outstanding : integer range -1 to PIPELINE_DEPTH+2;
end record;
constant reg_internal_init : reg_internal_type := (state => IDLE, outstanding => 0);

-- r_int is the registered state; rin_int is its combinational next value.
signal r_int, rin_int : reg_internal_type := reg_internal_init;

-- Write-valid inputs qualified by the final issue decision (see control1).
signal gpr_write_valid : std_ulogic;
signal cr_write_valid : std_ulogic;

-- Per-tag record of what an in-flight instruction will write.
type tag_register is record
-- The instruction writes GSPR 'reg'.
wr_gpr : std_ulogic;
reg : gspr_index_t;
-- Cleared when a later instruction is issued writing the same register.
recent : std_ulogic;
-- The instruction writes the CR.
wr_cr : std_ulogic;
end record;

type tag_regs_array is array(tag_number_t) of tag_register;
signal tag_regs : tag_regs_array;

-- Tag handed to the instruction being issued this cycle.
signal instr_tag : instr_tag_t;

signal gpr_tag_stall : std_ulogic;
signal cr_tag_stall : std_ulogic;

-- Next tag number to allocate, and its combinational successor.
signal curr_tag : tag_number_t;
signal next_tag : tag_number_t;

-- Tag of the most recently issued CR-writing instruction.
signal curr_cr_tag : tag_number_t;
begin
control0: process(clk)
begin
if rising_edge(clk) then
assert rin_int.outstanding >= 0 and rin_int.outstanding <= (PIPELINE_DEPTH+1)
report "Outstanding bad " & integer'image(rin_int.outstanding) severity failure;
r_int <= rin_int;
for i in tag_number_t loop
if rst = '1' or flush_in = '1' then
tag_regs(i).wr_gpr <= '0';
tag_regs(i).wr_cr <= '0';
else
if complete_in.valid = '1' and i = complete_in.tag then
tag_regs(i).wr_gpr <= '0';
tag_regs(i).wr_cr <= '0';
report "tag " & integer'image(i) & " not valid";
end if;
if gpr_write_valid = '1' and tag_regs(i).reg = gpr_write_in then
tag_regs(i).recent <= '0';
if tag_regs(i).recent = '1' and tag_regs(i).wr_gpr = '1' then
report "tag " & integer'image(i) & " not recent";
end if;
end if;
if instr_tag.valid = '1' and i = instr_tag.tag then
tag_regs(i).wr_gpr <= gpr_write_valid;
tag_regs(i).reg <= gpr_write_in;
tag_regs(i).recent <= gpr_write_valid;
tag_regs(i).wr_cr <= cr_write_valid;
if gpr_write_valid = '1' then
report "tag " & integer'image(i) & " valid for gpr " & to_hstring(gpr_write_in);
end if;
end if;
end if;
end loop;
if rst = '1' then
curr_tag <= 0;
curr_cr_tag <= 0;
else
curr_tag <= next_tag;
if cr_write_valid = '1' then
curr_cr_tag <= instr_tag.tag;
end if;
end if;
end if;
end process;

control_hazards : process(all)
variable gpr_stall : std_ulogic;
variable tag_a : instr_tag_t;
variable tag_b : instr_tag_t;
variable tag_c : instr_tag_t;
variable tag_s : instr_tag_t;
variable tag_t : instr_tag_t;
variable incr_tag : tag_number_t;
variable byp_a : std_ulogic;
variable byp_b : std_ulogic;
variable byp_c : std_ulogic;
variable tag_cr : instr_tag_t;
variable byp_cr : std_ulogic;
begin
tag_a := instr_tag_init;
for i in tag_number_t loop
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_a_read_in then
tag_a.valid := gpr_a_read_valid_in;
tag_a.tag := i;
end if;
end loop;
if tag_match(tag_a, complete_in) then
tag_a.valid := '0';
end if;
tag_b := instr_tag_init;
for i in tag_number_t loop
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_b_read_in then
tag_b.valid := gpr_b_read_valid_in;
tag_b.tag := i;
end if;
end loop;
if tag_match(tag_b, complete_in) then
tag_b.valid := '0';
end if;
tag_c := instr_tag_init;
for i in tag_number_t loop
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_c_read_in then
tag_c.valid := gpr_c_read_valid_in;
tag_c.tag := i;
end if;
end loop;
if tag_match(tag_c, complete_in) then
tag_c.valid := '0';
end if;

byp_a := '0';
if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then
byp_a := '1';
end if;
byp_b := '0';
if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then
byp_b := '1';
end if;
byp_c := '0';
if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then
byp_c := '1';
end if;

gpr_bypass_a <= byp_a;
gpr_bypass_b <= byp_b;
gpr_bypass_c <= byp_c;

gpr_tag_stall <= (tag_a.valid and not byp_a) or
(tag_b.valid and not byp_b) or
(tag_c.valid and not byp_c);

incr_tag := curr_tag;
instr_tag.tag <= curr_tag;
instr_tag.valid <= valid_out and not deferred;
if instr_tag.valid = '1' then
incr_tag := (curr_tag + 1) mod TAG_COUNT;
end if;
next_tag <= incr_tag;
instr_tag_out <= instr_tag;

-- CR hazards
tag_cr.tag := curr_cr_tag;
tag_cr.valid := cr_read_in and tag_regs(curr_cr_tag).wr_cr;
if tag_match(tag_cr, complete_in) then
tag_cr.valid := '0';
end if;
byp_cr := '0';
if EX1_BYPASS and tag_match(execute_next_cr_tag, tag_cr) then
byp_cr := '1';
end if;

cr_bypass <= byp_cr;
cr_tag_stall <= tag_cr.valid and not byp_cr;
end process;

control1 : process(all)
variable v_int : reg_internal_type;
variable valid_tmp : std_ulogic;
variable stall_tmp : std_ulogic;
begin
v_int := r_int;

-- asynchronous
valid_tmp := valid_in and not flush_in;
stall_tmp := '0';

if flush_in = '1' then
v_int.outstanding := 0;
elsif complete_in.valid = '1' then
v_int.outstanding := r_int.outstanding - 1;
end if;
if r_int.outstanding >= PIPELINE_DEPTH + 1 then
valid_tmp := '0';
stall_tmp := '1';
end if;

if rst = '1' then
gpr_write_valid <= '0';
cr_write_valid <= '0';
v_int := reg_internal_init;
valid_tmp := '0';
end if;

-- Handle debugger stop
stopped_out <= '0';
if stop_mark_in = '1' and v_int.outstanding = 0 then
stopped_out <= '1';
end if;

-- state machine to handle instructions that must be single
-- through the pipeline.
case r_int.state is
when IDLE =>
if valid_tmp = '1' then
if (sgl_pipe_in = '1') then
if v_int.outstanding /= 0 then
v_int.state := WAIT_FOR_PREV_TO_COMPLETE;
stall_tmp := '1';
else
-- send insn out and wait on it to complete
v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
end if;
else
-- let it go out if there are no GPR or CR hazards
stall_tmp := gpr_tag_stall or cr_tag_stall;
end if;
end if;

when WAIT_FOR_PREV_TO_COMPLETE =>
if v_int.outstanding = 0 then
-- send insn out and wait on it to complete
v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
else
stall_tmp := '1';
end if;

when WAIT_FOR_CURR_TO_COMPLETE =>
if v_int.outstanding = 0 then
v_int.state := IDLE;
-- XXX Don't replicate this
if valid_tmp = '1' then
if (sgl_pipe_in = '1') then
if v_int.outstanding /= 0 then
v_int.state := WAIT_FOR_PREV_TO_COMPLETE;
stall_tmp := '1';
else
-- send insn out and wait on it to complete
v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
end if;
else
-- let it go out if there are no GPR or CR hazards
stall_tmp := gpr_tag_stall or cr_tag_stall;
end if;
end if;
else
stall_tmp := '1';
end if;
end case;

if stall_tmp = '1' then
valid_tmp := '0';
end if;

gpr_write_valid <= gpr_write_valid_in and valid_tmp;
cr_write_valid <= cr_write_in and valid_tmp;

if valid_tmp = '1' and deferred = '0' then
v_int.outstanding := v_int.outstanding + 1;
end if;

-- update outputs
valid_out <= valid_tmp;
stall_out <= stall_tmp or deferred;

-- update registers
rin_int <= v_int;
end process;
end;

@ -8,61 +8,36 @@ use work.wishbone_types.all;

-- Processor core entity.
-- NOTE(review): this declaration interleaves lines from two revisions of the
-- file without any +/- markers: SIM is declared twice in the generic list, clk
-- and rst are each declared twice with different types, and the port list
-- carries two alternative endings. Reconcile against a single revision before
-- compiling.
entity core is
generic (
SIM : boolean := false;
DISABLE_FLATTEN : boolean := false;
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
HAS_BTC : boolean := true;
HAS_SHORT_MULT : boolean := false;
ALT_RESET_ADDRESS : std_ulogic_vector(63 downto 0) := (others => '0');
LOG_LENGTH : natural := 512;
-- Cache and TLB geometry.
ICACHE_NUM_LINES : natural := 64;
ICACHE_NUM_WAYS : natural := 2;
ICACHE_TLB_SIZE : natural := 64;
DCACHE_NUM_LINES : natural := 64;
DCACHE_NUM_WAYS : natural := 2;
DCACHE_TLB_SET_SIZE : natural := 64;
DCACHE_TLB_NUM_WAYS : natural := 2
SIM : boolean := false
);
port (
clk : in std_ulogic;
rst : in std_ulogic;
clk : in std_logic;
rst : in std_logic;

-- Alternate reset (0xffff0000) for use by DRAM init fw
alt_reset : in std_ulogic;

-- Wishbone interface
wishbone_insn_in : in wishbone_slave_out;
wishbone_insn_out : out wishbone_master_out;

wishbone_data_in : in wishbone_slave_out;
wishbone_data_out : out wishbone_master_out;

wb_snoop_in : in wishbone_master_out;

-- DMI register access port
dmi_addr : in std_ulogic_vector(3 downto 0);
dmi_din : in std_ulogic_vector(63 downto 0);
dmi_dout : out std_ulogic_vector(63 downto 0);
dmi_req : in std_ulogic;
dmi_wr : in std_ulogic;
dmi_ack : out std_ulogic;

-- External interrupt request
ext_irq : in std_ulogic;

terminated_out : out std_logic
-- Added for debug, ghdl doesn't support external names unfortunately
registers : out regfile;
nia_out : out std_ulogic_vector(61 downto 0);
terminate_out : out std_ulogic
);
end core;

architecture behave of core is
-- fetch signals
signal fetch1_to_fetch2: Fetch1ToFetch2Type;
signal fetch2_to_decode1: Fetch2ToDecode1Type;

-- icache signals
signal fetch1_to_icache : Fetch1ToIcacheType;
signal writeback_to_fetch1: WritebackToFetch1Type;
signal icache_to_decode1 : IcacheToDecode1Type;
signal mmu_to_icache : MmuToIcacheType;
signal fetch2_to_icache : Fetch2ToIcacheType;
signal icache_to_fetch2 : IcacheToFetch2Type;

-- decode signals
signal decode1_to_decode2: Decode1ToDecode2Type;
signal decode1_to_fetch1: Decode1ToFetch1Type;
signal decode2_to_execute1: Decode2ToExecute1Type;

-- register file signals
@ -76,422 +51,177 @@ architecture behave of core is
signal writeback_to_cr_file: WritebackToCrFileType;

-- execute signals
signal execute1_to_writeback: Execute1ToWritebackType;
signal execute1_bypass: bypass_data_t;
signal execute1_cr_bypass: cr_bypass_data_t;
signal execute1_to_execute2: Execute1ToExecute2Type;
signal execute2_to_writeback: Execute2ToWritebackType;
signal execute1_to_fetch1: Execute1ToFetch1Type;

-- load store signals
signal execute1_to_loadstore1: Execute1ToLoadstore1Type;
signal loadstore1_to_execute1: Loadstore1ToExecute1Type;
signal loadstore1_to_writeback: Loadstore1ToWritebackType;
signal loadstore1_to_mmu: Loadstore1ToMmuType;
signal mmu_to_loadstore1: MmuToLoadstore1Type;

-- dcache signals
signal loadstore1_to_dcache: Loadstore1ToDcacheType;
signal dcache_to_loadstore1: DcacheToLoadstore1Type;
signal mmu_to_dcache: MmuToDcacheType;
signal dcache_to_mmu: DcacheToMmuType;

-- FPU signals
signal execute1_to_fpu: Execute1ToFPUType;
signal fpu_to_execute1: FPUToExecute1Type;
signal fpu_to_writeback: FPUToWritebackType;
signal decode2_to_loadstore1: Decode2ToLoadstore1Type;
signal loadstore1_to_loadstore2: Loadstore1ToLoadstore2Type;
signal loadstore2_to_writeback: Loadstore2ToWritebackType;

-- multiply signals
signal decode2_to_multiply: Decode2ToMultiplyType;
signal multiply_to_writeback: MultiplyToWritebackType;

-- local signals
signal fetch1_stall_in : std_ulogic;
signal icache_stall_out : std_ulogic;
signal icache_stall_in : std_ulogic;
signal fetch2_stall_in : std_ulogic;
signal fetch2_stall_out : std_ulogic;
signal decode1_stall_in : std_ulogic;
signal decode1_busy : std_ulogic;
signal decode2_busy_in : std_ulogic;
signal decode2_stall_out : std_ulogic;
signal ex1_icache_inval: std_ulogic;
signal ex1_busy_out: std_ulogic;
signal dcache_stall_out: std_ulogic;

signal flush: std_ulogic;
signal decode1_flush: std_ulogic;
signal fetch1_flush: std_ulogic;

signal complete: instr_tag_t;
signal complete: std_ulogic;

signal terminate: std_ulogic;
signal core_rst: std_ulogic;
signal do_interrupt: std_ulogic;

-- Delayed/Latched resets and alt_reset
signal rst_fetch1 : std_ulogic;
signal rst_fetch2 : std_ulogic;
signal rst_icache : std_ulogic;
signal rst_dcache : std_ulogic;
signal rst_dec1 : std_ulogic;
signal rst_dec2 : std_ulogic;
signal rst_ex1 : std_ulogic;
signal rst_fpu : std_ulogic;
signal rst_ls1 : std_ulogic;
signal rst_wback : std_ulogic;
signal rst_dbg : std_ulogic;
signal alt_reset_d : std_ulogic;

signal sim_cr_dump: std_ulogic;

-- Debug actions
signal dbg_core_stop: std_ulogic;
signal dbg_core_rst: std_ulogic;
signal dbg_icache_rst: std_ulogic;

signal dbg_gpr_req : std_ulogic;
signal dbg_gpr_ack : std_ulogic;
signal dbg_gpr_addr : gspr_index_t;
signal dbg_gpr_data : std_ulogic_vector(63 downto 0);

signal msr : std_ulogic_vector(63 downto 0);

-- PMU event bus
signal icache_events : IcacheEventType;
signal loadstore_events : Loadstore1EventType;
signal dcache_events : DcacheEventType;
signal writeback_events : WritebackEventType;

-- Debug status
signal dbg_core_is_stopped: std_ulogic;

-- Logging signals
signal log_data : std_ulogic_vector(255 downto 0);
signal log_rd_addr : std_ulogic_vector(31 downto 0);
signal log_wr_addr : std_ulogic_vector(31 downto 0);
signal log_rd_data : std_ulogic_vector(63 downto 0);

function keep_h(disable : boolean) return string is
begin
if disable then
return "yes";
else
return "no";
end if;
end function;
attribute keep_hierarchy : string;
attribute keep_hierarchy of fetch1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of icache_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of decode1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of decode2_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of cr_file_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of execute1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of loadstore1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of mmu_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of dcache_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of writeback_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of debug_0 : label is keep_h(DISABLE_FLATTEN);
begin

core_rst <= dbg_core_rst or rst;

resets: process(clk)
begin
if rising_edge(clk) then
rst_fetch1 <= core_rst;
rst_fetch2 <= core_rst;
rst_icache <= core_rst;
rst_dcache <= core_rst;
rst_dec1 <= core_rst;
rst_dec2 <= core_rst;
rst_ex1 <= core_rst;
rst_fpu <= core_rst;
rst_ls1 <= core_rst;
rst_wback <= core_rst;
rst_dbg <= rst;
alt_reset_d <= alt_reset;
end if;
end process;
terminate_out <= terminate;

fetch1_0: entity work.fetch1
generic map (
RESET_ADDRESS => (others => '0'),
ALT_RESET_ADDRESS => ALT_RESET_ADDRESS,
HAS_BTC => HAS_BTC
RESET_ADDRESS => (others => '0')
)
port map (
clk => clk,
rst => rst_fetch1,
alt_reset_in => alt_reset_d,
rst => rst,
stall_in => fetch1_stall_in,
flush_in => fetch1_flush,
inval_btc => ex1_icache_inval or mmu_to_icache.tlbie,
stop_in => dbg_core_stop,
d_in => decode1_to_fetch1,
w_in => writeback_to_fetch1,
i_out => fetch1_to_icache,
log_out => log_data(42 downto 0)
flush_in => flush,
e_in => execute1_to_fetch1,
f_out => fetch1_to_fetch2
);

fetch1_stall_in <= icache_stall_out or decode1_busy;
fetch1_flush <= flush or decode1_flush;
fetch1_stall_in <= fetch2_stall_out or decode2_stall_out;

fetch2_0: entity work.fetch2
port map (
clk => clk,
rst => rst,
stall_in => fetch2_stall_in,
stall_out => fetch2_stall_out,
flush_in => flush,
i_in => icache_to_fetch2,
i_out => fetch2_to_icache,
f_in => fetch1_to_fetch2,
f_out => fetch2_to_decode1
);

fetch2_stall_in <= decode2_stall_out;

icache_0: entity work.icache
generic map(
SIM => SIM,
LINE_SIZE => 64,
NUM_LINES => ICACHE_NUM_LINES,
NUM_WAYS => ICACHE_NUM_WAYS,
TLB_SIZE => ICACHE_TLB_SIZE,
LOG_LENGTH => LOG_LENGTH
LINE_SIZE_DW => 8,
NUM_LINES => 16
)
port map(
clk => clk,
rst => rst_icache,
i_in => fetch1_to_icache,
i_out => icache_to_decode1,
m_in => mmu_to_icache,
flush_in => fetch1_flush,
inval_in => dbg_icache_rst or ex1_icache_inval,
stall_in => icache_stall_in,
stall_out => icache_stall_out,
rst => rst,
i_in => fetch2_to_icache,
i_out => icache_to_fetch2,
wishbone_out => wishbone_insn_out,
wishbone_in => wishbone_insn_in,
wb_snoop_in => wb_snoop_in,
events => icache_events,
log_out => log_data(96 downto 43)
wishbone_in => wishbone_insn_in
);

icache_stall_in <= decode1_busy;

decode1_0: entity work.decode1
generic map(
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
clk => clk,
rst => rst_dec1,
rst => rst,
stall_in => decode1_stall_in,
flush_in => flush,
flush_out => decode1_flush,
busy_out => decode1_busy,
f_in => icache_to_decode1,
d_out => decode1_to_decode2,
f_out => decode1_to_fetch1,
log_out => log_data(109 downto 97)
f_in => fetch2_to_decode1,
d_out => decode1_to_decode2
);

decode1_stall_in <= decode2_stall_out;

decode2_0: entity work.decode2
generic map (
EX1_BYPASS => EX1_BYPASS,
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
clk => clk,
rst => rst_dec2,
busy_in => decode2_busy_in,
rst => rst,
stall_out => decode2_stall_out,
flush_in => flush,
complete_in => complete,
stopped_out => dbg_core_is_stopped,
d_in => decode1_to_decode2,
e_out => decode2_to_execute1,
l_out => decode2_to_loadstore1,
m_out => decode2_to_multiply,
r_in => register_file_to_decode2,
r_out => decode2_to_register_file,
c_in => cr_file_to_decode2,
c_out => decode2_to_cr_file,
execute_bypass => execute1_bypass,
execute_cr_bypass => execute1_cr_bypass,
log_out => log_data(119 downto 110)
nia_out => nia_out
);
decode2_busy_in <= ex1_busy_out;

register_file_0: entity work.register_file
generic map (
SIM => SIM,
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
clk => clk,
d_in => decode2_to_register_file,
d_out => register_file_to_decode2,
w_in => writeback_to_register_file,
dbg_gpr_req => dbg_gpr_req,
dbg_gpr_ack => dbg_gpr_ack,
dbg_gpr_addr => dbg_gpr_addr,
dbg_gpr_data => dbg_gpr_data,
sim_dump => terminate,
sim_dump_done => sim_cr_dump,
log_out => log_data(255 downto 184)
);
registers_out => registers);

cr_file_0: entity work.cr_file
generic map (
SIM => SIM,
LOG_LENGTH => LOG_LENGTH
)
port map (
clk => clk,
d_in => decode2_to_cr_file,
d_out => cr_file_to_decode2,
w_in => writeback_to_cr_file,
sim_dump => sim_cr_dump,
log_out => log_data(183 downto 171)
w_in => writeback_to_cr_file
);

execute1_0: entity work.execute1
generic map (
EX1_BYPASS => EX1_BYPASS,
HAS_FPU => HAS_FPU,
HAS_SHORT_MULT => HAS_SHORT_MULT,
LOG_LENGTH => LOG_LENGTH
SIM => SIM
)
port map (
clk => clk,
rst => rst_ex1,
flush_in => flush,
busy_out => ex1_busy_out,
flush_out => flush,
e_in => decode2_to_execute1,
l_in => loadstore1_to_execute1,
fp_in => fpu_to_execute1,
ext_irq_in => ext_irq,
interrupt_in => do_interrupt,
l_out => execute1_to_loadstore1,
fp_out => execute1_to_fpu,
e_out => execute1_to_writeback,
bypass_data => execute1_bypass,
bypass_cr_data => execute1_cr_bypass,
icache_inval => ex1_icache_inval,
dbg_msr_out => msr,
wb_events => writeback_events,
ls_events => loadstore_events,
dc_events => dcache_events,
ic_events => icache_events,
terminate_out => terminate,
log_out => log_data(134 downto 120),
log_rd_addr => log_rd_addr,
log_rd_data => log_rd_data,
log_wr_addr => log_wr_addr
f_out => execute1_to_fetch1,
e_out => execute1_to_execute2,
terminate_out => terminate
);

with_fpu: if HAS_FPU generate
begin
fpu_0: entity work.fpu
port map (
clk => clk,
rst => rst_fpu,
e_in => execute1_to_fpu,
e_out => fpu_to_execute1,
w_out => fpu_to_writeback
);
end generate;

no_fpu: if not HAS_FPU generate
begin
fpu_to_execute1 <= FPUToExecute1Init;
fpu_to_writeback <= FPUToWritebackInit;
end generate;
execute2_0: entity work.execute2
port map (
clk => clk,
e_in => execute1_to_execute2,
e_out => execute2_to_writeback
);

loadstore1_0: entity work.loadstore1
generic map (
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH
)
port map (
clk => clk,
rst => rst_ls1,
l_in => execute1_to_loadstore1,
e_out => loadstore1_to_execute1,
l_out => loadstore1_to_writeback,
d_out => loadstore1_to_dcache,
d_in => dcache_to_loadstore1,
m_out => loadstore1_to_mmu,
m_in => mmu_to_loadstore1,
dc_stall => dcache_stall_out,
events => loadstore_events,
log_out => log_data(149 downto 140)
l_in => decode2_to_loadstore1,
l_out => loadstore1_to_loadstore2
);

mmu_0: entity work.mmu
loadstore2_0: entity work.loadstore2
port map (
clk => clk,
rst => core_rst,
l_in => loadstore1_to_mmu,
l_out => mmu_to_loadstore1,
d_out => mmu_to_dcache,
d_in => dcache_to_mmu,
i_out => mmu_to_icache
l_in => loadstore1_to_loadstore2,
w_out => loadstore2_to_writeback,
m_in => wishbone_data_in,
m_out => wishbone_data_out
);

dcache_0: entity work.dcache
generic map(
LINE_SIZE => 64,
NUM_LINES => DCACHE_NUM_LINES,
NUM_WAYS => DCACHE_NUM_WAYS,
TLB_SET_SIZE => DCACHE_TLB_SET_SIZE,
TLB_NUM_WAYS => DCACHE_TLB_NUM_WAYS,
LOG_LENGTH => LOG_LENGTH
)
multiply_0: entity work.multiply
port map (
clk => clk,
rst => rst_dcache,
d_in => loadstore1_to_dcache,
d_out => dcache_to_loadstore1,
m_in => mmu_to_dcache,
m_out => dcache_to_mmu,
stall_out => dcache_stall_out,
wishbone_in => wishbone_data_in,
wishbone_out => wishbone_data_out,
snoop_in => wb_snoop_in,
events => dcache_events,
log_out => log_data(170 downto 151)
m_in => decode2_to_multiply,
m_out => multiply_to_writeback
);

writeback_0: entity work.writeback
port map (
clk => clk,
rst => rst_wback,
flush_out => flush,
e_in => execute1_to_writeback,
l_in => loadstore1_to_writeback,
fp_in => fpu_to_writeback,
e_in => execute2_to_writeback,
l_in => loadstore2_to_writeback,
m_in => multiply_to_writeback,
w_out => writeback_to_register_file,
c_out => writeback_to_cr_file,
f_out => writeback_to_fetch1,
events => writeback_events,
interrupt_out => do_interrupt,
complete_out => complete
);

log_data(150) <= '0';
log_data(139 downto 135) <= "00000";

debug_0: entity work.core_debug
generic map (
LOG_LENGTH => LOG_LENGTH
)
port map (
clk => clk,
rst => rst_dbg,
dmi_addr => dmi_addr,
dmi_din => dmi_din,
dmi_dout => dmi_dout,
dmi_req => dmi_req,
dmi_wr => dmi_wr,
dmi_ack => dmi_ack,
core_stop => dbg_core_stop,
core_rst => dbg_core_rst,
icache_rst => dbg_icache_rst,
terminate => terminate,
core_stopped => dbg_core_is_stopped,
nia => fetch1_to_icache.nia,
msr => msr,
dbg_gpr_req => dbg_gpr_req,
dbg_gpr_ack => dbg_gpr_ack,
dbg_gpr_addr => dbg_gpr_addr,
dbg_gpr_data => dbg_gpr_data,
log_data => log_data,
log_read_addr => log_rd_addr,
log_read_data => log_rd_data,
log_write_addr => log_wr_addr,
terminated_out => terminated_out
);

end behave;

@ -1,325 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.utils.all;
use work.common.all;

-- Debug module: exposes control, status, NIA, MSR, GSPR and log-buffer
-- registers over a DMI port and generates the core stop/reset controls.
entity core_debug is
generic (
-- Length of log buffer
LOG_LENGTH : natural := 512
);
port (
clk : in std_logic;
rst : in std_logic;

-- DMI register access: dmi_addr selects the register, dmi_wr selects
-- write (dmi_din) versus read (dmi_dout), dmi_ack acknowledges dmi_req.
dmi_addr : in std_ulogic_vector(3 downto 0);
dmi_din : in std_ulogic_vector(63 downto 0);
dmi_dout : out std_ulogic_vector(63 downto 0);
dmi_req : in std_ulogic;
dmi_wr : in std_ulogic;
dmi_ack : out std_ulogic;

-- Debug actions
core_stop : out std_ulogic;
core_rst : out std_ulogic;
icache_rst : out std_ulogic;

-- Core status inputs
terminate : in std_ulogic;
core_stopped : in std_ulogic;
nia : in std_ulogic_vector(63 downto 0);
msr : in std_ulogic_vector(63 downto 0);

-- GSPR register read port
dbg_gpr_req : out std_ulogic;
dbg_gpr_ack : in std_ulogic;
dbg_gpr_addr : out gspr_index_t;
dbg_gpr_data : in std_ulogic_vector(63 downto 0);

-- Core logging data
-- log_data is the 256-bit word written into the log buffer.
log_data : in std_ulogic_vector(255 downto 0);
log_read_addr : in std_ulogic_vector(31 downto 0);
log_read_data : out std_ulogic_vector(63 downto 0);
log_write_addr : out std_ulogic_vector(31 downto 0);

-- Misc
-- Latched "terminated" status (the same bit as STAT bit 2).
terminated_out : out std_ulogic
);
end core_debug;

architecture behave of core_debug is
-- DMI needs fixing... make a one clock pulse
-- (dmi_req delayed by one clock, used for rising-edge detection)
signal dmi_req_1: std_ulogic;

-- CTRL register (direct actions, write 1 to act, read back 0)
-- bit 0 : Core stop
-- bit 1 : Core reset (doesn't clear stop)
-- bit 2 : Icache reset
-- bit 3 : Single step
-- bit 4 : Core start
constant DBG_CORE_CTRL : std_ulogic_vector(3 downto 0) := "0000";
constant DBG_CORE_CTRL_STOP : integer := 0;
constant DBG_CORE_CTRL_RESET : integer := 1;
constant DBG_CORE_CTRL_ICRESET : integer := 2;
constant DBG_CORE_CTRL_STEP : integer := 3;
constant DBG_CORE_CTRL_START : integer := 4;

-- STAT register (read only)
-- bit 0 : Core stopping (wait til bit 1 set)
-- bit 1 : Core stopped
-- bit 2 : Core terminated (clears with start or reset)
constant DBG_CORE_STAT : std_ulogic_vector(3 downto 0) := "0001";
constant DBG_CORE_STAT_STOPPING : integer := 0;
constant DBG_CORE_STAT_STOPPED : integer := 1;
constant DBG_CORE_STAT_TERM : integer := 2;

-- NIA register (read only for now)
constant DBG_CORE_NIA : std_ulogic_vector(3 downto 0) := "0010";

-- MSR (read only)
constant DBG_CORE_MSR : std_ulogic_vector(3 downto 0) := "0011";

-- GSPR register index
constant DBG_CORE_GSPR_INDEX : std_ulogic_vector(3 downto 0) := "0100";

-- GSPR register data
constant DBG_CORE_GSPR_DATA : std_ulogic_vector(3 downto 0) := "0101";

-- Log buffer address and data registers
constant DBG_CORE_LOG_ADDR : std_ulogic_vector(3 downto 0) := "0110";
constant DBG_CORE_LOG_DATA : std_ulogic_vector(3 downto 0) := "0111";
constant DBG_CORE_LOG_TRIGGER : std_ulogic_vector(3 downto 0) := "1000";

-- Width of a log-buffer entry index.
constant LOG_INDEX_BITS : natural := log2(LOG_LENGTH);

-- Some internal wires
signal stat_reg : std_ulogic_vector(63 downto 0);

-- Some internal latches
signal stopping : std_ulogic;
signal do_step : std_ulogic;
signal do_reset : std_ulogic;
signal do_icreset : std_ulogic;
signal terminated : std_ulogic;
-- NOTE(review): do_gspr_rd is not referenced in the visible part of this architecture.
signal do_gspr_rd : std_ulogic;
signal gspr_index : gspr_index_t;

-- Log access state. log_dmi_addr is written through DBG_CORE_LOG_ADDR and
-- advanced after each DBG_CORE_LOG_DATA read; log_dmi_trigger is written
-- through DBG_CORE_LOG_TRIGGER.
signal log_dmi_addr : std_ulogic_vector(31 downto 0) := (others => '0');
signal log_dmi_data : std_ulogic_vector(63 downto 0) := (others => '0');
signal log_dmi_trigger : std_ulogic_vector(63 downto 0) := (others => '0');
signal do_log_trigger : std_ulogic := '0';
-- One-cycle request to latch a new log read pointer from log_dmi_addr.
signal do_dmi_log_rd : std_ulogic;
-- A DBG_CORE_LOG_DATA access is in progress, and its one-clock-delayed copy.
signal dmi_read_log_data : std_ulogic;
signal dmi_read_log_data_1 : std_ulogic;
-- Counts clocks after do_log_trigger; on reaching 255, log_dmi_trigger(1) is set.
signal log_trigger_delay : integer range 0 to 255 := 0;

begin
-- Single cycle register accesses on DMI except for GSPR data
dmi_ack <= dmi_req when dmi_addr /= DBG_CORE_GSPR_DATA
else dbg_gpr_ack;
dbg_gpr_req <= dmi_req when dmi_addr = DBG_CORE_GSPR_DATA
else '0';

-- Status register read composition
stat_reg <= (2 => terminated,
1 => core_stopped,
0 => stopping,
others => '0');

-- DMI read data mux
with dmi_addr select dmi_dout <=
stat_reg when DBG_CORE_STAT,
nia when DBG_CORE_NIA,
msr when DBG_CORE_MSR,
dbg_gpr_data when DBG_CORE_GSPR_DATA,
log_write_addr & log_dmi_addr when DBG_CORE_LOG_ADDR,
log_dmi_data when DBG_CORE_LOG_DATA,
log_dmi_trigger when DBG_CORE_LOG_TRIGGER,
(others => '0') when others;

-- DMI writes
reg_write: process(clk)
begin
if rising_edge(clk) then
-- Reset the 1-cycle "do" signals
do_step <= '0';
do_reset <= '0';
do_icreset <= '0';
do_dmi_log_rd <= '0';

if (rst) then
stopping <= '0';
terminated <= '0';
log_trigger_delay <= 0;
gspr_index <= (others => '0');
else
if do_log_trigger = '1' or log_trigger_delay /= 0 then
if log_trigger_delay = 255 then
log_dmi_trigger(1) <= '1';
log_trigger_delay <= 0;
else
log_trigger_delay <= log_trigger_delay + 1;
end if;
end if;
-- Edge detect on dmi_req for 1-shot pulses
dmi_req_1 <= dmi_req;
if dmi_req = '1' and dmi_req_1 = '0' then
if dmi_wr = '1' then
report("DMI write to " & to_hstring(dmi_addr));

-- Control register actions
if dmi_addr = DBG_CORE_CTRL then
if dmi_din(DBG_CORE_CTRL_RESET) = '1' then
do_reset <= '1';
terminated <= '0';
end if;
if dmi_din(DBG_CORE_CTRL_STOP) = '1' then
stopping <= '1';
end if;
if dmi_din(DBG_CORE_CTRL_STEP) = '1' then
do_step <= '1';
terminated <= '0';
end if;
if dmi_din(DBG_CORE_CTRL_ICRESET) = '1' then
do_icreset <= '1';
end if;
if dmi_din(DBG_CORE_CTRL_START) = '1' then
stopping <= '0';
terminated <= '0';
end if;
elsif dmi_addr = DBG_CORE_GSPR_INDEX then
gspr_index <= dmi_din(gspr_index_t'left downto 0);
elsif dmi_addr = DBG_CORE_LOG_ADDR then
log_dmi_addr <= dmi_din(31 downto 0);
do_dmi_log_rd <= '1';
elsif dmi_addr = DBG_CORE_LOG_TRIGGER then
log_dmi_trigger <= dmi_din;
end if;
else
report("DMI read from " & to_string(dmi_addr));
end if;

elsif dmi_read_log_data = '0' and dmi_read_log_data_1 = '1' then
-- Increment log_dmi_addr after the end of a read from DBG_CORE_LOG_DATA
log_dmi_addr(LOG_INDEX_BITS + 1 downto 0) <=
std_ulogic_vector(unsigned(log_dmi_addr(LOG_INDEX_BITS+1 downto 0)) + 1);
do_dmi_log_rd <= '1';
end if;
dmi_read_log_data_1 <= dmi_read_log_data;
if dmi_req = '1' and dmi_addr = DBG_CORE_LOG_DATA then
dmi_read_log_data <= '1';
else
dmi_read_log_data <= '0';
end if;

-- Set core stop on terminate. We'll be stopping some time *after*
-- the offending instruction, at least until we can do back flushes
-- that preserve NIA which we can't just yet.
if terminate = '1' then
stopping <= '1';
terminated <= '1';
end if;
end if;
end if;
end process;

-- The GSPR read address is whatever was last written to DBG_CORE_GSPR_INDEX.
dbg_gpr_addr <= gspr_index;

-- Core control signals generated by the debug module
-- core_stop drops for the one cycle in which do_step is set (single step).
core_stop <= stopping and not do_step;
core_rst <= do_reset;
icache_rst <= do_icreset;
terminated_out <= terminated;

-- Logging RAM
-- The log buffer is only generated when LOG_LENGTH is non-zero.
maybe_log: if LOG_LENGTH > 0 generate
subtype log_ptr_t is unsigned(LOG_INDEX_BITS - 1 downto 0);
-- LOG_LENGTH entries of 256 bits each.
type log_array_t is array(0 to LOG_LENGTH - 1) of std_ulogic_vector(255 downto 0);
signal log_array : log_array_t;
signal log_rd_ptr : log_ptr_t;
signal log_wr_ptr : log_ptr_t;
-- Inverted each time the write pointer reaches the last entry.
signal log_toggle : std_ulogic;
signal log_wr_enable : std_ulogic;
-- Entry index used for the RAM read port.
signal log_rd_ptr_latched : log_ptr_t;
-- Registered RAM read data (one full 256-bit entry).
signal log_rd : std_ulogic_vector(255 downto 0);
-- Two-stage delay of do_dmi_log_rd.
signal log_dmi_reading : std_ulogic;
signal log_dmi_read_done : std_ulogic;

function select_dword(data : std_ulogic_vector(255 downto 0);
addr : std_ulogic_vector(31 downto 0)) return std_ulogic_vector is
variable firstbit : integer;
begin
firstbit := to_integer(unsigned(addr(1 downto 0))) * 64;
return data(firstbit + 63 downto firstbit);
end;

attribute ram_style : string;
attribute ram_style of log_array : signal is "block";
attribute ram_decomp : string;
attribute ram_decomp of log_array : signal is "power";

begin
-- Use MSB of read addresses to stop the logging
log_wr_enable <= not (log_read_addr(31) or log_dmi_addr(31) or log_dmi_trigger(1));

log_ram: process(clk)
begin
if rising_edge(clk) then
if log_wr_enable = '1' then
log_array(to_integer(log_wr_ptr)) <= log_data;
end if;
log_rd <= log_array(to_integer(log_rd_ptr_latched));
end if;
end process;


log_buffer: process(clk)
variable b : integer;
variable data : std_ulogic_vector(255 downto 0);
begin
if rising_edge(clk) then
if rst = '1' then
log_wr_ptr <= (others => '0');
log_toggle <= '0';
elsif log_wr_enable = '1' then
if log_wr_ptr = to_unsigned(LOG_LENGTH - 1, LOG_INDEX_BITS) then
log_toggle <= not log_toggle;
end if;
log_wr_ptr <= log_wr_ptr + 1;
end if;
if do_dmi_log_rd = '1' then
log_rd_ptr_latched <= unsigned(log_dmi_addr(LOG_INDEX_BITS + 1 downto 2));
else
log_rd_ptr_latched <= unsigned(log_read_addr(LOG_INDEX_BITS + 1 downto 2));
end if;
if log_dmi_read_done = '1' then
log_dmi_data <= select_dword(log_rd, log_dmi_addr);
else
log_read_data <= select_dword(log_rd, log_read_addr);
end if;
log_dmi_read_done <= log_dmi_reading;
log_dmi_reading <= do_dmi_log_rd;
do_log_trigger <= '0';
if log_data(42) = log_dmi_trigger(63) and
log_data(41 downto 0) = log_dmi_trigger(43 downto 2) and
log_dmi_trigger(0) = '1' then
do_log_trigger <= '1';
end if;
end if;
end process;
log_write_addr(LOG_INDEX_BITS - 1 downto 0) <= std_ulogic_vector(log_wr_ptr);
log_write_addr(LOG_INDEX_BITS) <= '1';
log_write_addr(31 downto LOG_INDEX_BITS + 1) <= (others => '0');
end generate;

-- With logging disabled there is no buffer: log reads always return zero
-- and the reported write address is the constant 1.
no_log: if LOG_LENGTH = 0 generate
begin
log_read_data <= (others => '0');
log_write_addr <= x"00000001";
end generate;

end behave;

@ -1,162 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;
use work.wishbone_types.all;
use work.utils.all;

-- Simulation testbench: SoC wired to a litedram_wrapper instance and an
-- S25FL128S SPI flash model.
entity core_dram_tb is
    generic (
        -- Forwarded to the SoC as MEMORY_SIZE.  When 0, the DRAM init
        -- payload size (DRAM_INIT_SIZE) is used as ROM_SIZE.
        MEMORY_SIZE    : natural := (384*1024);
        -- Forwarded to the SoC as RAM_INIT_FILE.
        MAIN_RAM_FILE  : string  := "main_ram.bin";
        -- Forwarded to the DRAM wrapper as PAYLOAD_FILE.
        DRAM_INIT_FILE : string  := "";
        DRAM_INIT_SIZE : natural := 16#c000#
        );
end core_dram_tb;

architecture behave of core_dram_tb is
    -- Board-level clock and reset fed to the DRAM wrapper
    signal clk, rst : std_logic;
    -- System clock and reset produced by the DRAM wrapper for the SoC
    signal system_clk, soc_rst : std_ulogic;

    -- testbench signals
    constant clk_period : time := 10 ns;

    -- Sim DRAM
    signal wb_dram_in          : wishbone_master_out;
    signal wb_dram_out         : wishbone_slave_out;
    signal wb_ext_io_in        : wb_io_master_out;
    signal wb_ext_io_out       : wb_io_slave_out;
    signal wb_ext_is_dram_csr  : std_ulogic;
    signal wb_ext_is_dram_init : std_ulogic;
    signal core_alt_reset      : std_ulogic;

    -- SPI
    signal spi_sck     : std_ulogic;
    signal spi_cs_n    : std_ulogic := '1';
    signal spi_sdat_o  : std_ulogic_vector(3 downto 0);
    signal spi_sdat_oe : std_ulogic_vector(3 downto 0);
    signal spi_sdat_i  : std_ulogic_vector(3 downto 0);
    signal fl_hold_n   : std_logic;
    signal fl_wp_n     : std_logic;
    signal fl_mosi     : std_logic;
    signal fl_miso     : std_logic;

    -- ROM size: DRAM_INIT_SIZE when there is no main RAM, otherwise 0.
    function get_rom_size return natural is
        variable size : natural := 0;
    begin
        if MEMORY_SIZE = 0 then
            size := DRAM_INIT_SIZE;
        end if;
        return size;
    end function;

    constant ROM_SIZE : natural := get_rom_size;
begin

    -- Device under test
    soc0: entity work.soc
        generic map(
            SIM              => true,
            MEMORY_SIZE      => MEMORY_SIZE,
            RAM_INIT_FILE    => MAIN_RAM_FILE,
            HAS_DRAM         => true,
            DRAM_SIZE        => 256 * 1024 * 1024,
            DRAM_INIT_SIZE   => ROM_SIZE,
            CLK_FREQ         => 100000000,
            HAS_SPI_FLASH    => true,
            SPI_FLASH_DLINES => 4,
            SPI_FLASH_OFFSET => 0
            )
        port map(
            rst                 => soc_rst,
            system_clk          => system_clk,
            wb_dram_in          => wb_dram_in,
            wb_dram_out         => wb_dram_out,
            wb_ext_io_in        => wb_ext_io_in,
            wb_ext_io_out       => wb_ext_io_out,
            wb_ext_is_dram_csr  => wb_ext_is_dram_csr,
            wb_ext_is_dram_init => wb_ext_is_dram_init,
            spi_flash_sck       => spi_sck,
            spi_flash_cs_n      => spi_cs_n,
            spi_flash_sdat_o    => spi_sdat_o,
            spi_flash_sdat_oe   => spi_sdat_oe,
            spi_flash_sdat_i    => spi_sdat_i,
            alt_reset           => core_alt_reset
            );

    -- SPI flash model
    flash: entity work.s25fl128s
        generic map (
            TimingModel   => "S25FL128SAGNFI000_R_30pF",
            LongTimming   => false,
            tdevice_PU    => 10 ns,
            tdevice_PP256 => 100 ns,
            tdevice_PP512 => 100 ns,
            tdevice_WRR   => 100 ns,
            UserPreload   => TRUE
            )
        port map(
            SCK     => spi_sck,
            SI      => fl_mosi,
            CSNeg   => spi_cs_n,
            HOLDNeg => fl_hold_n,
            WPNeg   => fl_wp_n,
            RSTNeg  => '1',
            SO      => fl_miso
            );

    -- Tri-state drivers from the SoC onto the four flash data lines.  Each
    -- line is driven only while its output enable is set; HOLD# is also
    -- held high whenever line 0 is being driven.
    fl_mosi   <= spi_sdat_o(0) when spi_sdat_oe(0) = '1' else 'Z';
    fl_miso   <= spi_sdat_o(1) when spi_sdat_oe(1) = '1' else 'Z';
    fl_wp_n   <= spi_sdat_o(2) when spi_sdat_oe(2) = '1' else 'Z';
    fl_hold_n <= spi_sdat_o(3) when spi_sdat_oe(3) = '1' else
                 '1' when spi_sdat_oe(0) = '1' else 'Z';

    -- Read-back of the resolved line values into the SoC
    spi_sdat_i(0) <= fl_mosi;
    spi_sdat_i(1) <= fl_miso;
    spi_sdat_i(2) <= fl_wp_n;
    spi_sdat_i(3) <= fl_hold_n;

    -- DRAM wrapper; it also supplies system_clk and soc_rst to the SoC
    dram: entity work.litedram_wrapper
        generic map(
            DRAM_ABITS      => 24,
            DRAM_ALINES     => 1,
            DRAM_DLINES     => 16,
            DRAM_CKLINES    => 1,
            DRAM_PORT_WIDTH => 128,
            PAYLOAD_FILE    => DRAM_INIT_FILE,
            PAYLOAD_SIZE    => ROM_SIZE
            )
        port map(
            clk_in          => clk,
            rst             => rst,
            system_clk      => system_clk,
            system_reset    => soc_rst,
            core_alt_reset  => core_alt_reset,

            wb_in           => wb_dram_in,
            wb_out          => wb_dram_out,
            wb_ctrl_in      => wb_ext_io_in,
            wb_ctrl_out     => wb_ext_io_out,
            wb_ctrl_is_csr  => wb_ext_is_dram_csr,
            wb_ctrl_is_init => wb_ext_is_dram_init
            );

    -- Free-running clock with period clk_period
    clk_process: process
    begin
        clk <= '0';
        wait for clk_period/2;
        clk <= '1';
        wait for clk_period/2;
    end process;

    -- Hold reset for the first ten clock periods, then release it for good
    rst_process: process
    begin
        rst <= '1';
        wait for 10*clk_period;
        rst <= '0';
        wait;
    end process;

    jtag: entity work.sim_jtag;

end;

@ -1,97 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;
use work.wishbone_types.all;

-- Simulation testbench: SoC with block RAM and an S25FL128S SPI flash model.
entity core_flash_tb is
end core_flash_tb;

architecture behave of core_flash_tb is
    signal clk, rst : std_logic;

    -- testbench signals
    constant clk_period : time := 10 ns;

    -- SPI
    signal spi_sck     : std_ulogic;
    signal spi_cs_n    : std_ulogic := '1';
    signal spi_sdat_o  : std_ulogic_vector(3 downto 0);
    signal spi_sdat_oe : std_ulogic_vector(3 downto 0);
    signal spi_sdat_i  : std_ulogic_vector(3 downto 0);
    signal fl_hold_n   : std_logic;
    signal fl_wp_n     : std_logic;
    signal fl_mosi     : std_logic;
    signal fl_miso     : std_logic;
begin

    -- Device under test
    soc0: entity work.soc
        generic map(
            SIM              => true,
            MEMORY_SIZE      => (384*1024),
            RAM_INIT_FILE    => "main_ram.bin",
            CLK_FREQ         => 100000000,
            HAS_SPI_FLASH    => true,
            SPI_FLASH_DLINES => 4,
            SPI_FLASH_OFFSET => 0
            )
        port map(
            rst               => rst,
            system_clk        => clk,
            spi_flash_sck     => spi_sck,
            spi_flash_cs_n    => spi_cs_n,
            spi_flash_sdat_o  => spi_sdat_o,
            spi_flash_sdat_oe => spi_sdat_oe,
            spi_flash_sdat_i  => spi_sdat_i
            );

    -- SPI flash model
    flash: entity work.s25fl128s
        generic map (
            TimingModel   => "S25FL128SAGNFI000_R_30pF",
            LongTimming   => false,
            tdevice_PU    => 10 ns,
            tdevice_PP256 => 100 ns,
            tdevice_PP512 => 100 ns,
            tdevice_WRR   => 100 ns
            )
        port map(
            SCK     => spi_sck,
            SI      => fl_mosi,
            CSNeg   => spi_cs_n,
            HOLDNeg => fl_hold_n,
            WPNeg   => fl_wp_n,
            RSTNeg  => '1',
            SO      => fl_miso
            );

    -- Tri-state drivers from the SoC onto the four flash data lines.  Each
    -- line is driven only while its output enable is set; HOLD# is also
    -- held high whenever line 0 is being driven.
    fl_mosi   <= spi_sdat_o(0) when spi_sdat_oe(0) = '1' else 'Z';
    fl_miso   <= spi_sdat_o(1) when spi_sdat_oe(1) = '1' else 'Z';
    fl_wp_n   <= spi_sdat_o(2) when spi_sdat_oe(2) = '1' else 'Z';
    fl_hold_n <= spi_sdat_o(3) when spi_sdat_oe(3) = '1' else
                 '1' when spi_sdat_oe(0) = '1' else 'Z';

    -- Read-back of the resolved line values into the SoC
    spi_sdat_i(0) <= fl_mosi;
    spi_sdat_i(1) <= fl_miso;
    spi_sdat_i(2) <= fl_wp_n;
    spi_sdat_i(3) <= fl_hold_n;

    -- Free-running clock with period clk_period
    clk_process: process
    begin
        clk <= '0';
        wait for clk_period/2;
        clk <= '1';
        wait for clk_period/2;
    end process;

    -- Hold reset for the first ten clock periods, then release it for good
    rst_process: process
    begin
        rst <= '1';
        wait for 10*clk_period;
        rst <= '0';
        wait;
    end process;

    jtag: entity work.sim_jtag;

end;

@ -1,6 +1,5 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;
@ -10,40 +9,39 @@ entity core_tb is
end core_tb;

architecture behave of core_tb is
signal clk, rst: std_logic;
signal clk, rst: std_logic;

-- testbench signals
constant clk_period : time := 10 ns;
-- testbench signals
constant clk_period : time := 10 ns;
begin

soc0: entity work.soc
generic map(
SIM => true,
MEMORY_SIZE => (384*1024),
RAM_INIT_FILE => "main_ram.bin",
CLK_FREQ => 100000000
)
port map(
rst => rst,
system_clk => clk
);

clk_process: process
begin
clk <= '0';
wait for clk_period/2;
clk <= '1';
wait for clk_period/2;
end process;

rst_process: process
begin
rst <= '1';
wait for 10*clk_period;
rst <= '0';
wait;
end process;

jtag: entity work.sim_jtag;

generic map(
SIM => true,
MEMORY_SIZE => 524288,
RAM_INIT_FILE => "simple_ram_behavioural.bin",
RESET_LOW => false
)
port map(
rst => rst,
system_clk => clk,
uart0_rxd => '0',
uart0_txd => open
);

clk_process: process
begin
clk <= '0';
wait for clk_period/2;
clk <= '1';
wait for clk_period/2;
end process;

rst_process: process
begin
rst <= '1';
wait for 10*clk_period;
rst <= '0';
wait;
end process;
end;

@ -1,136 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.helpers.all;

-- Count-leading/trailing-zeroes and population-count unit.
--
-- Both functions are split around one register stage: inputs are sampled
-- on a rising clock edge and the result is produced combinationally from
-- the registered values.
entity bit_counter is
    port (
        clk         : in std_logic;
        rs          : in std_ulogic_vector(63 downto 0);
        -- '0' counts from the most significant end (input is bit-reversed),
        -- anything else counts from the least significant end
        count_right : in std_ulogic;
        -- registered; selects popcnt rather than the zero count as result
        do_popcnt   : in std_ulogic;
        -- '0' operates on all 64 bits, anything else on rs(31 downto 0)
        is_32bit    : in std_ulogic;
        -- registered; bits 3..2 select the popcnt element size
        datalen     : in std_ulogic_vector(3 downto 0);
        result      : out std_ulogic_vector(63 downto 0)
        );
end entity bit_counter;

architecture behaviour of bit_counter is
    -- signals for count-leading/trailing-zeroes
    signal inp, inp_r : std_ulogic_vector(63 downto 0);
    signal sum, sum_r : std_ulogic_vector(64 downto 0);
    signal onehot     : std_ulogic_vector(63 downto 0);
    signal edge       : std_ulogic_vector(63 downto 0);
    signal bitnum     : std_ulogic_vector(5 downto 0);
    signal cntz       : std_ulogic_vector(63 downto 0);

    -- signals for popcnt
    signal dlen_r : std_ulogic_vector(3 downto 0);
    signal pcnt_r : std_ulogic;
    subtype twobit is unsigned(1 downto 0);
    type twobit32 is array(0 to 31) of twobit;
    signal pc2 : twobit32;
    subtype threebit is unsigned(2 downto 0);
    type threebit16 is array(0 to 15) of threebit;
    signal pc4 : threebit16;
    subtype fourbit is unsigned(3 downto 0);
    type fourbit8 is array(0 to 7) of fourbit;
    signal pc8, pc8_r : fourbit8;
    subtype sixbit is unsigned(5 downto 0);
    type sixbit2 is array(0 to 1) of sixbit;
    signal pc32 : sixbit2;
    signal popcnt : std_ulogic_vector(63 downto 0);

begin
    -- Register stage for the zero-count datapath
    countzero_r: process(clk)
    begin
        if rising_edge(clk) then
            sum_r <= sum;
            inp_r <= inp;
        end if;
    end process;

    countzero: process(all)
        variable bitnum_e, bitnum_o : std_ulogic_vector(5 downto 0);
    begin
        -- Normalise the operand so that zeroes are always counted from
        -- bit 0 upwards.  In 32-bit mode the upper word is filled with
        -- ones so the count cannot exceed 32.
        if count_right = '0' then
            if is_32bit = '0' then
                inp <= bit_reverse(rs);
            else
                inp <= x"FFFFFFFF" & bit_reverse(rs(31 downto 0));
            end if;
        else
            if is_32bit = '0' then
                inp <= rs;
            else
                inp <= x"FFFFFFFF" & rs(31 downto 0);
            end if;
        end if;

        -- 65-bit two's complement negation of the operand; bit 64 ends up
        -- set only when the operand is all zeroes.
        sum <= std_ulogic_vector(unsigned('0' & not inp) + 1);

        -- The following occurs after a clock edge
        onehot <= sum_r(63 downto 0) and inp_r;
        edge   <= sum_r(63 downto 0) or inp_r;
        bitnum_o := bit_number(onehot);
        bitnum_e := edgelocation(edge, 6);
        -- Upper index bits come from the edge encoding, lower two from
        -- the one-hot encoding.
        bitnum(1 downto 0) <= bitnum_o(1 downto 0);
        bitnum(5 downto 2) <= bitnum_e(5 downto 2);

        cntz <= 57x"0" & sum_r(64) & bitnum;
    end process;

    -- Register stage for the popcnt datapath and its control inputs
    popcnt_r: process(clk)
    begin
        if rising_edge(clk) then
            pc8_r  <= pc8;
            pcnt_r <= do_popcnt;
            dlen_r <= datalen;
        end if;
    end process;

    popcnt_a: process(all)
        variable total : std_ulogic_vector(63 downto 0);
    begin
        -- Adder tree before the register: per-2-bit, per-nibble and
        -- per-byte counts.
        for i in 0 to 31 loop
            pc2(i) <= unsigned("0" & rs(i * 2 downto i * 2)) + unsigned("0" & rs(i * 2 + 1 downto i * 2 + 1));
        end loop;
        for i in 0 to 15 loop
            pc4(i) <= ('0' & pc2(i * 2)) + ('0' & pc2(i * 2 + 1));
        end loop;
        for i in 0 to 7 loop
            pc8(i) <= ('0' & pc4(i * 2)) + ('0' & pc4(i * 2 + 1));
        end loop;

        -- after a clock edge
        for i in 0 to 1 loop
            pc32(i) <= ("00" & pc8_r(i * 4)) + ("00" & pc8_r(i * 4 + 1)) +
                       ("00" & pc8_r(i * 4 + 2)) + ("00" & pc8_r(i * 4 + 3));
        end loop;

        total := (others => '0');
        if dlen_r(3 downto 2) = "00" then
            -- popcntb
            for i in 0 to 7 loop
                total(i * 8 + 3 downto i * 8) := std_ulogic_vector(pc8_r(i));
            end loop;
        elsif dlen_r(3) = '0' then
            -- popcntw
            for i in 0 to 1 loop
                total(i * 32 + 5 downto i * 32) := std_ulogic_vector(pc32(i));
            end loop;
        else
            -- full 64-bit count
            total(6 downto 0) := std_ulogic_vector(('0' & pc32(0)) + ('0' & pc32(1)));
        end if;
        popcnt <= total;
    end process;

    result <= cntz when pcnt_r = '0' else popcnt;

end behaviour;

@ -1,118 +0,0 @@
library vunit_lib;
context vunit_lib.vunit_context;

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;

library osvvm;
use osvvm.RandomPkg.all;

-- VUnit testbench for the zero-count function of bit_counter.
entity countbits_tb is
    generic (runner_cfg : string := runner_cfg_default);
end countbits_tb;

architecture behave of countbits_tb is
    constant clk_period : time := 10 ns;
    signal clk : std_ulogic;
    signal rs  : std_ulogic_vector(63 downto 0);
    signal res : std_ulogic_vector(63 downto 0);
    signal is_32bit, count_right : std_ulogic := '0';

begin
    -- Unit under test; popcnt is permanently deselected
    bitcounter_0: entity work.bit_counter
        port map (
            clk         => clk,
            rs          => rs,
            result      => res,
            count_right => count_right,
            is_32bit    => is_32bit,
            do_popcnt   => '0',
            datalen     => "0000"
            );

    -- Free-running clock with period clk_period
    clk_process: process
    begin
        clk <= '0';
        wait for clk_period/2;
        clk <= '1';
        wait for clk_period/2;
    end process;

    -- Each check applies inputs, waits one full clock period so the
    -- unit's register stage has sampled them, then compares the result.
    stim_process: process
        variable r   : std_ulogic_vector(63 downto 0);
        variable rnd : RandomPType;
    begin
        rnd.InitSeed(stim_process'path_name);

        test_runner_setup(runner, runner_cfg);

        while test_suite loop
            if run("Test with input = 0") then
                -- An all-zero operand must report the full operand width
                rs <= (others => '0');
                is_32bit <= '0';
                count_right <= '0';
                wait for clk_period;
                check_equal(res, 16#40#, result("for cntlzd"));
                count_right <= '1';
                wait for clk_period;
                check_equal(res, 16#40#, result("for cnttzd"));
                is_32bit <= '1';
                count_right <= '0';
                wait for clk_period;
                check_equal(res, 16#20#, result("for cntlzw"));
                count_right <= '1';
                wait for clk_period;
                check_equal(res, 16#20#, result("for cnttzw"));

            elsif run("Test cntlzd/w") then
                count_right <= '0';
                for j in 0 to 100 loop
                    -- Random pattern with the top bit set, shifted right
                    -- one place per step so step i has i leading zeroes
                    r := rnd.RandSlv(64);
                    r(63) := '1';
                    for i in 0 to 63 loop
                        rs <= r;
                        is_32bit <= '0';
                        wait for clk_period;
                        check_equal(res, i, result("for cntlzd " & to_hstring(rs)));
                        -- Swap the words so the 32-bit form sees the
                        -- upper half of the pattern
                        rs <= r(31 downto 0) & r(63 downto 32);
                        is_32bit <= '1';
                        wait for clk_period;
                        if i < 32 then
                            check_equal(res, i, result("for cntlzw " & to_hstring(rs)));
                        else
                            check_equal(res, 32, result("for cntlzw " & to_hstring(rs)));
                        end if;
                        r := '0' & r(63 downto 1);
                    end loop;
                end loop;

            elsif run("Test cnttzd/w") then
                count_right <= '1';
                for j in 0 to 100 loop
                    -- Random pattern with the bottom bit set, shifted left
                    -- one place per step so step i has i trailing zeroes
                    r := rnd.RandSlv(64);
                    r(0) := '1';
                    for i in 0 to 63 loop
                        rs <= r;
                        is_32bit <= '0';
                        wait for clk_period;
                        check_equal(res, i, result("for cnttzd " & to_hstring(rs)));
                        is_32bit <= '1';
                        wait for clk_period;
                        if i < 32 then
                            check_equal(res, i, result("for cnttzw " & to_hstring(rs)));
                        else
                            check_equal(res, 32, result("for cnttzw " & to_hstring(rs)));
                        end if;
                        r := r(62 downto 0) & '0';
                    end loop;
                end loop;
            end if;
        end loop;

        test_runner_cleanup(runner);
    end process;
end behave;

@ -6,104 +6,55 @@ library work;
use work.common.all;

entity cr_file is
generic (
SIM : boolean := false;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
port(
clk : in std_logic;
port(
clk : in std_logic;

d_in : in Decode2ToCrFileType;
d_out : out CrFileToDecode2Type;
d_in : in Decode2ToCrFileType;
d_out : out CrFileToDecode2Type;

w_in : in WritebackToCrFileType;

-- debug
sim_dump : in std_ulogic;

log_out : out std_ulogic_vector(12 downto 0)
);
w_in : in WritebackToCrFileType
);
end entity cr_file;

architecture behaviour of cr_file is
signal crs : std_ulogic_vector(31 downto 0) := (others => '0');
signal crs_updated : std_ulogic_vector(31 downto 0);
signal xerc : xer_common_t := xerc_init;
signal xerc_updated : xer_common_t;
signal crs : std_ulogic_vector(31 downto 0) := (others => '0');
signal crs_updated : std_ulogic_vector(31 downto 0) := (others => '0');
begin
cr_create_0: process(all)
variable hi, lo : integer := 0;
variable cr_tmp : std_ulogic_vector(31 downto 0) := (others => '0');
begin
cr_tmp := crs;

for i in 0 to 7 loop
if w_in.write_cr_mask(i) = '1' then
lo := i*4;
hi := lo + 3;
cr_tmp(hi downto lo) := w_in.write_cr_data(hi downto lo);
end if;
end loop;

crs_updated <= cr_tmp;

if w_in.write_xerc_enable = '1' then
xerc_updated <= w_in.write_xerc_data;
else
xerc_updated <= xerc;
end if;

end process;

-- synchronous writes
cr_write_0: process(clk)
begin
if rising_edge(clk) then
if w_in.write_cr_enable = '1' then
report "Writing " & to_hstring(w_in.write_cr_data) & " to CR mask " & to_hstring(w_in.write_cr_mask);
crs <= crs_updated;
end if;
if w_in.write_xerc_enable = '1' then
report "Writing XERC";
xerc <= xerc_updated;
end if;
end if;
end process;

-- asynchronous reads
cr_read_0: process(all)
begin
-- just return the entire CR to make mfcrf easier for now
if d_in.read = '1' then
report "Reading CR " & to_hstring(crs_updated);
end if;
d_out.read_cr_data <= crs_updated;
d_out.read_xerc_data <= xerc_updated;
end process;

sim_dump_test: if SIM generate
dump_cr: process(all)
begin
if sim_dump = '1' then
report "CR 00000000" & to_hstring(crs);
assert false report "end of test" severity failure;
end if;
end process;
end generate;

cf_log: if LOG_LENGTH > 0 generate
signal log_data : std_ulogic_vector(12 downto 0);
begin
cr_log: process(clk)
begin
if rising_edge(clk) then
log_data <= w_in.write_cr_enable &
w_in.write_cr_data(31 downto 28) &
w_in.write_cr_mask;
end if;
end process;
log_out <= log_data;
end generate;

cr_create_0: process(all)
variable hi, lo : integer := 0;
variable cr_tmp : std_ulogic_vector(31 downto 0) := (others => '0');
begin
cr_tmp := crs;

for i in 0 to 7 loop
if w_in.write_cr_mask(i) = '1' then
lo := i*4;
hi := lo + 3;
cr_tmp(hi downto lo) := w_in.write_cr_data(hi downto lo);
end if;
end loop;

crs_updated <= cr_tmp;
end process;

-- synchronous writes
cr_write_0: process(clk)
begin
if rising_edge(clk) then
if w_in.write_cr_enable = '1' then
report "Writing " & to_hstring(w_in.write_cr_data) & " to CR mask " & to_hstring(w_in.write_cr_mask);
end if;
crs <= crs_updated;
end if;
end process;

-- asynchronous reads
cr_read_0: process(all)
begin
-- just return the entire CR to make mfcrf easier for now
if d_in.read = '1' then
report "Reading CR " & to_hstring(crs_updated);
end if;
d_out.read_cr_data <= crs_updated;
end process;
end architecture behaviour;

@ -5,53 +5,130 @@ library work;
use work.common.all;

package crhelpers is
subtype crnum_t is integer range 0 to 7;
subtype crmask_t is std_ulogic_vector(7 downto 0);

function fxm_to_num(fxm: crmask_t) return crnum_t;
function num_to_fxm(num: crnum_t) return crmask_t;
function fxm_to_num(fxm: std_ulogic_vector(7 downto 0)) return integer;
function num_to_fxm(num: integer) return std_ulogic_vector;
--function from_crfile(cr: crfile) return std_ulogic_vector;
--function extract_one_crfield(cr: crfile; fxm: std_ulogic_vector(7 downto 0)) return std_ulogic_vector;
--function insert_multiple_crfields(cr_in: crfile; rs: std_ulogic_vector(63 downto 0); fxm: std_ulogic_vector(7 downto 0)) return crfile;
--function insert_one_crfield(cr_in: crfile; rs: std_ulogic_vector(63 downto 0); fxm: std_ulogic_vector(7 downto 0)) return crfile;
end package crhelpers;

package body crhelpers is

function fxm_to_num(fxm: crmask_t) return crnum_t is
begin
-- If multiple fields are set (undefined), match existing
-- hardware by returning the first one.
for i in 0 to 7 loop
-- Big endian bit numbering
if fxm(7-i) = '1' then
return i;
end if;
end loop;
function fxm_to_num(fxm: std_ulogic_vector(7 downto 0)) return integer is
begin
-- If multiple fields are set (undefined), match existing
-- hardware by returning the first one.
for i in 0 to 7 loop
-- Big endian bit numbering
if fxm(7-i) = '1' then
return i;
end if;
end loop;

-- If no fields are set (undefined), also match existing
-- hardware by returning cr7.
return 7;
end;
-- If no fields are set (undefined), also match existing
-- hardware by returning cr7.
return 7;
end;

function num_to_fxm(num: crnum_t) return crmask_t is
begin
case num is
when 0 =>
return "10000000";
when 1 =>
return "01000000";
when 2 =>
return "00100000";
when 3 =>
return "00010000";
when 4 =>
return "00001000";
when 5 =>
return "00000100";
when 6 =>
return "00000010";
when 7 =>
return "00000001";
when others =>
return "00000000";
end case;
end;
function num_to_fxm(num: integer) return std_ulogic_vector is
begin
case num is
when 0 =>
return "10000000";
when 1 =>
return "01000000";
when 2 =>
return "00100000";
when 3 =>
return "00010000";
when 4 =>
return "00001000";
when 5 =>
return "00000100";
when 6 =>
return "00000010";
when 7 =>
return "00000001";
when others =>
return "00000000";
end case;
end;

-- function from_crfile(cr: crfile) return std_ulogic_vector is
-- variable combined_cr : std_ulogic_vector(31 downto 0) := (others => '0');
-- variable high, low: integer range 0 to 31 := 0;
-- begin
-- for i in 0 to cr'length-1 loop
-- low := 4*(7-i);
-- high := low+3;
-- combined_cr(high downto low) := cr(i);
-- end loop;
--
-- return combined_cr;
-- end function;
--
-- function extract_one_crfield(cr: crfile; fxm: std_ulogic_vector(7 downto 0)) return std_ulogic_vector is
-- variable combined_cr : std_ulogic_vector(63 downto 0) := (others => '0');
-- variable crnum: integer range 0 to 7 := 0;
-- begin
-- crnum := fxm_to_num(fxm);
--
-- -- Vivado doesn't support non constant vector slice
-- -- low := 4*(7-crnum);
-- -- high := low+3;
-- -- combined_cr(high downto low) := cr(crnum);
-- case_0: case crnum is
-- when 0 =>
-- combined_cr(31 downto 28) := cr(0);
-- when 1 =>
-- combined_cr(27 downto 24) := cr(1);
-- when 2 =>
-- combined_cr(23 downto 20) := cr(2);
-- when 3 =>
-- combined_cr(19 downto 16) := cr(3);
-- when 4 =>
-- combined_cr(15 downto 12) := cr(4);
-- when 5 =>
-- combined_cr(11 downto 8) := cr(5);
-- when 6 =>
-- combined_cr(7 downto 4) := cr(6);
-- when 7 =>
-- combined_cr(3 downto 0) := cr(7);
-- end case;
--
-- return combined_cr;
-- end;
--
-- function insert_multiple_crfields(cr_in: crfile; rs: std_ulogic_vector(63 downto 0); fxm: std_ulogic_vector(7 downto 0)) return crfile is
-- variable cr : crfile;
-- variable combined_cr : std_ulogic_vector(63 downto 0) := (others => '0');
-- variable high, low: integer range 0 to 31 := 0;
-- begin
-- cr := cr_in;
--
-- for i in 0 to 7 loop
-- -- BE bit numbering
-- if fxm(7-i) = '1' then
-- low := 4*(7-i);
-- high := low+3;
-- cr(i) := rs(high downto low);
-- end if;
-- end loop;
--
-- return cr;
-- end;
--
-- function insert_one_crfield(cr_in: crfile; rs: std_ulogic_vector(63 downto 0); fxm: std_ulogic_vector(7 downto 0)) return crfile is
-- variable cr : crfile;
-- variable crnum: integer range 0 to 7 := 0;
-- variable high, low: integer range 0 to 31 := 0;
-- begin
-- cr := cr_in;
-- crnum := fxm_to_num(fxm);
-- low := 4*(7-crnum);
-- high := low+3;
-- cr(crnum) := rs(high downto low);
-- return cr;
-- end;
end package body crhelpers;

File diff suppressed because it is too large Load Diff

@ -1,138 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;

library work;
use work.common.all;
use work.wishbone_types.all;

-- Testbench for the data cache, backed by a small wishbone block RAM.
entity dcache_tb is
end dcache_tb;

architecture behave of dcache_tb is
    signal clk : std_ulogic;
    signal rst : std_ulogic;

    -- Load/store unit side of the cache
    signal d_in  : Loadstore1ToDcacheType;
    signal d_out : DcacheToLoadstore1Type;

    -- MMU side of the cache
    signal m_in  : MmuToDcacheType;
    signal m_out : DcacheToMmuType;

    -- Wishbone link between the cache (master) and the RAM (slave)
    signal wb_bram_in  : wishbone_master_out;
    signal wb_bram_out : wishbone_slave_out;

    constant clk_period : time := 10 ns;
begin
    -- Unit under test
    dcache0: entity work.dcache
        generic map(
            LINE_SIZE => 64,
            NUM_LINES => 4
            )
        port map(
            clk          => clk,
            rst          => rst,
            d_in         => d_in,
            d_out        => d_out,
            m_in         => m_in,
            m_out        => m_out,
            wishbone_out => wb_bram_in,
            wishbone_in  => wb_bram_out
            );

    -- BRAM Memory slave
    bram0: entity work.wishbone_bram_wrapper
        generic map(
            MEMORY_SIZE   => 1024,
            RAM_INIT_FILE => "icache_test.bin"
            )
        port map(
            clk          => clk,
            rst          => rst,
            wishbone_in  => wb_bram_in,
            wishbone_out => wb_bram_out
            );

    -- Free-running clock with period clk_period
    clk_process: process
    begin
        clk <= '0';
        wait for clk_period/2;
        clk <= '1';
        wait for clk_period/2;
    end process;

    -- Hold reset for the first two clock periods, then release it for good
    rst_process: process
    begin
        rst <= '1';
        wait for 2*clk_period;
        rst <= '0';
        wait;
    end process;

    -- Issues three reads, each as a one-cycle valid pulse, and checks the
    -- data returned when d_out.valid is seen on a rising clock edge.
    stim: process
    begin
        -- Clear stuff
        d_in.valid <= '0';
        d_in.load  <= '0';
        d_in.nc    <= '0';
        d_in.addr  <= (others => '0');
        d_in.data  <= (others => '0');
        m_in.valid <= '0';
        m_in.addr  <= (others => '0');
        m_in.pte   <= (others => '0');

        wait for 4*clk_period;
        wait until rising_edge(clk);

        -- Cacheable read of address 0x4
        d_in.load  <= '1';
        d_in.nc    <= '0';
        d_in.addr  <= x"0000000000000004";
        d_in.valid <= '1';
        wait until rising_edge(clk);
        d_in.valid <= '0';

        wait until rising_edge(clk) and d_out.valid = '1';
        assert d_out.data = x"0000000100000000"
            report "data @" & to_hstring(d_in.addr) &
            "=" & to_hstring(d_out.data) &
            " expected 0000000100000000"
            severity failure;
        -- wait for clk_period;

        -- Cacheable read of address 0x30
        d_in.load  <= '1';
        d_in.nc    <= '0';
        d_in.addr  <= x"0000000000000030";
        d_in.valid <= '1';
        wait until rising_edge(clk);
        d_in.valid <= '0';

        wait until rising_edge(clk) and d_out.valid = '1';
        assert d_out.data = x"0000000D0000000C"
            report "data @" & to_hstring(d_in.addr) &
            "=" & to_hstring(d_out.data) &
            " expected 0000000D0000000C"
            severity failure;

        -- Non-cacheable read of address 0x100
        d_in.load  <= '1';
        d_in.nc    <= '1';
        d_in.addr  <= x"0000000000000100";
        d_in.valid <= '1';
        wait until rising_edge(clk);
        d_in.valid <= '0';
        wait until rising_edge(clk) and d_out.valid = '1';
        assert d_out.data = x"0000004100000040"
            report "data @" & to_hstring(d_in.addr) &
            "=" & to_hstring(d_out.data) &
            " expected 0000004100000040"
            severity failure;

        -- Let a few idle cycles pass before ending the simulation
        wait until rising_edge(clk);
        wait until rising_edge(clk);
        wait until rising_edge(clk);
        wait until rising_edge(clk);

        std.env.finish;
    end process;
end;

File diff suppressed because it is too large Load Diff

@ -9,575 +9,403 @@ use work.helpers.all;
use work.insn_helpers.all;

entity decode2 is
generic (
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
port (
clk : in std_ulogic;
rst : in std_ulogic;
port (
clk : in std_ulogic;
rst : in std_ulogic;

complete_in : in instr_tag_t;
busy_in : in std_ulogic;
stall_out : out std_ulogic;
complete_in : in std_ulogic;
stall_out : out std_ulogic;

stopped_out : out std_ulogic;
flush_in: in std_ulogic;

flush_in: in std_ulogic;
d_in : in Decode1ToDecode2Type;

d_in : in Decode1ToDecode2Type;
e_out : out Decode2ToExecute1Type;
m_out : out Decode2ToMultiplyType;
l_out : out Decode2ToLoadstore1Type;

e_out : out Decode2ToExecute1Type;
r_in : in RegisterFileToDecode2Type;
r_out : out Decode2ToRegisterFileType;

r_in : in RegisterFileToDecode2Type;
r_out : out Decode2ToRegisterFileType;
c_in : in CrFileToDecode2Type;
c_out : out Decode2ToCrFileType;

c_in : in CrFileToDecode2Type;
c_out : out Decode2ToCrFileType;

execute_bypass : in bypass_data_t;
execute_cr_bypass : in cr_bypass_data_t;

log_out : out std_ulogic_vector(9 downto 0)
nia_out : out std_ulogic_vector(63 downto 2)
);
end entity decode2;

architecture behaviour of decode2 is
-- Register state of this stage: the record sent on towards execute plus a
-- repeat flag.
-- NOTE(review): the exact meaning of repeat is not visible in this part
-- of the file -- confirm where it is set.
type reg_type is record
e : Decode2ToExecute1Type;
repeat : std_ulogic;
end record;

-- r and rin have the same record type.
-- NOTE(review): presumably r is the registered value and rin its next
-- value -- confirm against the processes that drive them.
signal r, rin : reg_type;

signal deferred : std_ulogic;

-- Result of decoding one input operand.  reg_valid tells whether reg
-- names a register; data carries the operand value, which is either the
-- register data handed in or an immediate built from the instruction.
type decode_input_reg_t is record
reg_valid : std_ulogic;
reg : gspr_index_t;
data : std_ulogic_vector(63 downto 0);
end record;

-- Result of decoding the output operand: whether a register is written
-- and which one.
type decode_output_reg_t is record
reg_valid : std_ulogic;
reg : gspr_index_t;
end record;

-- Decode the A input operand.
--
-- t          : operand class from the decode table
-- insn_in    : instruction word, used to extract the RA / FRA field
-- reg_data   : data read for the operand, passed through for register
--              and SPR operands
-- ispr       : SPR index, used when t = SPR
-- instr_addr : address returned as data when t = CIA
--
-- Returns the register number (with its valid flag) and operand value.
-- Anything not handled below yields an invalid register and zero data.
function decode_input_reg_a (t : input_reg_a_t; insn_in : std_ulogic_vector(31 downto 0);
                             reg_data : std_ulogic_vector(63 downto 0);
                             ispr : gspr_index_t;
                             instr_addr : std_ulogic_vector(63 downto 0))
    return decode_input_reg_t is
    variable ret : decode_input_reg_t := ('0', (others => '0'), (others => '0'));
begin
    if t = RA or (t = RA_OR_ZERO and insn_ra(insn_in) /= "00000") then
        -- RA_OR_ZERO with an all-zero RA field falls through to the
        -- default (no register, zero data)
        ret := ('1', gpr_to_gspr(insn_ra(insn_in)), reg_data);
    elsif t = SPR then
        -- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
        -- If it's all 0, we don't treat it as a dependency as slow SPRs
        -- operations are single issue.
        --
        assert is_fast_spr(ispr) = '1' or ispr = "0000000"
            report "Decode A says SPR but ISPR is invalid:" &
            to_hstring(ispr) severity failure;
        ret := (is_fast_spr(ispr), ispr, reg_data);
    elsif t = CIA then
        ret.data := instr_addr;
    elsif HAS_FPU and t = FRA then
        ret := ('1', fpr_to_gspr(insn_fra(insn_in)), reg_data);
    end if;
    return ret;
end;

-- Decode the B input operand.
--
-- t        : operand class from the decode table
-- insn_in  : instruction word, used to extract register and immediate
--            fields
-- reg_data : data read for the operand, passed through for register and
--            SPR operands
-- ispr     : SPR index, used when t = SPR
--
-- Register operands return the register number with reg_valid set.
-- Immediate operands return no register and the immediate extended to
-- 64 bits as data.
function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0);
                             reg_data : std_ulogic_vector(63 downto 0);
                             ispr : gspr_index_t) return decode_input_reg_t is
    -- register field value used whenever no register is referenced
    constant NO_REG : gspr_index_t := (others => '0');
begin
    case t is
        when RB =>
            return ('1', gpr_to_gspr(insn_rb(insn_in)), reg_data);
        when FRB =>
            if HAS_FPU then
                return ('1', fpr_to_gspr(insn_frb(insn_in)), reg_data);
            else
                return ('0', NO_REG, (others => '0'));
            end if;
        when CONST_UI =>
            return ('0', NO_REG, std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64)));
        when CONST_SI =>
            return ('0', NO_REG, std_ulogic_vector(resize(signed(insn_si(insn_in)), 64)));
        when CONST_SI_HI =>
            return ('0', NO_REG, std_ulogic_vector(resize(signed(insn_si(insn_in)) & x"0000", 64)));
        when CONST_UI_HI =>
            return ('0', NO_REG, std_ulogic_vector(resize(unsigned(insn_si(insn_in)) & x"0000", 64)));
        when CONST_LI =>
            return ('0', NO_REG, std_ulogic_vector(resize(signed(insn_li(insn_in)) & "00", 64)));
        when CONST_BD =>
            return ('0', NO_REG, std_ulogic_vector(resize(signed(insn_bd(insn_in)) & "00", 64)));
        when CONST_DS =>
            return ('0', NO_REG, std_ulogic_vector(resize(signed(insn_ds(insn_in)) & "00", 64)));
        when CONST_DQ =>
            return ('0', NO_REG, std_ulogic_vector(resize(signed(insn_dq(insn_in)) & "0000", 64)));
        when CONST_DXHI4 =>
            return ('0', NO_REG, std_ulogic_vector(resize(signed(insn_dx(insn_in)) & x"0004", 64)));
        when CONST_M1 =>
            return ('0', NO_REG, x"FFFFFFFFFFFFFFFF");
        when CONST_SH =>
            -- 6-bit shift count: instruction bit 1 followed by bits 15..11
            return ('0', NO_REG, x"00000000000000" & "00" & insn_in(1) & insn_in(15 downto 11));
        when CONST_SH32 =>
            -- 5-bit shift count: instruction bits 15..11
            return ('0', NO_REG, x"00000000000000" & "000" & insn_in(15 downto 11));
        when SPR =>
            -- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
            -- If it's all 0, we don't treat it as a dependency as slow SPRs
            -- operations are single issue.
            assert is_fast_spr(ispr) = '1' or ispr = "0000000"
                report "Decode B says SPR but ISPR is invalid:" &
                to_hstring(ispr) severity failure;
            return (is_fast_spr(ispr), ispr, reg_data);
        when NONE =>
            return ('0', NO_REG, (others => '0'));
    end case;
end;

-- Decode the C input operand.
--
-- t        : operand class from the decode table
-- insn_in  : instruction word, used to extract the register field
-- reg_data : data read for the operand, passed through unchanged
--
-- Returns the register number with reg_valid set.  For NONE, and for the
-- floating-point classes when HAS_FPU is false, it returns no register
-- and zero data.
function decode_input_reg_c (t : input_reg_c_t; insn_in : std_ulogic_vector(31 downto 0);
                             reg_data : std_ulogic_vector(63 downto 0)) return decode_input_reg_t is
    variable ret : decode_input_reg_t := ('0', (others => '0'), (others => '0'));
begin
    case t is
        when RS =>
            ret := ('1', gpr_to_gspr(insn_rs(insn_in)), reg_data);
        when RCR =>
            ret := ('1', gpr_to_gspr(insn_rcreg(insn_in)), reg_data);
        when FRS =>
            -- FRS takes its register number from the FRT field
            if HAS_FPU then
                ret := ('1', fpr_to_gspr(insn_frt(insn_in)), reg_data);
            end if;
        when FRC =>
            if HAS_FPU then
                ret := ('1', fpr_to_gspr(insn_frc(insn_in)), reg_data);
            end if;
        when NONE =>
            null;
    end case;
    return ret;
end;

-- Decode the destination register of an instruction.
--   t       : output register selector for this instruction
--   insn_in : the 32-bit instruction word
--   ispr    : internal SPR index, used only when t = SPR
-- Returns (valid flag, register index).  FRT is only valid when HAS_FPU is
-- true.  For SPR the valid flag is is_fast_spr(ispr).
function decode_output_reg (t : output_reg_a_t; insn_in : std_ulogic_vector(31 downto 0);
                            ispr : gspr_index_t) return decode_output_reg_t is
begin
    case t is
        when RT =>
            return ('1', gpr_to_gspr(insn_rt(insn_in)));
        when RA =>
            return ('1', gpr_to_gspr(insn_ra(insn_in)));
        when FRT =>
            if HAS_FPU then
                return ('1', fpr_to_gspr(insn_frt(insn_in)));
            else
                return ('0', "0000000");
            end if;
        when SPR =>
            -- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
            -- If it's all 0, we don't treat it as a dependency as slow SPRs
            -- operations are single issue.
            -- The message names the output decoder so that a failure here is
            -- not mistaken for one raised by the B-input decoder.
            assert is_fast_spr(ispr) = '1' or ispr = "0000000"
                report "Decode output says SPR but ISPR is invalid:" &
                to_hstring(ispr) severity failure;
            return (is_fast_spr(ispr), ispr);
        when NONE =>
            return ('0', "0000000");
    end case;
end;

-- Work out the record (Rc) flag for an instruction.
--   t       : RC   -> take the flag from the instruction word via insn_rc
--             ONE  -> always '1'
--             NONE -> always '0'
--   insn_in : the 32-bit instruction word
function decode_rc (t : rc_t; insn_in : std_ulogic_vector(31 downto 0)) return std_ulogic is
    variable rc_bit : std_ulogic;
begin
    case t is
        when RC   => rc_bit := insn_rc(insn_in);
        when ONE  => rc_bit := '1';
        when NONE => rc_bit := '0';
    end case;
    return rc_bit;
end;

-- control signals that are derived from insn_type
type mux_select_array_t is array(insn_type_t) of std_ulogic_vector(2 downto 0);

-- Primary result mux selector, indexed by instruction type.  Each group of
-- instruction types below shares one 3-bit code; anything not listed uses
-- "000" (adder_result).
constant result_select : mux_select_array_t := (
    -- logical_result
    OP_AND | OP_OR | OP_XOR | OP_PRTY | OP_CMPB |
    OP_EXTS | OP_BPERM | OP_BCD | OP_MTSPR                  => "001",
    -- rotator_result
    OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR |
    OP_EXTSWSLI                                             => "010",
    -- muldiv_result
    OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 |
    OP_DIV | OP_DIVE | OP_MOD                               => "011",
    -- countbits_result
    OP_CNTZ | OP_POPCNT                                     => "100",
    -- spr_result
    OP_MFSPR                                                => "101",
    -- next_nia
    OP_B | OP_BC | OP_BCREG                                 => "110",
    -- misc_result
    OP_ADDG6S | OP_ISEL | OP_DARN | OP_MFMSR |
    OP_MFCR | OP_SETB                                       => "111",
    -- default to adder_result
    others                                                  => "000"
    );

-- Secondary mux selector, indexed by instruction type.  The entries are
-- grouped by the result they belong to (muldiv_result, misc_result,
-- cr_result); every instruction type not listed gets "000".
constant subresult_select : mux_select_array_t := (
    -- muldiv_result
    OP_MUL_L64                => "000",
    OP_MUL_H64                => "001",
    OP_MUL_H32                => "010",
    OP_DIV | OP_DIVE | OP_MOD => "011",
    -- misc_result
    OP_ADDG6S                 => "001",
    OP_ISEL                   => "010",
    OP_DARN                   => "011",
    OP_MFMSR                  => "100",
    OP_MFCR                   => "101",
    OP_SETB                   => "110",
    -- cr_result
    OP_CMP                    => "000",
    OP_CMPRB                  => "001",
    OP_CMPEQB                 => "010",
    OP_CROP                   => "011",
    OP_MCRXRX                 => "100",
    OP_MTCRF                  => "101",
    others                    => "000"
    );

-- issue control signals
signal control_valid_in : std_ulogic;
signal control_valid_out : std_ulogic;
signal control_stall_out : std_ulogic;
signal control_sgl_pipe : std_logic;

signal gpr_write_valid : std_ulogic;
signal gpr_write : gspr_index_t;

signal gpr_a_read_valid : std_ulogic;
signal gpr_a_read : gspr_index_t;
signal gpr_a_bypass : std_ulogic;

signal gpr_b_read_valid : std_ulogic;
signal gpr_b_read : gspr_index_t;
signal gpr_b_bypass : std_ulogic;

signal gpr_c_read_valid : std_ulogic;
signal gpr_c_read : gspr_index_t;
signal gpr_c_bypass : std_ulogic;

signal cr_read_valid : std_ulogic;
signal cr_write_valid : std_ulogic;
signal cr_bypass : std_ulogic;

signal instr_tag : instr_tag_t;

-- States of the single-issue state machine in decode2_1:
--   IDLE                      : instructions are passed through normally
--   WAIT_FOR_PREV_TO_COMPLETE : a single-pipe instruction is held back until
--                               the outstanding count reaches zero
--   WAIT_FOR_CURR_TO_COMPLETE : the single-pipe instruction has been sent and
--                               decode stalls until the count is zero again
type state_type is (IDLE, WAIT_FOR_PREV_TO_COMPLETE, WAIT_FOR_CURR_TO_COMPLETE);

-- Internal bookkeeping registers (not driven to any output port).
type reg_internal_type is record
state : state_type;
-- Number of instructions issued but not yet completed: incremented when a
-- valid instruction is sent to a unit, decremented when complete_in is '1'.
outstanding : integer;
end record;

-- Registered outputs of decode2, one record per downstream unit
-- (driven onto e_out, m_out and l_out respectively).
type reg_type is record
e : Decode2ToExecute1Type;
m : Decode2ToMultiplyType;
l : Decode2ToLoadstore1Type;
end record;

-- r* hold the current register values, rin* the values for the next clock.
signal r_int, rin_int : reg_internal_type;
signal r, rin : reg_type;

-- Result of decoding one source operand.
type decode_input_reg_t is record
-- '1' when the operand comes from a register (drives the read enable)
reg_valid : std_ulogic;
-- 5-bit register number taken from the instruction word
reg : std_ulogic_vector(4 downto 0);
-- operand value: register data or an immediate built from the instruction
data : std_ulogic_vector(63 downto 0);
end record;

-- Decode the first (A) source operand.
--   t        : operand selector for this instruction
--   insn_in  : the 32-bit instruction word
--   reg_data : value read from the register file for this operand
-- Returns (valid flag, register number, data).  For RA_OR_ZERO the data is
-- passed through ra_or_zero together with the RA field.
function decode_input_reg_a (t : input_reg_a_t; insn_in : std_ulogic_vector(31 downto 0);
                             reg_data : std_ulogic_vector(63 downto 0)) return decode_input_reg_t is
    variable ret : decode_input_reg_t;
begin
    -- "No operand" unless the selector names a register below.
    ret.reg_valid := '0';
    ret.reg := (others => '0');
    ret.data := (others => '0');

    case t is
        when RA =>
            ret.reg_valid := '1';
            ret.reg := insn_ra(insn_in);
            ret.data := reg_data;
        when RA_OR_ZERO =>
            ret.reg_valid := '1';
            ret.reg := insn_ra(insn_in);
            ret.data := ra_or_zero(reg_data, insn_ra(insn_in));
        when RS =>
            ret.reg_valid := '1';
            ret.reg := insn_rs(insn_in);
            ret.data := reg_data;
        when NONE =>
            null;
    end case;

    return ret;
end;

-- Decode the second (B) source operand.
--   t        : operand selector for this instruction
--   insn_in  : the 32-bit instruction word
--   reg_data : value read from the register file for this operand
-- Register selectors (RB, RS) return the register number with the valid flag
-- set.  CONST_* selectors return a 64-bit immediate built from an instruction
-- field, with the valid flag clear and a zero register number.
function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0);
                             reg_data : std_ulogic_vector(63 downto 0)) return decode_input_reg_t is
    variable ret : decode_input_reg_t;
begin
    -- Default: not a register operand, zero data.  The immediate cases
    -- below only need to fill in the data field.
    ret := ('0', (others => '0'), (others => '0'));

    case t is
        when RB =>
            ret := ('1', insn_rb(insn_in), reg_data);
        when RS =>
            ret := ('1', insn_rs(insn_in), reg_data);
        when CONST_UI =>
            -- zero-extended immediate
            ret.data := std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64));
        when CONST_SI =>
            -- sign-extended immediate
            ret.data := std_ulogic_vector(resize(signed(insn_si(insn_in)), 64));
        when CONST_SI_HI =>
            -- immediate shifted left 16 bits, then sign-extended
            ret.data := std_ulogic_vector(resize(signed(insn_si(insn_in)) & x"0000", 64));
        when CONST_UI_HI =>
            -- immediate shifted left 16 bits, then zero-extended
            ret.data := std_ulogic_vector(resize(unsigned(insn_si(insn_in)) & x"0000", 64));
        when CONST_LI =>
            -- field followed by two zero bits, sign-extended
            ret.data := std_ulogic_vector(resize(signed(insn_li(insn_in)) & "00", 64));
        when CONST_BD =>
            ret.data := std_ulogic_vector(resize(signed(insn_bd(insn_in)) & "00", 64));
        when CONST_DS =>
            ret.data := std_ulogic_vector(resize(signed(insn_ds(insn_in)) & "00", 64));
        when NONE =>
            null;
    end case;

    return ret;
end;

-- Decode the third (C) source operand.
--   t        : RS selects the RS field of the instruction, NONE means unused
--   insn_in  : the 32-bit instruction word
--   reg_data : value read from the register file for this operand
-- Returns (valid flag, register number, data); all zero when unused.
function decode_input_reg_c (t : input_reg_c_t; insn_in : std_ulogic_vector(31 downto 0);
                             reg_data : std_ulogic_vector(63 downto 0)) return decode_input_reg_t is
begin
    if t = RS then
        return ('1', insn_rs(insn_in), reg_data);
    end if;

    -- t = NONE
    return ('0', (others => '0'), (others => '0'));
end;

-- Pick the destination register number for an instruction.
--   t       : RT or RA select the corresponding instruction field,
--             NONE yields register number 0
--   insn_in : the 32-bit instruction word
function decode_output_reg (t : output_reg_a_t; insn_in : std_ulogic_vector(31 downto 0)) return std_ulogic_vector is
begin
    if t = RT then
        return insn_rt(insn_in);
    elsif t = RA then
        return insn_ra(insn_in);
    else
        -- t = NONE
        return "00000";
    end if;
end;

-- Extract the first constant operand from the instruction word.
--   t       : which instruction field to extract
--   insn_in : the 32-bit instruction word
-- Narrower fields are padded on the left with zero bits; NONE gives an
-- 8-bit zero.
function decode_const_a (t : constant_a_t; insn_in : std_ulogic_vector(31 downto 0)) return std_ulogic_vector is
begin
    if t = SH then
        return "00" & insn_sh(insn_in);
    elsif t = SH32 then
        return "000" & insn_sh32(insn_in);
    elsif t = FXM then
        return insn_fxm(insn_in);
    elsif t = BO then
        return "000" & insn_bo(insn_in);
    elsif t = BF then
        return "00000" & insn_bf(insn_in);
    elsif t = TOO then
        return "000" & insn_to(insn_in);
    elsif t = BC then
        return "000" & insn_bc(insn_in);
    else
        -- t = NONE
        return "00000000";
    end if;
end;

-- Extract the second constant operand from the instruction word.
--   t       : which instruction field to extract
--   insn_in : the 32-bit instruction word
-- Narrower fields are padded on the left with zero bits; NONE gives a
-- 6-bit zero.
function decode_const_b (t : constant_b_t; insn_in : std_ulogic_vector(31 downto 0)) return std_ulogic_vector is
begin
    if t = MB then
        return insn_mb(insn_in);
    elsif t = ME then
        return insn_me(insn_in);
    elsif t = MB32 then
        return "0" & insn_mb32(insn_in);
    elsif t = BI then
        return "0" & insn_bi(insn_in);
    elsif t = L then
        return "00000" & insn_l(insn_in);
    else
        -- t = NONE
        return "000000";
    end if;
end;

-- Extract the third constant operand from the instruction word.
--   t       : ME32 or BH select the corresponding field, NONE gives zero
--   insn_in : the 32-bit instruction word
-- The BH field is padded on the left with three zero bits; NONE gives a
-- 5-bit zero.
function decode_const_c (t : constant_c_t; insn_in : std_ulogic_vector(31 downto 0)) return std_ulogic_vector is
begin
    if t = ME32 then
        return insn_me32(insn_in);
    elsif t = BH then
        return "000" & insn_bh(insn_in);
    else
        -- t = NONE
        return "00000";
    end if;
end;

-- Work out the record (Rc) flag for an instruction: taken from the
-- instruction word when t = RC, forced to '1' for ONE and to '0' for NONE.
function decode_rc (t : rc_t; insn_in : std_ulogic_vector(31 downto 0)) return std_ulogic is
begin
    if t = RC then
        return insn_rc(insn_in);
    elsif t = ONE then
        return '1';
    else
        -- t = NONE
        return '0';
    end if;
end;
begin
-- Issue-control block.  decode2_1 feeds it the validity of the incoming
-- instruction plus the registers and CR usage of that instruction; it answers
-- with valid/stall decisions, per-operand bypass selects and an instruction tag.
control_0: entity work.control
generic map (
EX1_BYPASS => EX1_BYPASS
)
port map (
clk => clk,
rst => rst,

-- pipeline status inputs
complete_in => complete_in,
valid_in => control_valid_in,
repeated => r.repeat,
busy_in => busy_in,
deferred => deferred,
flush_in => flush_in,
sgl_pipe_in => control_sgl_pipe,
stop_mark_in => d_in.stop_mark,

-- destination register of the instruction being decoded
gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write,

-- source registers A, B and C of the instruction being decoded
gpr_a_read_valid_in => gpr_a_read_valid,
gpr_a_read_in => gpr_a_read,

gpr_b_read_valid_in => gpr_b_read_valid,
gpr_b_read_in => gpr_b_read,

gpr_c_read_valid_in => gpr_c_read_valid,
gpr_c_read_in => gpr_c_read,

-- tags carried on the execute bypass paths
execute_next_tag => execute_bypass.tag,
execute_next_cr_tag => execute_cr_bypass.tag,

-- condition register usage
cr_read_in => cr_read_valid,
cr_write_in => cr_write_valid,
cr_bypass => cr_bypass,

-- issue decisions
valid_out => control_valid_out,
stall_out => control_stall_out,
stopped_out => stopped_out,

-- when '1', decode2_1 takes the operand from execute_bypass.data
-- instead of the register file value
gpr_bypass_a => gpr_a_bypass,
gpr_bypass_b => gpr_b_bypass,
gpr_bypass_c => gpr_c_bypass,

-- tag assigned to the instruction, forwarded in v.e.instr_tag
instr_tag_out => instr_tag
);

-- The instruction currently held in r.e is "deferred" when it is valid but
-- the downstream side signals busy.
deferred <= r.e.valid and busy_in;

-- Output register: on each rising clock edge r takes the next-state value
-- rin, except while the held instruction is deferred.  Reset and flush always
-- update the register.
decode2_0: process(clk)
begin
if rising_edge(clk) then
if rst = '1' or flush_in = '1' or deferred = '0' then
-- simulation trace of the address of each instruction sent to execute
if rin.e.valid = '1' then
report "execute " & to_hstring(rin.e.nia);
end if;
r <= rin;
end if;
end if;
end process;

c_out.read <= d_in.decode.input_cr;

-- Combinational part of decode2: builds the next value of the output
-- register (rin) from the decoded instruction on d_in, drives the register
-- file read ports, and supplies the issue-control block with the register
-- and CR usage of the instruction.
decode2_1: process(all)
variable v : reg_type;
-- NOTE(review): mul_a and mul_b are only cleared below and never read
-- in this process.
variable mul_a : std_ulogic_vector(63 downto 0);
variable mul_b : std_ulogic_vector(63 downto 0);
variable decoded_reg_a : decode_input_reg_t;
variable decoded_reg_b : decode_input_reg_t;
variable decoded_reg_c : decode_input_reg_t;
variable decoded_reg_o : decode_output_reg_t;
variable length : std_ulogic_vector(3 downto 0);
variable op : insn_type_t;
begin
v := r;

-- start every cycle from the initial (idle) execute record
v.e := Decode2ToExecute1Init;

mul_a := (others => '0');
mul_b := (others => '0');

--v.e.input_cr := d_in.decode.input_cr;
v.e.output_cr := d_in.decode.output_cr;

-- Work out whether XER common bits are set
v.e.output_xer := d_in.decode.output_carry;
case d_in.decode.insn_type is
when OP_ADD | OP_MUL_L64 | OP_DIV | OP_DIVE =>
-- OE field is valid in OP_ADD/OP_MUL_L64 with major opcode 31 only
if d_in.insn(31 downto 26) = "011111" and insn_oe(d_in.insn) = '1' then
v.e.oe := '1';
v.e.output_xer := '1';
end if;
when OP_MTSPR =>
if decode_spr_num(d_in.insn) = SPR_XER then
v.e.output_xer := '1';
end if;
when others =>
end case;

-- decode the three source operands and the destination register
decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1,
d_in.nia);
decoded_reg_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, r_in.read2_data, d_in.ispr2);
decoded_reg_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn, r_in.read3_data);
decoded_reg_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn, d_in.ispro);

if d_in.decode.lr = '1' then
v.e.lr := insn_lk(d_in.insn);
-- b and bc have even major opcodes; bcreg is considered absolute
v.e.br_abs := insn_aa(d_in.insn) or d_in.insn(26);
end if;
op := d_in.decode.insn_type;

-- Instructions that are issued twice: r.repeat is '0' on the first pass
-- and '1' on the second (see v.repeat further down).
if d_in.decode.repeat /= NONE then
v.e.repeat := '1';
v.e.second := r.repeat;
case d_in.decode.repeat is
when DRSE =>
-- do RS|1,RS for LE; RS,RS|1 for BE
if r.repeat = d_in.big_endian then
decoded_reg_c.reg(0) := '1';
end if;
when DRTE =>
-- do RT|1,RT for LE; RT,RT|1 for BE
if r.repeat = d_in.big_endian then
decoded_reg_o.reg(0) := '1';
end if;
when DUPD =>
-- update-form loads, 2nd instruction writes RA
if r.repeat = '1' then
decoded_reg_o.reg := decoded_reg_a.reg;
end if;
when others =>
end case;
elsif v.e.lr = '1' and decoded_reg_a.reg_valid = '1' then
-- bcl/bclrl/bctarl that needs to write both CTR and LR has to be doubled
v.e.repeat := '1';
v.e.second := r.repeat;
-- first one does CTR, second does LR
decoded_reg_o.reg(0) := not r.repeat;
end if;

-- register file read ports; reads are only enabled for a valid instruction
r_out.read1_enable <= decoded_reg_a.reg_valid and d_in.valid;
r_out.read1_reg <= decoded_reg_a.reg;
r_out.read2_enable <= decoded_reg_b.reg_valid and d_in.valid;
r_out.read2_reg <= decoded_reg_b.reg;
r_out.read3_enable <= decoded_reg_c.reg_valid and d_in.valid;
r_out.read3_reg <= decoded_reg_c.reg;

-- encode the access length as a byte count (1, 2, 4 or 8; 0 for NONE)
case d_in.decode.length is
when is1B =>
length := "0001";
when is2B =>
length := "0010";
when is4B =>
length := "0100";
when is8B =>
length := "1000";
when NONE =>
length := "0000";
end case;

-- execute unit
v.e.nia := d_in.nia;
v.e.unit := d_in.decode.unit;
v.e.fac := d_in.decode.facility;
v.e.instr_tag := instr_tag;
v.e.read_reg1 := decoded_reg_a.reg;
v.e.read_reg2 := decoded_reg_b.reg;
v.e.write_reg := decoded_reg_o.reg;
v.e.write_reg_enable := decoded_reg_o.reg_valid;
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.e.xerc := c_in.read_xerc_data;
v.e.invert_a := d_in.decode.invert_a;
v.e.addm1 := '0';
v.e.insn_type := op;
v.e.invert_out := d_in.decode.invert_out;
v.e.input_carry := d_in.decode.input_carry;
v.e.output_carry := d_in.decode.output_carry;
v.e.is_32bit := d_in.decode.is_32bit;
v.e.is_signed := d_in.decode.is_signed;
v.e.insn := d_in.insn;
v.e.data_len := length;
v.e.byte_reverse := d_in.decode.byte_reverse;
v.e.sign_extend := d_in.decode.sign_extend;
v.e.update := d_in.decode.update;
v.e.reserve := d_in.decode.reserve;
v.e.br_pred := d_in.br_pred;
-- result mux selects come from the per-instruction-type tables
v.e.result_sel := result_select(op);
v.e.sub_select := subresult_select(op);
if op = OP_BC or op = OP_BCREG then
if d_in.insn(23) = '0' and r.repeat = '0' and
not (d_in.decode.insn_type = OP_BCREG and d_in.insn(10) = '0') then
-- decrement CTR if BO(2) = 0 and not bcctr
v.e.addm1 := '1';
v.e.result_sel := "000"; -- select adder output
end if;
end if;

-- See if any of the operands can get their value via the bypass path.
case gpr_a_bypass is
when '1' =>
v.e.read_data1 := execute_bypass.data;
when others =>
v.e.read_data1 := decoded_reg_a.data;
end case;
case gpr_b_bypass is
when '1' =>
v.e.read_data2 := execute_bypass.data;
when others =>
v.e.read_data2 := decoded_reg_b.data;
end case;
case gpr_c_bypass is
when '1' =>
v.e.read_data3 := execute_bypass.data;
when others =>
v.e.read_data3 := decoded_reg_c.data;
end case;

-- CR value: from the CR file unless the bypass path is selected
v.e.cr := c_in.read_cr_data;
if cr_bypass = '1' then
v.e.cr := execute_cr_bypass.data;
end if;

-- issue control
control_valid_in <= d_in.valid;
control_sgl_pipe <= d_in.decode.sgl_pipe;

gpr_write_valid <= v.e.write_reg_enable;
gpr_write <= decoded_reg_o.reg;

gpr_a_read_valid <= decoded_reg_a.reg_valid;
gpr_a_read <= decoded_reg_a.reg;

gpr_b_read_valid <= decoded_reg_b.reg_valid;
gpr_b_read <= decoded_reg_b.reg;

gpr_c_read_valid <= decoded_reg_c.reg_valid;
gpr_c_read <= decoded_reg_c.reg;

cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);
-- Since ops that write CR only write some of the fields,
-- any op that writes CR effectively also reads it.
cr_read_valid <= cr_write_valid or d_in.decode.input_cr;

v.e.valid := control_valid_out;
-- once the first pass of a repeated instruction is issued, flag that the
-- next pass is the second one
if control_valid_out = '1' then
v.repeat := v.e.repeat and not r.repeat;
end if;

-- hold the instruction at the input while its second pass is pending
stall_out <= control_stall_out or v.repeat;

if rst = '1' or flush_in = '1' then
v.e := Decode2ToExecute1Init;
v.repeat := '0';
end if;

-- Update registers
rin <= v;

-- Update outputs
e_out <= r.e;
end process;

-- Optional logging, generated only when LOG_LENGTH > 0.  Ten status bits
-- are sampled on every rising clock edge and driven onto log_out.
d2_log: if LOG_LENGTH > 0 generate
    signal log_data : std_ulogic_vector(9 downto 0);
begin
    dec2_log : process(clk)
    begin
        if rising_edge(clk) then
            -- bits 9..6: NIA bits 5..2 of the instruction in r.e
            log_data(9 downto 6) <= r.e.nia(5 downto 2);
            -- bits 5..3: valid, stopped and stall status
            log_data(5) <= r.e.valid;
            log_data(4) <= stopped_out;
            log_data(3) <= stall_out;
            -- bits 2..0: bypass selects for operands A, B and C
            log_data(2) <= gpr_a_bypass;
            log_data(1) <= gpr_b_bypass;
            log_data(0) <= gpr_c_bypass;
        end if;
    end process;

    log_out <= log_data;
end generate;

-- Output register.  On each rising clock edge the next-state values are
-- latched, and nia_out is loaded with bits 63..2 of the address of the
-- instruction being issued, or with zero when no unit receives a valid
-- instruction this cycle.
decode2_0: process(clk)
begin
    if rising_edge(clk) then
        if rin.e.valid = '1' or rin.l.valid = '1' or rin.m.valid = '1' then
            nia_out <= rin.e.nia(63 downto 2);
            -- simulation trace of each issued instruction address
            report "execute " & to_hstring(rin.e.nia);
        else
            nia_out <= (others => '0');
        end if;

        r <= rin;
        r_int <= rin_int;
    end if;
end process;

-- Register file read indices, taken straight from the instruction word
-- according to the decoded operand selectors.  Any selector that does not
-- name a register gives index 0.
with d_in.decode.input_reg_a select r_out.read1_reg <=
    insn_ra(d_in.insn) when RA | RA_OR_ZERO,
    insn_rs(d_in.insn) when RS,
    (others => '0')    when others;

with d_in.decode.input_reg_b select r_out.read2_reg <=
    insn_rb(d_in.insn) when RB,
    insn_rs(d_in.insn) when RS,
    (others => '0')    when others;

with d_in.decode.input_reg_c select r_out.read3_reg <=
    insn_rs(d_in.insn) when RS,
    (others => '0')    when others;

c_out.read <= d_in.decode.input_cr;

-- Combinational part of decode2: decodes the operands of the instruction on
-- d_in, fills in the request records for the execute, multiply and load/store
-- units, and runs the state machine that forces single-pipe instructions to
-- go through the pipeline alone.
decode2_1: process(all)
variable v : reg_type;
variable v_int : reg_internal_type;
variable mul_a : std_ulogic_vector(63 downto 0);
variable mul_b : std_ulogic_vector(63 downto 0);
variable decoded_reg_a : decode_input_reg_t;
variable decoded_reg_b : decode_input_reg_t;
variable decoded_reg_c : decode_input_reg_t;
-- validity of the current instruction after the single-issue checks
variable is_valid : std_ulogic;
begin
v := r;
v_int := r_int;

-- start every cycle from the initial (idle) records
v.e := Decode2ToExecute1Init;
v.l := Decode2ToLoadStore1Init;
v.m := Decode2ToMultiplyInit;

mul_a := (others => '0');
mul_b := (others => '0');

--v.e.input_cr := d_in.decode.input_cr;
--v.m.input_cr := d_in.decode.input_cr;
--v.e.output_cr := d_in.decode.output_cr;

decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data);
decoded_reg_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, r_in.read2_data);
decoded_reg_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn, r_in.read3_data);

-- enable a register file read port only for operands that name a register
r_out.read1_enable <= decoded_reg_a.reg_valid;
r_out.read2_enable <= decoded_reg_b.reg_valid;
r_out.read3_enable <= decoded_reg_c.reg_valid;

-- execute unit
v.e.nia := d_in.nia;
v.e.insn_type := d_in.decode.insn_type;
v.e.read_reg1 := decoded_reg_a.reg;
v.e.read_data1 := decoded_reg_a.data;
v.e.read_reg2 := decoded_reg_b.reg;
v.e.read_data2 := decoded_reg_b.data;
v.e.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn);
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.e.cr := c_in.read_cr_data;
v.e.input_carry := d_in.decode.input_carry;
v.e.output_carry := d_in.decode.output_carry;
if d_in.decode.lr = '1' then
v.e.lr := insn_lk(d_in.insn);
end if;
v.e.const1 := decode_const_a(d_in.decode.const_a, d_in.insn);
v.e.const2 := decode_const_b(d_in.decode.const_b, d_in.insn);
v.e.const3 := decode_const_c(d_in.decode.const_c, d_in.insn);

-- multiply unit
v.m.insn_type := d_in.decode.insn_type;
mul_a := decoded_reg_a.data;
mul_b := decoded_reg_b.data;
v.m.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn);
v.m.rc := decode_rc(d_in.decode.rc, d_in.insn);

-- The multiplier operands are one bit wider than the 64-bit inputs.
-- 32-bit multiplies use only the low words, sign- or zero-extended;
-- 64-bit multiplies get one extra sign or zero bit on top.
if d_in.decode.mul_32bit = '1' then
if d_in.decode.mul_signed = '1' then
v.m.data1 := (others => mul_a(31));
v.m.data1(31 downto 0) := mul_a(31 downto 0);
v.m.data2 := (others => mul_b(31));
v.m.data2(31 downto 0) := mul_b(31 downto 0);
else
v.m.data1 := '0' & x"00000000" & mul_a(31 downto 0);
v.m.data2 := '0' & x"00000000" & mul_b(31 downto 0);
end if;
else
if d_in.decode.mul_signed = '1' then
v.m.data1 := mul_a(63) & mul_a;
v.m.data2 := mul_b(63) & mul_b;
else
v.m.data1 := '0' & mul_a;
v.m.data2 := '0' & mul_b;
end if;
end if;

-- load/store unit
v.l.update_reg := decoded_reg_a.reg;
v.l.addr1 := decoded_reg_a.data;
v.l.addr2 := decoded_reg_b.data;
v.l.data := decoded_reg_c.data;
v.l.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn);

if d_in.decode.insn_type = OP_LOAD then
v.l.load := '1';
else
v.l.load := '0';
end if;

-- encode the access length as a byte count (1, 2, 4 or 8; 0 for NONE)
case d_in.decode.length is
when is1B =>
v.l.length := "0001";
when is2B =>
v.l.length := "0010";
when is4B =>
v.l.length := "0100";
when is8B =>
v.l.length := "1000";
when NONE =>
v.l.length := "0000";
end case;

v.l.byte_reverse := d_in.decode.byte_reverse;
v.l.sign_extend := d_in.decode.sign_extend;
v.l.update := d_in.decode.update;

-- single issue

-- a completion retires one outstanding instruction; this happens before
-- the state machine so a completion this cycle is already accounted for
if complete_in = '1' then
v_int.outstanding := v_int.outstanding - 1;
end if;

-- state machine to handle instructions that must be single
-- through the pipeline.
stall_out <= '0';
is_valid := d_in.valid;
case v_int.state is
when IDLE =>
if (flush_in = '0') and (d_in.valid = '1') and (d_in.decode.sgl_pipe = '1') then
if v_int.outstanding /= 0 then
-- earlier instructions are still in flight: hold this one back
v_int.state := WAIT_FOR_PREV_TO_COMPLETE;
stall_out <= '1';
is_valid := '0';
else
-- send insn out and wait on it to complete
v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
end if;
end if;

when WAIT_FOR_PREV_TO_COMPLETE =>
if v_int.outstanding = 0 then
-- send insn out and wait on it to complete
v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
else
stall_out <= '1';
is_valid := '0';
end if;

when WAIT_FOR_CURR_TO_COMPLETE =>
if v_int.outstanding = 0 then
v_int.state := IDLE;
else
stall_out <= '1';
is_valid := '0';
end if;
end case;

-- route the valid flag to the unit that handles this instruction; an
-- instruction with no unit goes to execute as OP_ILLEGAL
v.e.valid := '0';
v.m.valid := '0';
v.l.valid := '0';
case d_in.decode.unit is
when ALU =>
v.e.valid := is_valid;
when LDST =>
v.l.valid := is_valid;
when MUL =>
v.m.valid := is_valid;
when NONE =>
v.e.valid := is_valid;
v.e.insn_type := OP_ILLEGAL;
end case;

-- a flush cancels whatever would have been issued this cycle
if flush_in = '1' then
v.e.valid := '0';
v.m.valid := '0';
v.l.valid := '0';
end if;

-- track outstanding instructions
if v.e.valid = '1' or v.l.valid = '1' or v.m.valid = '1' then
v_int.outstanding := v_int.outstanding + 1;
end if;

-- simulation check: the registered outstanding count must never exceed one
assert r_int.outstanding <= 1 report "Outstanding bad " & integer'image(r_int.outstanding) severity failure;

if rst = '1' then
v_int.state := IDLE;
v_int.outstanding := 0;
v.e := Decode2ToExecute1Init;
v.l := Decode2ToLoadStore1Init;
v.m := Decode2ToMultiplyInit;
end if;

-- Update registers
rin <= v;
rin_int <= v_int;

-- Update outputs
e_out <= r.e;
l_out <= r.l;
m_out <= r.m;
end process;
end architecture behaviour;

@ -2,103 +2,138 @@ library ieee;
use ieee.std_logic_1164.all;

package decode_types is
type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD,
OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG,
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
OP_CNTZ, OP_CROP,
OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS, OP_EXTSWSLI,
OP_FPOP, OP_FPOP_I,
OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_LOAD, OP_STORE,
OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR, OP_MOD,
OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_OR,
OP_POPCNT, OP_PRTY, OP_RFID,
OP_RLC, OP_RLCL, OP_RLCR, OP_SC, OP_SETB,
OP_SHL, OP_SHR,
OP_SYNC, OP_TLBIE, OP_TRAP,
OP_XOR,
OP_BCD, OP_ADDG6S,
OP_FETCH_FAILED
);
type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA, FRA);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, CONST_SH, CONST_SH32, SPR, FRB);
type input_reg_c_t is (NONE, RS, RCR, FRC, FRS);
type output_reg_a_t is (NONE, RT, RA, SPR, FRT);
type rc_t is (NONE, ONE, RC);
type carry_in_t is (ZERO, CA, OV, ONE);

constant SH_OFFSET : integer := 0;
constant MB_OFFSET : integer := 1;
constant ME_OFFSET : integer := 1;
constant SH32_OFFSET : integer := 0;
constant MB32_OFFSET : integer := 1;
constant ME32_OFFSET : integer := 2;

constant FXM_OFFSET : integer := 0;

constant BO_OFFSET : integer := 0;
constant BI_OFFSET : integer := 1;
constant BH_OFFSET : integer := 2;

constant BF_OFFSET : integer := 0;
constant L_OFFSET : integer := 1;

constant TOO_OFFSET : integer := 0;

type unit_t is (NONE, ALU, LDST, FPU);
type facility_t is (NONE, FPU);
type length_t is (NONE, is1B, is2B, is4B, is8B);

type repeat_t is (NONE, -- instruction is not repeated
DRSE, -- double RS, endian twist
DRTE, -- double RT, endian twist
DUPD); -- update-form load

-- One entry of the instruction decode table: everything the later pipeline
-- stages need to know about an instruction, independent of its operands.
type decode_rom_t is record
-- execution unit and facility the instruction belongs to
unit : unit_t;
facility : facility_t;
-- internal operation code
insn_type : insn_type_t;
-- where each source operand and the destination come from
input_reg_a : input_reg_a_t;
input_reg_b : input_reg_b_t;
input_reg_c : input_reg_c_t;
output_reg_a : output_reg_a_t;

-- condition register read / write
input_cr : std_ulogic;
output_cr : std_ulogic;

-- operand / result inversion and carry handling
invert_a : std_ulogic;
invert_out : std_ulogic;
input_carry : carry_in_t;
output_carry : std_ulogic;

-- load/store signals
length : length_t;
byte_reverse : std_ulogic;
sign_extend : std_ulogic;
update : std_ulogic;
reserve : std_ulogic;

-- multiplier and ALU signals
is_32bit : std_ulogic;
is_signed : std_ulogic;

-- how the record (Rc) flag is derived, and whether the instruction has
-- an LK bit
rc : rc_t;
lr : std_ulogic;

-- '1' for instructions that are flagged to issue control as single-pipe
sgl_pipe : std_ulogic;
-- whether and how the instruction is issued twice (see repeat_t)
repeat : repeat_t;
end record;
-- Default table entry: an illegal instruction with no operands and every
-- control flag cleared.
constant decode_rom_init : decode_rom_t := (unit => NONE, facility => NONE,
insn_type => OP_ILLEGAL, input_reg_a => NONE,
input_reg_b => NONE, input_reg_c => NONE,
output_reg_a => NONE, input_cr => '0', output_cr => '0',
invert_a => '0', invert_out => '0', input_carry => ZERO, output_carry => '0',
length => NONE, byte_reverse => '0', sign_extend => '0',
update => '0', reserve => '0', is_32bit => '0',
is_signed => '0', rc => NONE, lr => '0', sgl_pipe => '0', repeat => NONE);
type ppc_insn_t is (PPC_ILLEGAL, PPC_ADD, PPC_ADDC, PPC_ADDE,
PPC_ADDEX, PPC_ADDI, PPC_ADDIC, PPC_ADDIC_RC, PPC_ADDIS,
PPC_ADDME, PPC_ADDPCIS, PPC_ADDZE, PPC_AND, PPC_ANDC,
PPC_ANDI_RC, PPC_ANDIS_RC, PPC_ATTN, PPC_B, PPC_BA, PPC_BC,
PPC_BCA, PPC_BCCTR, PPC_BCLA, PPC_BCLR, PPC_BCTAR, PPC_BPERM,
PPC_CMP, PPC_CMPB, PPC_CMPEQB, PPC_CMPI, PPC_CMPL, PPC_CMPLI,
PPC_CMPRB, PPC_CNTLZD, PPC_CNTLZW, PPC_CNTTZD, PPC_CNTTZW,
PPC_CRAND, PPC_CRANDC, PPC_CREQV, PPC_CRNAND, PPC_CRNOR,
PPC_CROR, PPC_CRORC, PPC_CRXOR, PPC_DARN, PPC_DCBF, PPC_DCBST,
PPC_DCBT, PPC_DCBTST, PPC_DCBZ, PPC_DIVD, PPC_DIVDE,
PPC_DIVDEU, PPC_DIVDU, PPC_DIVW, PPC_DIVWE, PPC_DIVWEU,
PPC_DIVWU, PPC_EQV, PPC_EXTSB, PPC_EXTSH, PPC_EXTSW,
PPC_EXTSWSLI, PPC_ICBI, PPC_ICBT, PPC_ISEL, PPC_ISYNC,
PPC_LBARX, PPC_LBZ, PPC_LBZU, PPC_LBZUX, PPC_LBZX, PPC_LD,
PPC_LDARX, PPC_LDBRX, PPC_LDU, PPC_LDUX, PPC_LDX, PPC_LHA,
PPC_LHARX, PPC_LHAU, PPC_LHAUX, PPC_LHAX, PPC_LHBRX, PPC_LHZ,
PPC_LHZU, PPC_LHZUX, PPC_LHZX, PPC_LWA, PPC_LWARX, PPC_LWAUX,
PPC_LWAX, PPC_LWBRX, PPC_LWZ, PPC_LWZU, PPC_LWZUX, PPC_LWZX,
PPC_MADDHD, PPC_MADDHDU, PPC_MADDLD, PPC_MCRF, PPC_MCRXR,
PPC_MCRXRX, PPC_MFCR, PPC_MFOCRF, PPC_MFSPR, PPC_MFTB,
PPC_MODSD, PPC_MODSW, PPC_MODUD, PPC_MODUW, PPC_MTCRF,
PPC_MFCTR, PPC_MTCTR, PPC_MFLR, PPC_MTLR, PPC_MTOCRF,
PPC_MTSPR, PPC_MULHD, PPC_MULHDU, PPC_MULHW, PPC_MULHWU,
PPC_MULLD, PPC_MULLI, PPC_MULLW, PPC_NAND, PPC_NEG, PPC_NOR,
PPC_OR, PPC_ORC, PPC_ORI, PPC_ORIS, PPC_POPCNTB, PPC_POPCNTD,
PPC_POPCNTW, PPC_PRTYD, PPC_PRTYW, PPC_RLDCL, PPC_RLDCR,
PPC_RLDIC, PPC_RLDICL, PPC_RLDICR, PPC_RLDIMI, PPC_RLWIMI,
PPC_RLWINM, PPC_RLWNM, PPC_SETB, PPC_SLD, PPC_SLW, PPC_SRAD,
PPC_SRADI, PPC_SRAW, PPC_SRAWI, PPC_SRD, PPC_SRW, PPC_STB,
PPC_STBCX, PPC_STBU, PPC_STBUX, PPC_STBX, PPC_STD, PPC_STDBRX,
PPC_STDCX, PPC_STDU, PPC_STDUX, PPC_STDX, PPC_STH, PPC_STHBRX,
PPC_STHCX, PPC_STHU, PPC_STHUX, PPC_STHX, PPC_STW, PPC_STWBRX,
PPC_STWCX, PPC_STWU, PPC_STWUX, PPC_STWX, PPC_SUBF, PPC_SUBFC,
PPC_SUBFE, PPC_SUBFIC, PPC_SUBFME, PPC_SUBFZE, PPC_SYNC, PPC_TD,
PPC_TDI, PPC_TW, PPC_TWI, PPC_XOR, PPC_XORI, PPC_XORIS,
PPC_SIM_CONFIG);

type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD, OP_ADDC, OP_ADDEX, OP_ADDME,
OP_ADDPCIS, OP_AND, OP_ANDC, OP_ATTN, OP_B, OP_BA, OP_BC,
OP_BCA, OP_BCCTR, OP_BCLA, OP_BCLR, OP_BCTAR, OP_BPERM, OP_CMP,
OP_CMPB, OP_CMPEQB, OP_CMPL, OP_CMPRB,
OP_CNTLZD, OP_CNTLZW, OP_CNTTZD, OP_CNTTZW, OP_CRAND,
OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC,
OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
OP_DCBZ, OP_DIVD, OP_DIVDE, OP_DIVDEU, OP_DIVDU, OP_DIVW,
OP_DIVWE, OP_DIVWEU, OP_DIVWU, OP_EQV, OP_EXTSB, OP_EXTSH,
OP_EXTSW, OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD, OP_MCRF,
OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFOCRF, OP_MFCTR, OP_MFLR,
OP_MFTB, OP_MFSPR, OP_MODSD, OP_MODSW, OP_MODUD, OP_MODUW,
OP_MTCRF, OP_MTOCRF, OP_MTCTR, OP_MTLR, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_NAND, OP_NEG, OP_NOR, OP_OR,
OP_ORC, OP_POPCNTB, OP_POPCNTD, OP_POPCNTW, OP_PRTYD,
OP_PRTYW, OP_RLDCL, OP_RLDCR, OP_RLDIC, OP_RLDICL, OP_RLDICR,
OP_RLDIMI, OP_RLWIMI, OP_RLWINM, OP_RLWNM, OP_SETB, OP_SLD,
OP_SLW, OP_SRAD, OP_SRADI, OP_SRAW, OP_SRAWI, OP_SRD, OP_SRW,
OP_SUBF, OP_SUBFC, OP_SUBFME, OP_SYNC, OP_TD, OP_TDI, OP_TW,
OP_TWI, OP_XOR, OP_SIM_CONFIG);

type input_reg_a_t is (NONE, RA, RA_OR_ZERO, RS);
type input_reg_b_t is (NONE, RB, RS, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS);
type input_reg_c_t is (NONE, RS);
type output_reg_a_t is (NONE, RT, RA);
type constant_a_t is (NONE, SH, SH32, FXM, BO, BF, TOO, BC);
type constant_b_t is (NONE, MB, ME, MB32, BI, L);
type constant_c_t is (NONE, ME32, BH);
type rc_t is (NONE, ONE, RC);

constant SH_OFFSET : integer := 0;
constant MB_OFFSET : integer := 1;
constant ME_OFFSET : integer := 1;
constant SH32_OFFSET : integer := 0;
constant MB32_OFFSET : integer := 1;
constant ME32_OFFSET : integer := 2;

constant FXM_OFFSET : integer := 0;

constant BO_OFFSET : integer := 0;
constant BI_OFFSET : integer := 1;
constant BH_OFFSET : integer := 2;

constant BF_OFFSET : integer := 0;
constant L_OFFSET : integer := 1;

constant TOO_OFFSET : integer := 0;

type unit_t is (NONE, ALU, LDST, MUL);
type length_t is (NONE, is1B, is2B, is4B, is8B);

-- One entry of the instruction decode table: everything the later pipeline
-- stages need to know about an instruction, independent of its operands.
type decode_rom_t is record
-- execution unit that handles the instruction
unit : unit_t;
-- internal operation code
insn_type : insn_type_t;
-- where each source operand and the destination come from
input_reg_a : input_reg_a_t;
input_reg_b : input_reg_b_t;
input_reg_c : input_reg_c_t;
output_reg_a : output_reg_a_t;

-- which instruction fields are extracted as constants 1, 2 and 3
const_a : constant_a_t;
const_b : constant_b_t;
const_c : constant_c_t;

-- condition register read / write
input_cr : std_ulogic;
output_cr : std_ulogic;

-- carry in / carry out
input_carry : std_ulogic;
output_carry : std_ulogic;

-- load/store signals
length : length_t;
byte_reverse : std_ulogic;
sign_extend : std_ulogic;
update : std_ulogic;
reserve : std_ulogic;

-- multiplier signals
mul_32bit : std_ulogic;
mul_signed : std_ulogic;

-- how the record (Rc) flag is derived, and whether the instruction has
-- an LK bit
rc : rc_t;
lr : std_ulogic;

-- '1' for instructions that must go through the pipeline on their own
sgl_pipe : std_ulogic;
end record;
-- Default table entry: an illegal instruction with no operands and every
-- control flag cleared.
constant decode_rom_init : decode_rom_t := (unit => NONE,
insn_type => OP_ILLEGAL, input_reg_a => NONE,
input_reg_b => NONE, input_reg_c => NONE,
output_reg_a => NONE, const_a => NONE, const_b => NONE,
const_c => NONE, input_cr => '0', output_cr => '0',
input_carry => '0', output_carry => '0',
length => NONE, byte_reverse => '0', sign_extend => '0',
update => '0', reserve => '0', mul_32bit => '0',
mul_signed => '0', rc => NONE, lr => '0', sgl_pipe => '0');

end decode_types;


@ -1,137 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;
use work.decode_types.all;

-- Multi-cycle integer divider.  A request arrives on d_in with its valid
-- flag set; some cycles later the result appears on d_out with its valid
-- flag set.
entity divider is
port (
clk : in std_logic;
rst : in std_logic;
-- request: operands plus mode flags
d_in : in Execute1ToDividerType;
-- response: result data, overflow flag and valid
d_out : out DividerToExecute1Type
);
end entity divider;

architecture behaviour of divider is
signal dend : std_ulogic_vector(128 downto 0);
signal div : unsigned(63 downto 0);
signal quot : std_ulogic_vector(63 downto 0);
signal result : std_ulogic_vector(63 downto 0);
signal sresult : std_ulogic_vector(64 downto 0);
signal oresult : std_ulogic_vector(63 downto 0);
signal running : std_ulogic;
signal count : unsigned(6 downto 0);
signal neg_result : std_ulogic;
signal is_modulus : std_ulogic;
signal is_32bit : std_ulogic;
signal extended : std_ulogic;
signal is_signed : std_ulogic;
signal overflow : std_ulogic;
signal ovf32 : std_ulogic;
signal did_ovf : std_ulogic;
begin
-- Sequential core of the divider: a shift-and-subtract loop that produces one
-- quotient bit per cycle, with a shortcut that skips eight bits at a time
-- while the upper part of the working dividend is all zeroes.
divider_0: process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
dend <= (others => '0');
div <= (others => '0');
quot <= (others => '0');
running <= '0';
count <= "0000000";
is_32bit <= '0';
overflow <= '0';
elsif d_in.valid = '1' then
-- New request: load the working dividend.  Extended divides place the
-- dividend in the upper 64 bits, others in the lower 64 bits.
if d_in.is_extended = '1' then
dend <= '0' & d_in.dividend & x"0000000000000000";
else
dend <= '0' & x"0000000000000000" & d_in.dividend;
end if;
div <= unsigned(d_in.divisor);
quot <= (others => '0');
neg_result <= d_in.neg_result;
is_modulus <= d_in.is_modulus;
extended <= d_in.is_extended;
is_32bit <= d_in.is_32bit;
is_signed <= d_in.is_signed;
-- count starts at all ones so that the first step wraps it to zero
count <= "1111111";
running <= '1';
overflow <= '0';
ovf32 <= '0';
elsif running = '1' then
-- stop after the step taken with count = 63
if count = "0111111" then
running <= '0';
end if;
-- remember the top quotient bit before it is shifted out
overflow <= quot(63);
if dend(128) = '1' or unsigned(dend(127 downto 64)) >= div then
-- divisor fits: subtract it and shift in a '1' quotient bit
ovf32 <= ovf32 or quot(31);
dend <= std_ulogic_vector(unsigned(dend(127 downto 64)) - div) &
dend(63 downto 0) & '0';
quot <= quot(62 downto 0) & '1';
count <= count + 1;
elsif dend(128 downto 57) = x"000000000000000000" and count(6 downto 3) /= "0111" then
-- consume 8 bits of zeroes in one cycle
ovf32 <= or (ovf32 & quot(31 downto 24));
dend <= dend(120 downto 0) & x"00";
quot <= quot(55 downto 0) & x"00";
count <= count + 8;
else
-- divisor does not fit: shift in a '0' quotient bit
ovf32 <= ovf32 or quot(31);
dend <= dend(127 downto 0) & '0';
quot <= quot(62 downto 0) & '0';
count <= count + 1;
end if;
else
-- idle
count <= "0000000";
end if;
end if;
end process;

-- Combinational post-processing of the raw divider state: selects quotient
-- or remainder, applies the sign, detects overflow and formats the result.
divider_1: process(all)
begin
-- the remainder is left in the upper part of the working dividend
if is_modulus = '1' then
result <= dend(128 downto 65);
else
result <= quot;
end if;
-- negate when requested; sresult carries one extra bit on top
if neg_result = '1' then
sresult <= std_ulogic_vector(- signed('0' & result));
else
sresult <= '0' & result;
end if;
-- overflow detection for 64-bit, signed 32-bit and unsigned 32-bit cases
did_ovf <= '0';
if is_32bit = '0' then
did_ovf <= overflow or (is_signed and (sresult(64) xor sresult(63)));
elsif is_signed = '1' then
if ovf32 = '1' or sresult(32) /= sresult(31) then
did_ovf <= '1';
end if;
else
did_ovf <= ovf32;
end if;
-- an overflowed operation returns zero
if did_ovf = '1' then
oresult <= (others => '0');
elsif (is_32bit = '1') and (is_modulus = '0') then
-- 32-bit divisions set the top 32 bits of the result to 0
oresult <= x"00000000" & sresult(31 downto 0);
else
oresult <= sresult(63 downto 0);
end if;
end process;

-- Output register: the formatted result and overflow flag are registered
-- every cycle; valid is set on the cycle after count reaches 64 and is
-- clear otherwise.
divider_out: process(clk)
begin
    if rising_edge(clk) then
        d_out.write_reg_data <= oresult;
        d_out.overflow <= did_ovf;

        if count = "1000000" then
            d_out.valid <= '1';
        else
            d_out.valid <= '0';
        end if;
    end if;
end process;

end architecture behaviour;

@ -1,523 +0,0 @@
library vunit_lib;
context vunit_lib.vunit_context;

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.decode_types.all;
use work.common.all;
use work.ppc_fx_insns.all;

library osvvm;
use osvvm.RandomPkg.all;

entity divider_tb is
generic (runner_cfg : string := runner_cfg_default);
end divider_tb;

architecture behave of divider_tb is
signal clk : std_ulogic;
signal rst : std_ulogic;
constant clk_period : time := 10 ns;

signal d1 : Execute1ToDividerType;
signal d2 : DividerToExecute1Type;
begin
divider_0: entity work.divider
port map (clk => clk, rst => rst, d_in => d1, d_out => d2);

-- Free-running testbench clock: low for the first half of each period,
-- high for the second half.
clk_process: process
    constant half_period : time := clk_period/2;
begin
    clk <= '0';
    wait for half_period;
    clk <= '1';
    wait for half_period;
end process;

-- Stimulus and checking for the divider.
--
-- Each VUnit test case drives randomly generated operands into the divider
-- through d1, waits for d2.valid and compares d2.write_reg_data with a
-- reference value computed in the testbench.  Signed operations feed the
-- divider operand magnitudes and request negation via neg_result.
stim_process: process
    variable ra, rb, behave_rt: std_ulogic_vector(63 downto 0);
    variable d128: std_ulogic_vector(127 downto 0);
    variable q128: std_ulogic_vector(127 downto 0);
    variable q64: std_ulogic_vector(63 downto 0);
    variable rem32: std_ulogic_vector(31 downto 0);
    variable rnd : RandomPType;

    -- Returns v unchanged when its sign bit is clear, otherwise its
    -- two's-complement negation.
    function magnitude(v : std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
    begin
        if v(63) = '0' then
            return v;
        end if;
        return std_ulogic_vector(- signed(v));
    end function;

    -- Polls d2.valid once per clock period, for at most 67 periods, then
    -- checks that the divider did flag a result.
    procedure wait_for_valid is
    begin
        for j in 0 to 66 loop
            wait for clk_period;
            exit when d2.valid = '1';
        end loop;
        check_true(?? d2.valid, result("for valid"));
    end procedure;

    -- Pulses d1.valid for one clock period to start the operation that
    -- has been set up on d1, then waits for its result.
    procedure run_division is
    begin
        d1.valid <= '1';
        wait for clk_period;
        d1.valid <= '0';
        wait_for_valid;
    end procedure;
begin
    rnd.InitSeed(stim_process'path_name);

    test_runner_setup(runner, runner_cfg);

    while test_suite loop
        rst <= '1';
        wait for clk_period;
        rst <= '0';

        -- Default operation flags; each test overrides what it needs
        d1.is_signed <= '0';
        d1.neg_result <= '0';
        d1.is_extended <= '0';
        d1.is_32bit <= '0';
        d1.is_modulus <= '0';
        d1.valid <= '0';

        if run("Test interface") then
            d1.dividend <= x"0000000010001000";
            d1.divisor <= x"0000000000001111";

            -- Issue the same request twice and check the valid handshake
            -- around each result.
            for attempt in 1 to 2 loop
                d1.valid <= '1';

                wait for clk_period;
                check_false(?? d2.valid, result("for valid"));

                d1.valid <= '0';

                wait_for_valid;
                check_equal(d2.write_reg_data, 16#f001#);

                wait for clk_period;
                check_false(?? d2.valid, result("for valid"));
            end loop;

        elsif run("Test divd") then
            divd_loop : for dlength in 1 to 8 loop
                for vlength in 1 to dlength loop
                    for i in 0 to 100 loop
                        ra := std_ulogic_vector(resize(signed(rnd.RandSlv(dlength * 8)), 64));
                        rb := std_ulogic_vector(resize(signed(rnd.RandSlv(vlength * 8)), 64));

                        d1.dividend <= magnitude(ra);
                        d1.divisor <= magnitude(rb);
                        d1.is_signed <= '1';
                        d1.neg_result <= ra(63) xor rb(63);
                        run_division;

                        -- Divide by zero and the most-negative / -1 case expect 0
                        behave_rt := (others => '0');
                        if rb /= x"0000000000000000" and (ra /= x"8000000000000000" or rb /= x"ffffffffffffffff") then
                            behave_rt := ppc_divd(ra, rb);
                        end if;
                        check_equal(d2.write_reg_data, behave_rt, result("for divd"));
                    end loop;
                end loop;
            end loop;

        elsif run("Test divdu") then
            divdu_loop : for dlength in 1 to 8 loop
                for vlength in 1 to dlength loop
                    for i in 0 to 100 loop
                        ra := std_ulogic_vector(resize(unsigned(rnd.RandSlv(dlength * 8)), 64));
                        rb := std_ulogic_vector(resize(unsigned(rnd.RandSlv(vlength * 8)), 64));

                        d1.dividend <= ra;
                        d1.divisor <= rb;
                        run_division;

                        behave_rt := (others => '0');
                        if rb /= x"0000000000000000" then
                            behave_rt := ppc_divdu(ra, rb);
                        end if;
                        check_equal(d2.write_reg_data, behave_rt, result("for divdu"));
                    end loop;
                end loop;
            end loop;

        elsif run("Test divde") then
            divde_loop : for vlength in 1 to 8 loop
                for dlength in 1 to vlength loop
                    for i in 0 to 100 loop
                        ra := std_ulogic_vector(resize(signed(rnd.RandSlv(dlength * 8)), 64));
                        rb := std_ulogic_vector(resize(signed(rnd.RandSlv(vlength * 8)), 64));

                        d1.dividend <= magnitude(ra);
                        d1.divisor <= magnitude(rb);
                        d1.is_signed <= '1';
                        d1.neg_result <= ra(63) xor rb(63);
                        d1.is_extended <= '1';
                        run_division;

                        -- Reference: (ra << 64) / rb, accepted only when the
                        -- quotient fits in 64 signed bits
                        behave_rt := (others => '0');
                        if rb /= x"0000000000000000" then
                            d128 := ra & x"0000000000000000";
                            q128 := std_ulogic_vector(signed(d128) / signed(rb));
                            if q128(127 downto 63) = x"0000000000000000" & '0' or
                                q128(127 downto 63) = x"ffffffffffffffff" & '1' then
                                behave_rt := q128(63 downto 0);
                            end if;
                        end if;
                        check_equal(d2.write_reg_data, behave_rt, result("for divde"));
                    end loop;
                end loop;
            end loop;

        elsif run("Test divdeu") then
            divdeu_loop : for vlength in 1 to 8 loop
                for dlength in 1 to vlength loop
                    for i in 0 to 100 loop
                        ra := std_ulogic_vector(resize(unsigned(rnd.RandSlv(dlength * 8)), 64));
                        rb := std_ulogic_vector(resize(unsigned(rnd.RandSlv(vlength * 8)), 64));

                        d1.dividend <= ra;
                        d1.divisor <= rb;
                        d1.is_extended <= '1';
                        run_division;

                        -- The quotient only fits in 64 bits when rb > ra
                        behave_rt := (others => '0');
                        if unsigned(rb) > unsigned(ra) then
                            d128 := ra & x"0000000000000000";
                            q128 := std_ulogic_vector(unsigned(d128) / unsigned(rb));
                            behave_rt := q128(63 downto 0);
                        end if;
                        check_equal(d2.write_reg_data, behave_rt, result("for divdeu"));
                    end loop;
                end loop;
            end loop;

        elsif run("Test divw") then
            divw_loop : for dlength in 1 to 4 loop
                for vlength in 1 to dlength loop
                    for i in 0 to 100 loop
                        ra := std_ulogic_vector(resize(signed(rnd.RandSlv(dlength * 8)), 64));
                        rb := std_ulogic_vector(resize(signed(rnd.RandSlv(vlength * 8)), 64));

                        d1.dividend <= magnitude(ra);
                        d1.divisor <= magnitude(rb);
                        d1.is_signed <= '1';
                        d1.neg_result <= ra(63) xor rb(63);
                        d1.is_32bit <= '1';
                        run_division;

                        behave_rt := (others => '0');
                        if rb /= x"0000000000000000" and (ra /= x"ffffffff80000000" or rb /= x"ffffffffffffffff") then
                            behave_rt := ppc_divw(ra, rb);
                        end if;
                        check_equal(d2.write_reg_data, behave_rt, result("for divw"));
                    end loop;
                end loop;
            end loop;

        elsif run("Test divwu") then
            divwu_loop : for dlength in 1 to 4 loop
                for vlength in 1 to dlength loop
                    for i in 0 to 100 loop
                        ra := std_ulogic_vector(resize(unsigned(rnd.RandSlv(dlength * 8)), 64));
                        rb := std_ulogic_vector(resize(unsigned(rnd.RandSlv(vlength * 8)), 64));

                        d1.dividend <= ra;
                        d1.divisor <= rb;
                        d1.is_32bit <= '1';
                        run_division;

                        behave_rt := (others => '0');
                        if rb /= x"0000000000000000" then
                            behave_rt := ppc_divwu(ra, rb);
                        end if;
                        check_equal(d2.write_reg_data, behave_rt, result("for divwu"));
                    end loop;
                end loop;
            end loop;

        elsif run("Test divwe") then
            divwe_loop : for vlength in 1 to 4 loop
                for dlength in 1 to vlength loop
                    for i in 0 to 100 loop
                        -- The 32-bit dividend is placed in the upper word
                        ra := std_ulogic_vector(resize(signed(rnd.RandSlv(dlength * 8)), 32)) & x"00000000";
                        rb := std_ulogic_vector(resize(signed(rnd.RandSlv(vlength * 8)), 64));

                        d1.dividend <= magnitude(ra);
                        d1.divisor <= magnitude(rb);
                        d1.is_signed <= '1';
                        d1.neg_result <= ra(63) xor rb(63);
                        d1.is_32bit <= '1';
                        run_division;

                        behave_rt := (others => '0');
                        if rb /= x"0000000000000000" then
                            q64 := std_ulogic_vector(signed(ra) / signed(rb));
                            if q64(63 downto 31) = x"00000000" & '0' or
                                q64(63 downto 31) = x"ffffffff" & '1' then
                                behave_rt := x"00000000" & q64(31 downto 0);
                            end if;
                        end if;
                        -- Checked for every operand pair, including a zero
                        -- divisor (expected result 0), like the other tests.
                        check_equal(d2.write_reg_data, behave_rt, result("for divwe"));
                    end loop;
                end loop;
            end loop;

        elsif run("Test divweu") then
            divweu_loop : for vlength in 1 to 4 loop
                for dlength in 1 to vlength loop
                    for i in 0 to 100 loop
                        ra := std_ulogic_vector(resize(unsigned(rnd.RandSlv(dlength * 8)), 32)) & x"00000000";
                        rb := std_ulogic_vector(resize(unsigned(rnd.RandSlv(vlength * 8)), 64));

                        d1.dividend <= ra;
                        d1.divisor <= rb;
                        d1.is_32bit <= '1';
                        run_division;

                        behave_rt := (others => '0');
                        if unsigned(rb(31 downto 0)) > unsigned(ra(63 downto 32)) then
                            behave_rt := std_ulogic_vector(unsigned(ra) / unsigned(rb));
                        end if;
                        check_equal(d2.write_reg_data, behave_rt, result("for divweu"));
                    end loop;
                end loop;
            end loop;

        elsif run("Test modsd") then
            modsd_loop : for dlength in 1 to 8 loop
                for vlength in 1 to dlength loop
                    for i in 0 to 100 loop
                        ra := std_ulogic_vector(resize(signed(rnd.RandSlv(dlength * 8)), 64));
                        rb := std_ulogic_vector(resize(signed(rnd.RandSlv(vlength * 8)), 64));

                        d1.dividend <= magnitude(ra);
                        d1.divisor <= magnitude(rb);
                        d1.is_signed <= '1';
                        -- The remainder takes the sign of the dividend
                        d1.neg_result <= ra(63);
                        d1.is_modulus <= '1';
                        run_division;

                        behave_rt := (others => '0');
                        if rb /= x"0000000000000000" then
                            behave_rt := std_ulogic_vector(signed(ra) rem signed(rb));
                        end if;
                        check_equal(d2.write_reg_data, behave_rt, result("for modsd"));
                    end loop;
                end loop;
            end loop;

        elsif run("Test modud") then
            modud_loop : for dlength in 1 to 8 loop
                for vlength in 1 to dlength loop
                    for i in 0 to 100 loop
                        ra := std_ulogic_vector(resize(unsigned(rnd.RandSlv(dlength * 8)), 64));
                        rb := std_ulogic_vector(resize(unsigned(rnd.RandSlv(vlength * 8)), 64));

                        d1.dividend <= ra;
                        d1.divisor <= rb;
                        d1.is_modulus <= '1';
                        run_division;

                        behave_rt := (others => '0');
                        if rb /= x"0000000000000000" then
                            behave_rt := std_ulogic_vector(unsigned(ra) rem unsigned(rb));
                        end if;
                        check_equal(d2.write_reg_data, behave_rt, result("for modud"));
                    end loop;
                end loop;
            end loop;

        elsif run("Test modsw") then
            modsw_loop : for dlength in 1 to 4 loop
                for vlength in 1 to dlength loop
                    for i in 0 to 100 loop
                        ra := std_ulogic_vector(resize(signed(rnd.RandSlv(dlength * 8)), 64));
                        rb := std_ulogic_vector(resize(signed(rnd.RandSlv(vlength * 8)), 64));

                        d1.dividend <= magnitude(ra);
                        d1.divisor <= magnitude(rb);
                        d1.is_signed <= '1';
                        d1.neg_result <= ra(63);
                        d1.is_32bit <= '1';
                        d1.is_modulus <= '1';
                        run_division;

                        -- Reference: 32-bit remainder sign-extended to 64 bits
                        behave_rt := (others => '0');
                        if rb /= x"0000000000000000" then
                            rem32 := std_ulogic_vector(signed(ra(31 downto 0)) rem signed(rb(31 downto 0)));
                            if rem32(31) = '0' then
                                behave_rt := x"00000000" & rem32;
                            else
                                behave_rt := x"ffffffff" & rem32;
                            end if;
                        end if;
                        check_equal(d2.write_reg_data, behave_rt, result("for modsw"));
                    end loop;
                end loop;
            end loop;

        elsif run("Test moduw") then
            moduw_loop : for dlength in 1 to 4 loop
                for vlength in 1 to dlength loop
                    for i in 0 to 100 loop
                        ra := std_ulogic_vector(resize(unsigned(rnd.RandSlv(dlength * 8)), 64));
                        rb := std_ulogic_vector(resize(unsigned(rnd.RandSlv(vlength * 8)), 64));

                        d1.dividend <= ra;
                        d1.divisor <= rb;
                        d1.is_32bit <= '1';
                        d1.is_modulus <= '1';
                        run_division;

                        behave_rt := (others => '0');
                        if rb /= x"0000000000000000" then
                            behave_rt := x"00000000" & std_ulogic_vector(unsigned(ra(31 downto 0)) rem unsigned(rb(31 downto 0)));
                        end if;
                        -- Only the low word is compared for moduw
                        check_equal(d2.write_reg_data(31 downto 0), behave_rt(31 downto 0), result("for moduw"));
                    end loop;
                end loop;
            end loop;
        end if;
    end loop;

    test_runner_cleanup(runner);
end process;
end behave;

@ -1,31 +0,0 @@
-- Dummy/empty DMI interface to make toplevel happy on unsupported FPGAs

library ieee;
use ieee.std_logic_1164.all;

library work;
use work.wishbone_types.all;

-- Stub DMI transport module with the same generics and ports as the real
-- implementations.  It has no JTAG back end: every output is a constant.
entity dmi_dtm is
    generic (
        ABITS : INTEGER := 8;
        DBITS : INTEGER := 32
    );
    port (
        sys_clk   : in  std_ulogic;
        sys_reset : in  std_ulogic;
        dmi_addr  : out std_ulogic_vector(ABITS - 1 downto 0);
        dmi_din   : in  std_ulogic_vector(DBITS - 1 downto 0);
        dmi_dout  : out std_ulogic_vector(DBITS - 1 downto 0);
        dmi_req   : out std_ulogic;
        dmi_wr    : out std_ulogic;
        dmi_ack   : in  std_ulogic
    );
end entity dmi_dtm;

architecture behaviour of dmi_dtm is
begin
    -- No request is ever raised
    dmi_req  <= '0';
    dmi_wr   <= '0';
    -- Address and write data are held at zero
    dmi_addr <= (others => '0');
    dmi_dout <= (others => '0');
end architecture behaviour;

@ -1,298 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.math_real.all;

library work;
use work.wishbone_types.all;

entity dmi_dtm is
generic(ABITS : INTEGER:=8;
DBITS : INTEGER:=64);

port(sys_clk : in std_ulogic;
sys_reset : in std_ulogic;
dmi_addr : out std_ulogic_vector(ABITS - 1 downto 0);
dmi_din : in std_ulogic_vector(DBITS - 1 downto 0);
dmi_dout : out std_ulogic_vector(DBITS - 1 downto 0);
dmi_req : out std_ulogic;
dmi_wr : out std_ulogic;
dmi_ack : in std_ulogic
-- dmi_err : in std_ulogic TODO: Add error response
);
end entity dmi_dtm;

architecture behaviour of dmi_dtm is
-- Signals coming out of the JTAGG block
signal jtag_reset_n : std_ulogic;
signal tdi : std_ulogic;
signal tdo : std_ulogic;
signal tck : std_ulogic;
signal jce1 : std_ulogic;
signal jshift : std_ulogic;
signal update : std_ulogic;

-- signals to match dmi_dtm_xilinx
signal jtag_reset : std_ulogic;
signal capture : std_ulogic;
signal jtag_clk : std_ulogic;
signal sel : std_ulogic;
signal shift : std_ulogic;

-- delays
signal jce1_d : std_ulogic;
constant TCK_DELAY : INTEGER := 8;
signal tck_d : std_ulogic_vector(TCK_DELAY+1 downto 1);

-- ** JTAG clock domain **

-- Shift register
signal shiftr : std_ulogic_vector(ABITS + DBITS + 1 downto 0);

-- Latched request
signal request : std_ulogic_vector(ABITS + DBITS + 1 downto 0);

-- A request is present
signal jtag_req : std_ulogic;

-- Synchronizer for jtag_rsp (sys clk -> jtag_clk)
signal dmi_ack_0 : std_ulogic;
signal dmi_ack_1 : std_ulogic;

-- ** sys clock domain **

-- Synchronizer for jtag_req (jtag clk -> sys clk)
signal jtag_req_0 : std_ulogic;
signal jtag_req_1 : std_ulogic;

-- ** combination signals
signal jtag_bsy : std_ulogic;
signal op_valid : std_ulogic;
signal rsp_op : std_ulogic_vector(1 downto 0);

-- ** Constants **
constant DMI_REQ_NOP : std_ulogic_vector(1 downto 0) := "00";
constant DMI_REQ_RD : std_ulogic_vector(1 downto 0) := "01";
constant DMI_REQ_WR : std_ulogic_vector(1 downto 0) := "10";
constant DMI_RSP_OK : std_ulogic_vector(1 downto 0) := "00";
constant DMI_RSP_BSY : std_ulogic_vector(1 downto 0) := "11";

attribute ASYNC_REG : string;
attribute ASYNC_REG of jtag_req_0: signal is "TRUE";
attribute ASYNC_REG of jtag_req_1: signal is "TRUE";
attribute ASYNC_REG of dmi_ack_0: signal is "TRUE";
attribute ASYNC_REG of dmi_ack_1: signal is "TRUE";

-- ECP5 JTAGG
component JTAGG is
generic (
ER1 : string := "ENABLED";
ER2 : string := "ENABLED"
);
port(
JTDO1 : in std_ulogic;
JTDO2 : in std_ulogic;
JTDI : out std_ulogic;
JTCK : out std_ulogic;
JRTI1 : out std_ulogic;
JRTI2 : out std_ulogic;
JSHIFT : out std_ulogic;
JUPDATE : out std_ulogic;
JRSTN : out std_ulogic;
JCE1 : out std_ulogic;
JCE2 : out std_ulogic
);
end component;

component LUT4 is
generic (
INIT : std_logic_vector
);
port(
A : in STD_ULOGIC;
B : in STD_ULOGIC;
C : in STD_ULOGIC;
D : in STD_ULOGIC;
Z : out STD_ULOGIC
);
end component;

begin

jtag: JTAGG
generic map(
ER2 => "DISABLED"
)
port map (
JTDO1 => tdo,
JTDO2 => '0',
JTDI => tdi,
JTCK => tck,
JRTI1 => open,
JRTI2 => open,
JSHIFT => jshift,
JUPDATE => update,
JRSTN => jtag_reset_n,
JCE1 => jce1,
JCE2 => open
);

-- JRTI1 looks like it could be connected to SEL, but
-- in practise JRTI1 is only high briefly, not for the duration
-- of the transmission. possibly mw_debug could be modified.
-- The ecp5 is probably the only jtag device anyway.
sel <= '1';

-- TDI needs to align with TCK, we use LUT delays here.
-- From https://github.com/enjoy-digital/litex/pull/1087
tck_d(1) <= tck;
del: for i in 1 to TCK_DELAY generate
attribute keep : boolean;
attribute keep of l: label is true;
begin
l: LUT4
generic map(
INIT => b"0000_0000_0000_0010"
)
port map (
A => tck_d(i),
B => '0', C => '0', D => '0',
Z => tck_d(i+1)
);
end generate;
jtag_clk <= tck_d(TCK_DELAY+1);

-- capture signal: registered rising-edge detect on jce1.  capture is set
-- for the jtag_clk cycle after jce1 is seen high while its previous
-- sample (jce1_d) was low.
jce1_sync : process(jtag_clk)
begin
    if rising_edge(jtag_clk) then
        capture <= jce1 and (not jce1_d);
        jce1_d <= jce1;
    end if;
end process;

-- latch the shift signal, otherwise
-- we miss the last shift in
-- (maybe because we are delaying tck?)
--
-- sys_reset clears the register asynchronously (it is tested outside the
-- clock-edge branch), so it has to appear in the sensitivity list;
-- otherwise simulation only sees the reset on a jtag_clk event.
shift_sync : process(jtag_clk, sys_reset)
begin
    if (sys_reset = '1') then
        shift <= '0';
    elsif rising_edge(jtag_clk) then
        shift <= jshift;
    end if;
end process;

jtag_reset <= not jtag_reset_n;

-- dmi_req synchronization: jtag_req is passed through two registers
-- clocked by sys_clk before it drives dmi_req.
dmi_req_sync : process(sys_clk)
begin
    if rising_edge(sys_clk) then
        -- Normal operation: advance the two-stage chain
        jtag_req_0 <= jtag_req;
        jtag_req_1 <= jtag_req_0;

        -- sys_reset is synchronous and overrides the assignments above
        if sys_reset = '1' then
            jtag_req_0 <= '0';
            jtag_req_1 <= '0';
        end if;
    end if;
end process;
dmi_req <= jtag_req_1;

-- dmi_ack synchronization: dmi_ack is passed through two registers
-- clocked by jtag_clk; dmi_ack_1 is the synchronized copy.
dmi_ack_sync: process(jtag_clk, jtag_reset)
begin
    -- jtag_reset is async (see comments)
    if jtag_reset = '1' then
        dmi_ack_1 <= '0';
        dmi_ack_0 <= '0';
    elsif rising_edge(jtag_clk) then
        dmi_ack_1 <= dmi_ack_0;
        dmi_ack_0 <= dmi_ack;
    end if;
end process;
-- jtag_bsy indicates whether we can start a new request, we can when
-- we aren't already processing one (jtag_req) and the synchronized ack
-- of the previous one is 0.
--
jtag_bsy <= jtag_req or dmi_ack_1;

-- decode request type in shift register
with shiftr(1 downto 0) select op_valid <=
'1' when DMI_REQ_RD,
'1' when DMI_REQ_WR,
'0' when others;

-- encode response op
rsp_op <= DMI_RSP_BSY when jtag_bsy = '1' else DMI_RSP_OK;

-- Some DMI out signals are directly driven from the request register
dmi_addr <= request(ABITS + DBITS + 1 downto DBITS + 2);
dmi_dout <= request(DBITS + 1 downto 2);
dmi_wr <= '1' when request(1 downto 0) = DMI_REQ_WR else '0';

-- TDO is wired to shift register bit 0
tdo <= shiftr(0);

-- Main state machine. Handles shift registers, request latch and
-- jtag_req latch. Could be split into 3 processes but it's probably
-- not worthwhile.
--
-- Either jtag_reset or sys_reset clears all three registers
-- asynchronously. Within one clock edge the statements below are order
-- dependent: a later assignment to the same signal (or slice) overrides
-- an earlier one, so "capture" has priority over "update", which in turn
-- overrides the low two bits written by "shift".
--
shifter: process(jtag_clk, jtag_reset, sys_reset)
begin
if jtag_reset = '1' or sys_reset = '1' then
shiftr <= (others => '0');
jtag_req <= '0';
request <= (others => '0');
elsif rising_edge(jtag_clk) then

-- Handle jtag "commands" when sel is 1
-- (sel is tied to '1' by a concurrent assignment in this architecture)
if sel = '1' then
-- Shift state, rotate the register
-- tdi enters at the top bit, bit 0 (which drives tdo) drops off
if shift = '1' then
shiftr <= tdi & shiftr(ABITS + DBITS + 1 downto 1);
end if;

-- Update state (trigger)
--
-- Latch the request if we aren't already processing one and
-- it has a valid command opcode.
--
if update = '1' and op_valid = '1' then
if jtag_bsy = '0' then
request <= shiftr;
jtag_req <= '1';
end if;
-- Set the shift register "op" to "busy". This will prevent
-- us from re-starting the command on the next update if
-- the command completes before that.
shiftr(1 downto 0) <= DMI_RSP_BSY;
end if;

-- Request completion.
--
-- Capture the response data for reads and clear request flag.
--
-- Note: We clear req (and thus dmi_req) here which relies on tck
-- ticking and sel set. This means we are stuck with dmi_req up if
-- the jtag interface stops. Slaves must be resilient to this.
--
if jtag_req = '1' and dmi_ack_1 = '1' then
jtag_req <= '0';
-- For reads, the data field of the latched request is
-- replaced with the value on dmi_din
if request(1 downto 0) = DMI_REQ_RD then
request(DBITS + 1 downto 2) <= dmi_din;
end if;
end if;

-- Capture state, grab latch content with updated status
-- (address and data fields of request, with rsp_op as the op field)
if capture = '1' then
shiftr <= request(ABITS + DBITS + 1 downto 2) & rsp_op;
end if;

end if;
end if;
end process;
end architecture behaviour;

@ -1,250 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;
use work.wishbone_types.all;

library unisim;
use unisim.vcomponents.all;

entity dmi_dtm_tb is
end dmi_dtm_tb;

architecture behave of dmi_dtm_tb is
signal clk : std_ulogic;
signal rst : std_ulogic;
constant clk_period : time := 10 ns;
constant jclk_period : time := 30 ns;

-- DMI debug bus signals
signal dmi_addr : std_ulogic_vector(7 downto 0);
signal dmi_din : std_ulogic_vector(63 downto 0);
signal dmi_dout : std_ulogic_vector(63 downto 0);
signal dmi_req : std_ulogic;
signal dmi_wr : std_ulogic;
signal dmi_ack : std_ulogic;

-- Global JTAG signals (used by BSCANE2 inside dmi_dtm)
alias j : glob_jtag_t is glob_jtag;

-- Wishbone interfaces
signal wishbone_ram_in : wishbone_slave_out;
signal wishbone_ram_out : wishbone_master_out;

begin
dtm: entity work.dmi_dtm
generic map(
ABITS => 8,
DBITS => 64
)
port map(
sys_clk => clk,
sys_reset => rst,
dmi_addr => dmi_addr,
dmi_din => dmi_din,
dmi_dout => dmi_dout,
dmi_req => dmi_req,
dmi_wr => dmi_wr,
dmi_ack => dmi_ack
);

simple_ram_0: entity work.wishbone_bram_wrapper
generic map(RAM_INIT_FILE => "main_ram.bin",
MEMORY_SIZE => 524288)
port map(clk => clk, rst => rst,
wishbone_in => wishbone_ram_out,
wishbone_out => wishbone_ram_in);

wishbone_debug_0: entity work.wishbone_debug_master
port map(clk => clk, rst => rst,
dmi_addr => dmi_addr(1 downto 0),
dmi_dout => dmi_din,
dmi_din => dmi_dout,
dmi_wr => dmi_wr,
dmi_ack => dmi_ack,
dmi_req => dmi_req,
wb_in => wishbone_ram_in,
wb_out => wishbone_ram_out);

-- system clock: starts high, toggles every half clk_period
sys_clk: process
    constant half_period : time := clk_period / 2;
begin
    clk <= '1';
    wait for half_period;
    clk <= '0';
    wait for half_period;
end process sys_clk;

-- system sim: hold rst high for one clk_period, release it, then stay
-- suspended for the rest of the simulation
sys_sim: process
begin
    rst <= '1', '0' after clk_period;
    wait;
end process;

-- jtag sim process
sim_jtag: process
-- Generates `count` full TCK cycles on the global JTAG record: each cycle
-- is half a jclk_period low followed by half a jclk_period high.
procedure clock(count: in INTEGER) is
    constant half_cycle : time := jclk_period/2;
begin
    for pulse in 1 to count loop
        j.tck <= '0';
        wait for half_cycle;
        j.tck <= '1';
        wait for half_cycle;
    end loop;
end procedure clock;

-- Drives val onto TDI one bit per TCK cycle, starting with val(0) and
-- ending with val(val'length-1).
procedure shift_out(val: in std_ulogic_vector) is
    variable bit_idx : natural := 0;
begin
    while bit_idx <= val'length-1 loop
        j.tdi <= val(bit_idx);
        clock(1);
        bit_idx := bit_idx + 1;
    end loop;
end procedure shift_out;

-- Samples TDO once per TCK cycle, val'length times.  Each sample is
-- inserted at the top of val while the previous contents move down by
-- one position, so the first bit sampled ends up in the lowest position.
procedure shift_in(val: out std_ulogic_vector) is
begin
    for sample in 1 to val'length loop
        val := j.tdo & val(val'length-1 downto 1);
        clock(1);
    end loop;
end procedure shift_in;

-- Sends one DMI command through the simulated JTAG signals.
--
--   addr : DMI register address
--   data : write data (callers pass zeroes for reads)
--   op   : request opcode
--
-- The sequence is: pulse capture for one TCK, one idle TCK, shift out
-- op, then data, then addr (each starting with its index-0 bit) with
-- shift held high, then pulse update for one TCK and finish with one
-- idle TCK.
procedure send_command(
addr : in std_ulogic_vector(7 downto 0);
data : in std_ulogic_vector(63 downto 0);
op : in std_ulogic_vector(1 downto 0)) is
begin
-- capture phase
j.capture <= '1';
clock(1);
j.capture <= '0';
clock(1);
-- shift phase: op first, address last
j.shift <= '1';
shift_out(op);
shift_out(data);
shift_out(addr);
j.shift <= '0';
-- update phase
j.update <= '1';
clock(1);
j.update <= '0';
clock(1);
end procedure send_command;

-- Reads back one DMI response through the simulated JTAG signals.
--
--   op   : receives the 2-bit response status
--   data : receives the 64-bit data field
--
-- Uses the same capture / shift / update sequence as send_command, but
-- samples TDO instead of driving a command.  The address field is
-- shifted in as well and then discarded.
procedure read_resp(
op : out std_ulogic_vector(1 downto 0);
data : out std_ulogic_vector(63 downto 0)) is

-- scratch for the address field, not returned to the caller
variable addr : std_ulogic_vector(7 downto 0);
begin
-- capture phase
j.capture <= '1';
clock(1);
j.capture <= '0';
clock(1);
-- shift phase: status first, address last
j.shift <= '1';
shift_in(op);
shift_in(data);
shift_in(addr);
j.shift <= '0';
-- update phase
j.update <= '1';
clock(1);
j.update <= '0';
clock(1);
end procedure read_resp;

-- Writes `data` to DMI register `addr` and polls for completion.
--
-- Sends a write command (op "10"), then reads the response status
-- repeatedly:
--   "00" : done, return
--   "11" : busy, poll again
--   other: reported as an error, then polled again
-- Polling is bounded by MAX_POLLS; if no "00" status is seen by then a
-- timeout error is reported and the procedure returns so the testbench
-- cannot hang.  (Previously the poll counter was left uninitialised, so
-- it started at integer'low and the "< 0" timeout check could not fire
-- in any practical simulation.)
procedure dmi_write(addr : in std_ulogic_vector(7 downto 0);
                    data : in std_ulogic_vector(63 downto 0)) is
    -- Arbitrary but generous bound on the number of status polls
    constant MAX_POLLS : positive := 1000;
    variable resp_op : std_ulogic_vector(1 downto 0);
    variable resp_data : std_ulogic_vector(63 downto 0);
begin
    send_command(addr, data, "10");
    for poll in 1 to MAX_POLLS loop
        read_resp(resp_op, resp_data);
        case resp_op is
            when "00" =>
                return;
            when "11" =>
                -- still busy: try again
                null;
            when others =>
                report "dmi_write got odd status: " &
                    to_hstring(resp_op) severity error;
        end case;
    end loop;
    report "dmi_write timed out !" severity error;
end procedure dmi_write;

-- Reads DMI register `addr` into `data` and polls for completion.
--
-- Sends a read command (op "01", zero data), then reads the response
-- repeatedly into `data`:
--   "00" : done, return with `data` holding the response data field
--   "11" : busy, poll again
--   other: reported as an error, then polled again
-- Polling is bounded by MAX_POLLS; if no "00" status is seen by then a
-- timeout error is reported and the procedure returns so the testbench
-- cannot hang.  (Previously the poll counter was left uninitialised, so
-- it started at integer'low and the "< 0" timeout check could not fire
-- in any practical simulation.)
procedure dmi_read(addr : in std_ulogic_vector(7 downto 0);
                   data : out std_ulogic_vector(63 downto 0)) is
    -- Arbitrary but generous bound on the number of status polls
    constant MAX_POLLS : positive := 1000;
    variable resp_op : std_ulogic_vector(1 downto 0);
begin
    send_command(addr, (others => '0'), "01");
    for poll in 1 to MAX_POLLS loop
        read_resp(resp_op, data);
        case resp_op is
            when "00" =>
                return;
            when "11" =>
                -- still busy: try again
                null;
            when others =>
                report "dmi_read got odd status: " &
                    to_hstring(resp_op) severity error;
        end case;
    end loop;
    report "dmi_read timed out !" severity error;
end procedure dmi_read;

variable data : std_ulogic_vector(63 downto 0);
begin
-- init & reset
j.reset <= '1';
j.sel <= "0000";
j.capture <= '0';
j.update <= '0';
j.shift <= '0';
j.tdi <= '0';
j.tms <= '0';
j.runtest <= '0';
clock(5);
j.reset <= '0';
clock(5);

-- select chain 2
j.sel <= "0010";
clock(1);

-- send command
dmi_read(x"00", data);
report "Read addr reg:" & to_hstring(data);
report "Writing addr reg to all 1's";
dmi_write(x"00", (others => '1'));
dmi_read(x"00", data);
report "Read addr reg:" & to_hstring(data);

report "Writing ctrl reg to all 1's";
dmi_write(x"02", (others => '1'));
dmi_read(x"02", data);
report "Read ctrl reg:" & to_hstring(data);

report "Read memory at 0...\n";
dmi_write(x"00", x"0000000000000000");
dmi_write(x"02", x"00000000000007ff");
dmi_read(x"01", data);
report "00:" & to_hstring(data);
dmi_read(x"01", data);
report "08:" & to_hstring(data);
dmi_read(x"01", data);
report "10:" & to_hstring(data);
dmi_read(x"01", data);
report "18:" & to_hstring(data);
clock(10);
std.env.finish;
end process;
end behave;

@ -1,280 +0,0 @@
-- Xilinx internal JTAG to DMI interface
--
-- DMI bus
--
-- req : ____/------------\_____
-- addr: xxxx< >xxxxx
-- dout: xxxx< >xxxxx
-- wr : xxxx< >xxxxx
-- din : xxxxxxxxxxxx< >xxx
-- ack : ____________/------\___
--
-- * addr/dout set along with req, can be latched on same cycle by slave
-- * ack & din remain up until req is dropped by master, the slave must
-- provide a stable output on din on reads during that time.
-- * req remains low until at least one sysclk after ack is seen low.
--
-- JTAG (tck) DMI (sys_clk)
--
-- * jtag_req = 1
-- (jtag_req_0) *
-- (jtag_req_1) -> * dmi_req = 1 >
-- *.../...
-- * dmi_ack = 1 <
-- * (dmi_ack_0)
-- * <- (dmi_ack_1)
-- * jtag_req = 0 (and latch dmi_din)
-- (jtag_req_0) *
-- (jtag_req_1) -> * dmi_req = 0 >
-- * dmi_ack = 0 <
-- * (dmi_ack_0)
-- * <- (dmi_ack_1)
--
-- jtag_req can go back to 1 when dmi_ack_1 is 0
--
-- Questions/TODO:
-- - I use 2 flip-flops for sync, is that enough ?
-- - I treat the jtag_reset as an async reset, is that necessary ?
-- - Dbl check reset situation since we have two different resets
-- each only resetting part of the logic...
-- - Look at optionally removing the synchronizer on the ack path,
-- assuming JTAG is always slow enough that ack will have been
-- stable long enough by the time CAPTURE comes in.
-- - We could avoid the latched request by not shifting while a
-- request is in progress (and force TDO to 1 to return a busy
-- status).
--
-- WARNING: This isn't the real DMI JTAG protocol (at least not yet).
-- a command while busy will be ignored. A response of "11"
-- means the previous command is still going, try again.
-- As such We don't implement the DMI "error" status, and
-- we don't implement DTMCS yet... This may still all change
-- but for now it's easier that way as the real DMI protocol
-- requires for a command to work properly that enough TCK
-- are sent while IDLE and I'm having trouble getting that
-- working with UrJtag and the Xilinx BSCAN2 for now.

library ieee;
use ieee.std_logic_1164.all;
use ieee.math_real.all;

library work;
use work.wishbone_types.all;

library unisim;
use unisim.vcomponents.all;

entity dmi_dtm is
generic(ABITS : INTEGER:=8;
DBITS : INTEGER:=32);

port(sys_clk : in std_ulogic;
sys_reset : in std_ulogic;
dmi_addr : out std_ulogic_vector(ABITS - 1 downto 0);
dmi_din : in std_ulogic_vector(DBITS - 1 downto 0);
dmi_dout : out std_ulogic_vector(DBITS - 1 downto 0);
dmi_req : out std_ulogic;
dmi_wr : out std_ulogic;
dmi_ack : in std_ulogic
-- dmi_err : in std_ulogic TODO: Add error response
);
end entity dmi_dtm;

architecture behaviour of dmi_dtm is

-- Signals coming out of the BSCANE2 block
signal jtag_reset : std_ulogic;
signal capture : std_ulogic;
signal update : std_ulogic;
signal drck : std_ulogic;
signal jtag_clk : std_ulogic;
signal sel : std_ulogic;
signal shift : std_ulogic;
signal tdi : std_ulogic;
signal tdo : std_ulogic;
signal tck : std_ulogic;

-- ** JTAG clock domain **

-- Shift register
signal shiftr : std_ulogic_vector(ABITS + DBITS + 1 downto 0);

-- Latched request
signal request : std_ulogic_vector(ABITS + DBITS + 1 downto 0);

-- A request is present
signal jtag_req : std_ulogic;

-- Synchronizer for jtag_rsp (sys clk -> jtag_clk)
signal dmi_ack_0 : std_ulogic;
signal dmi_ack_1 : std_ulogic;

-- ** sys clock domain **

-- Synchronizer for jtag_req (jtag clk -> sys clk)
signal jtag_req_0 : std_ulogic;
signal jtag_req_1 : std_ulogic;

-- ** combination signals
signal jtag_bsy : std_ulogic;
signal op_valid : std_ulogic;
signal rsp_op : std_ulogic_vector(1 downto 0);

-- ** Constants **
constant DMI_REQ_NOP : std_ulogic_vector(1 downto 0) := "00";
constant DMI_REQ_RD : std_ulogic_vector(1 downto 0) := "01";
constant DMI_REQ_WR : std_ulogic_vector(1 downto 0) := "10";
constant DMI_RSP_OK : std_ulogic_vector(1 downto 0) := "00";
constant DMI_RSP_BSY : std_ulogic_vector(1 downto 0) := "11";

attribute ASYNC_REG : string;
attribute ASYNC_REG of jtag_req_0: signal is "TRUE";
attribute ASYNC_REG of jtag_req_1: signal is "TRUE";
attribute ASYNC_REG of dmi_ack_0: signal is "TRUE";
attribute ASYNC_REG of dmi_ack_1: signal is "TRUE";
begin

-- Implement the Xilinx bscan2 for series 7 devices (TODO: use PoC to
-- wrap this if compatibility is required with older devices).
bscan : BSCANE2
generic map (
JTAG_CHAIN => 2
)
port map (
CAPTURE => capture,
DRCK => drck,
RESET => jtag_reset,
RUNTEST => open,
SEL => sel,
SHIFT => shift,
TCK => tck,
TDI => tdi,
TMS => open,
UPDATE => update,
TDO => tdo
);

-- Some examples out there suggest buffering the clock so it's
-- treated as a proper clock net. This is probably needed when using
-- drck (the gated clock) but I'm using the real tck here to avoid
-- missing the update phase so maybe not...
--
clkbuf : BUFG
port map (
-- I => drck,
I => tck,
O => jtag_clk
);

-- dmi_req synchronization: two sys_clk registers between jtag_req and
-- the dmi_req output.
dmi_req_sync : process(sys_clk)
begin
    if rising_edge(sys_clk) then
        -- sys_reset is synchronous
        if sys_reset /= '1' then
            jtag_req_1 <= jtag_req_0;
            jtag_req_0 <= jtag_req;
        else
            jtag_req_1 <= '0';
            jtag_req_0 <= '0';
        end if;
    end if;
end process;
dmi_req <= jtag_req_1;

-- dmi_ack synchronization: two jtag_clk registers between the dmi_ack
-- input and dmi_ack_1.
dmi_ack_sync: process(jtag_clk, jtag_reset)
begin
    -- jtag_reset is async (see comments)
    if jtag_reset = '1' then
        dmi_ack_1 <= '0';
        dmi_ack_0 <= '0';
    elsif rising_edge(jtag_clk) then
        dmi_ack_1 <= dmi_ack_0;
        dmi_ack_0 <= dmi_ack;
    end if;
end process;
-- jtag_bsy indicates whether we can start a new request, we can when
-- we aren't already processing one (jtag_req) and the synchronized ack
-- of the previous one is 0.
--
jtag_bsy <= jtag_req or dmi_ack_1;

-- decode request type in shift register
with shiftr(1 downto 0) select op_valid <=
'1' when DMI_REQ_RD,
'1' when DMI_REQ_WR,
'0' when others;

-- encode response op
rsp_op <= DMI_RSP_BSY when jtag_bsy = '1' else DMI_RSP_OK;

-- Some DMI out signals are directly driven from the request register
dmi_addr <= request(ABITS + DBITS + 1 downto DBITS + 2);
dmi_dout <= request(DBITS + 1 downto 2);
dmi_wr <= '1' when request(1 downto 0) = DMI_REQ_WR else '0';

-- TDO is wired to shift register bit 0
tdo <= shiftr(0);

-- Main state machine. Handles shift registers, request latch and
-- jtag_req latch. Could be split into 3 processes but it's probably
-- not worthwhile.
--
-- Either jtag_reset or sys_reset clears all three registers
-- asynchronously. Within one clock edge the statements below are order
-- dependent: a later assignment to the same signal (or slice) overrides
-- an earlier one, so "capture" has priority over "update", which in turn
-- overrides the low two bits written by "shift".
--
shifter: process(jtag_clk, jtag_reset, sys_reset)
begin
if jtag_reset = '1' or sys_reset = '1' then
shiftr <= (others => '0');
jtag_req <= '0';
request <= (others => '0');
elsif rising_edge(jtag_clk) then

-- Handle jtag "commands" when sel is 1
if sel = '1' then
-- Shift state, rotate the register
-- tdi enters at the top bit, bit 0 (which drives tdo) drops off
if shift = '1' then
shiftr <= tdi & shiftr(ABITS + DBITS + 1 downto 1);
end if;

-- Update state (trigger)
--
-- Latch the request if we aren't already processing one and
-- it has a valid command opcode.
--
if update = '1' and op_valid = '1' then
if jtag_bsy = '0' then
request <= shiftr;
jtag_req <= '1';
end if;
-- Set the shift register "op" to "busy". This will prevent
-- us from re-starting the command on the next update if
-- the command completes before that.
shiftr(1 downto 0) <= DMI_RSP_BSY;
end if;

-- Request completion.
--
-- Capture the response data for reads and clear request flag.
--
-- Note: We clear req (and thus dmi_req) here which relies on tck
-- ticking and sel set. This means we are stuck with dmi_req up if
-- the jtag interface stops. Slaves must be resilient to this.
--
if jtag_req = '1' and dmi_ack_1 = '1' then
jtag_req <= '0';
-- For reads, the data field of the latched request is
-- replaced with the value on dmi_din
if request(1 downto 0) = DMI_REQ_RD then
request(DBITS + 1 downto 2) <= dmi_din;
end if;
end if;

-- Capture state, grab latch content with updated status
-- (address and data fields of request, with rsp_op as the op field)
if capture = '1' then
shiftr <= request(ABITS + DBITS + 1 downto 2) & rsp_op;
end if;

end if;
end if;
end process;
end architecture behaviour;

@ -1,338 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;
use work.wishbone_types.all;

-- Self-contained testbench entity (no ports).
entity dram_tb is
generic (
-- Forwarded to the PAYLOAD_FILE generic of the litedram_wrapper instance
DRAM_INIT_FILE : string := "";
-- Forwarded to the PAYLOAD_SIZE generic of the litedram_wrapper instance
DRAM_INIT_SIZE : natural := 0
);
end dram_tb;

architecture behave of dram_tb is
signal clk, rst: std_logic;
signal clk_in, soc_rst : std_ulogic;

-- testbench signals
constant clk_period : time := 10 ns;

-- Sim DRAM
signal wb_in : wishbone_master_out;
signal wb_out : wishbone_slave_out;
signal wb_ctrl_in : wb_io_master_out;

subtype addr_t is std_ulogic_vector(wb_in.adr'left downto 0);
subtype data_t is std_ulogic_vector(wb_in.dat'left downto 0);
subtype sel_t is std_ulogic_vector(wb_in.sel'left downto 0);

-- Counter for acks
signal acks : integer := 0;
signal reset_acks : std_ulogic;

-- Read data fifo
signal rd_ready : std_ulogic := '0';
signal rd_valid : std_ulogic;
signal rd_data : data_t;
begin

-- Device under test. The testbench feeds it clk_in/rst and receives the
-- system clock (clk) and system reset (soc_rst) back from it. Only the
-- wb_in/wb_out wishbone port is exercised; status outputs and every ddram_*
-- pin are left open.
dram: entity work.litedram_wrapper
generic map(
DRAM_ABITS => 24,
DRAM_ALINES => 1,
DRAM_DLINES => 16,
DRAM_CKLINES => 1,
DRAM_PORT_WIDTH => 128,
PAYLOAD_FILE => DRAM_INIT_FILE,
PAYLOAD_SIZE => DRAM_INIT_SIZE
)
port map(
clk_in => clk_in,
rst => rst,
system_clk => clk,
system_reset => soc_rst,
core_alt_reset => open,
pll_locked => open,

-- Main wishbone port driven by the sim process
wb_in => wb_in,
wb_out => wb_out,
-- Control port: inputs tied off, outputs ignored
wb_ctrl_in => wb_ctrl_in,
wb_ctrl_out => open,
wb_ctrl_is_csr => '0',
wb_ctrl_is_init => '0',

init_done => open,
init_error => open,

ddram_a => open,
ddram_ba => open,
ddram_ras_n => open,
ddram_cas_n => open,
ddram_we_n => open,
ddram_cs_n => open,
ddram_dm => open,
ddram_dq => open,
ddram_dqs_p => open,
ddram_dqs_n => open,
ddram_clk_p => open,
ddram_clk_n => open,
ddram_cke => open,
ddram_odt => open,
ddram_reset_n => open
);

-- Free-running input clock: low for the first half period, high for the
-- second, repeated forever.
clk_process: process
begin
    clk_in <= '0', '1' after clk_period/2;
    wait for clk_period;
end process;

-- Reset generator: drive rst high from time zero, release it after ten
-- clock periods, then suspend forever.
rst_process: process
begin
    rst <= '1', '0' after 10*clk_period;
    wait;
end process;

-- The control wishbone port is never used by this testbench: keep it idle.
wb_ctrl_in.cyc <= '0';
wb_ctrl_in.stb <= '0';

-- Read data receive queue
--
-- Stores wb_out.dat every time wb_out.ack is asserted, so it receives one
-- entry per acknowledged request (stores included, not only loads). Entries
-- are popped by the read_data procedure through rd_ready/rd_valid. The queue
-- is reset by soc_rst and also by reset_acks, i.e. whenever clr_acks runs.
data_queue: entity work.sync_fifo
generic map (
DEPTH => 16,
WIDTH => rd_data'length
)
port map (
clk => clk,
reset => soc_rst or reset_acks,
rd_ready => rd_ready,
rd_valid => rd_valid,
rd_data => rd_data,
wr_ready => open,
wr_valid => wb_out.ack,
wr_data => wb_out.dat
);

-- Ack counter: counts the clock edges on which wb_out.ack is high.
-- rst or reset_acks forces the count back to zero and takes priority over
-- counting.
recv_acks: process(clk)
begin
    if rising_edge(clk) then
        if rst /= '1' and reset_acks /= '1' then
            if wb_out.ack = '1' then
                acks <= acks + 1;
                -- report "WB ACK ! DATA=" & to_hstring(wb_out.dat);
            end if;
        else
            acks <= 0;
        end if;
    end if;
end process;

sim: process
-- Issue one wishbone write.
--   addr : address placed on wb_in.adr
--   data : value placed on wb_in.dat
--   sel  : byte enables placed on wb_in.sel
-- Returns on the first rising clk edge where wb_out.stall is low, with stb
-- dropped and cyc left asserted. It does not wait for the ack.
procedure wb_write(addr: addr_t; data: data_t; sel: sel_t) is
begin
    wb_in.cyc <= '1';
    wb_in.stb <= '1';
    wb_in.we  <= '1';
    wb_in.adr <= addr;
    wb_in.dat <= data;
    wb_in.sel <= sel;

    -- Keep the request up until the slave accepts it
    loop
        wait until rising_edge(clk);
        exit when wb_out.stall = '0';
    end loop;
    wb_in.stb <= '0';
end procedure;

-- Issue one wishbone read of address addr with all byte enables set.
-- Returns on the first rising clk edge where wb_out.stall is low, with stb
-- dropped and cyc left asserted. The returned data is collected separately
-- (see the data_queue instance), not by this procedure.
procedure wb_read(addr: addr_t) is
begin
    wb_in.cyc <= '1';
    wb_in.stb <= '1';
    wb_in.we  <= '0';
    wb_in.adr <= addr;
    wb_in.sel <= x"ff";

    -- Keep the request up until the slave accepts it
    loop
        wait until rising_edge(clk);
        exit when wb_out.stall = '0';
    end loop;
    wb_in.stb <= '0';
end procedure;

-- Block until the ack counter equals count, then wait one more rising clk
-- edge.
--
-- A bare "wait until acks = count" only resumes on an event on acks, so it
-- would hang forever if the last ack had already been counted before this
-- procedure was called. The guard makes that case return normally.
procedure wait_acks(count: integer) is
begin
    if acks /= count then
        wait until acks = count;
    end if;
    wait until rising_edge(clk);
end procedure;

-- Pulse reset_acks high across one rising clk edge. reset_acks zeroes the
-- ack counter (recv_acks) and is part of the data_queue reset, so this also
-- empties the read-data queue.
procedure clr_acks is
begin
reset_acks <= '1';
wait until rising_edge(clk);
reset_acks <= '0';
end procedure;

-- Pop one word from the read-data queue into data.
-- Fails the simulation if the queue reports no valid entry (rd_valid low).
-- Takes one clock cycle: rd_ready is raised, and rd_data is sampled right
-- after the next rising clk edge.
-- NOTE(review): sampling after the edge assumes sync_fifo still presents the
-- popped word at that point; confirm against the FIFO implementation.
procedure read_data(data: out data_t) is
begin
assert rd_valid = '1' report "No data to read" severity failure;
rd_ready <= '1';
wait until rising_edge(clk);
rd_ready <= '0';
data := rd_data;
end procedure;

-- Return address a advanced by off, using unsigned arithmetic of the same
-- width as a.
function add_off(a: addr_t; off: integer) return addr_t is
    variable sum : unsigned(a'length - 1 downto 0);
begin
    sum := unsigned(a) + off;
    return addr_t(sum);
end function;

-- Build a data word in which every byte holds the value num+1.
function make_pattern(num : integer) return data_t is
    constant NBYTES : integer := data_t'length / 8;
    variable pat    : data_t;
begin
    for lane in 0 to NBYTES - 1 loop
        pat(lane*8 + 7 downto lane*8) := std_ulogic_vector(to_unsigned(num+1, 8));
    end loop;
    return pat;
end function;

-- Pop the next word from the read-data queue and stop the simulation with a
-- failure if it differs from the expected value p.
procedure check_data(p: data_t) is
    variable actual : data_t;
begin
    read_data(actual);
    assert actual = p
        report "bad data, want " & to_hstring(p) & " got " & to_hstring(actual)
        severity failure;
end procedure;

variable a : addr_t := (others => '0');
variable d : data_t := (others => '0');
variable d1 : data_t := (others => '0');
begin
reset_acks <= '0';

-- rst is generated by rst_process only. This process must not assign rst:
-- rst is a resolved std_logic, so a second driver releasing reset earlier
-- than rst_process would make the signal resolve to 'X' ('1' against '0')
-- until rst_process lets go as well.
wait until rst = '0';
wait until rising_edge(clk_in);

-- Wait for the wrapper to release the system reset, unless it already has
if soc_rst /= '0' then
wait until soc_rst = '0';
end if;
wait until rising_edge(clk);

-- Every phase starts with clr_acks, which zeroes the ack counter and
-- empties the read-data queue, then issues its requests and waits for the
-- expected number of acks before inspecting the queue.
report "Simple write miss...";
clr_acks;
wb_write(a, x"0123456789abcdef", x"ff");
wait_acks(1);

report "Simple read miss...";
clr_acks;
wb_read(a);
wait_acks(1);
read_data(d);
assert d = x"0123456789abcdef" report "bad data, got " & to_hstring(d) severity failure;

report "Simple read hit...";
clr_acks;
wb_read(a);
wait_acks(1);
read_data(d);
assert d = x"0123456789abcdef" report "bad data, got " & to_hstring(d) severity failure;

report "Back to back 4 stores 4 reads on hit...";
clr_acks;
for i in 0 to 3 loop
wb_write(add_off(a, i), make_pattern(i), x"ff");
end loop;
for i in 0 to 3 loop
wb_read(add_off(a, i));
end loop;
wait_acks(8);
-- The queue holds one entry per ack: the first four belong to the stores
-- and are discarded, the last four are the loads and get checked.
for i in 0 to 7 loop
if i < 4 then
read_data(d);
else
check_data(make_pattern(i-4));
end if;
end loop;

report "Back to back 4 stores 4 reads on miss...";
-- Move to a different address range by setting address bit 10
a(10) := '1';
clr_acks;
for i in 0 to 3 loop
wb_write(add_off(a, i), make_pattern(i), x"ff");
end loop;
for i in 0 to 3 loop
wb_read(add_off(a, i));
end loop;
wait_acks(8);
for i in 0 to 7 loop
if i < 4 then
read_data(d);
else
check_data(make_pattern(i-4));
end if;
end loop;

report "Back to back interleaved 4 stores 4 reads on hit...";
-- a(10) is already set by the previous phase, so the same addresses are
-- reused here.
a(10) := '1';
clr_acks;
for i in 0 to 3 loop
wb_write(add_off(a, i), make_pattern(i), x"ff");
wb_read(add_off(a, i));
end loop;
wait_acks(8);
-- Queue entries alternate store/load (assuming acks come back in request
-- order): drop the store entry, check the load entry.
for i in 0 to 3 loop
read_data(d);
check_data(make_pattern(i));
end loop;

report "Pre-fill a line";
-- Another address range (bit 11), filled with 16 consecutive words
a(11) := '1';
clr_acks;
wb_write(add_off(a, 0), x"1111111100000000", x"ff");
wb_write(add_off(a, 1), x"3333333322222222", x"ff");
wb_write(add_off(a, 2), x"5555555544444444", x"ff");
wb_write(add_off(a, 3), x"7777777766666666", x"ff");
wb_write(add_off(a, 4), x"9999999988888888", x"ff");
wb_write(add_off(a, 5), x"bbbbbbbbaaaaaaaa", x"ff");
wb_write(add_off(a, 6), x"ddddddddcccccccc", x"ff");
wb_write(add_off(a, 7), x"ffffffffeeeeeeee", x"ff");
wb_write(add_off(a, 8), x"1111111100000000", x"ff");
wb_write(add_off(a, 9), x"3333333322222222", x"ff");
wb_write(add_off(a, 10), x"5555555544444444", x"ff");
wb_write(add_off(a, 11), x"7777777766666666", x"ff");
wb_write(add_off(a, 12), x"9999999988888888", x"ff");
wb_write(add_off(a, 13), x"bbbbbbbbaaaaaaaa", x"ff");
wb_write(add_off(a, 14), x"ddddddddcccccccc", x"ff");
wb_write(add_off(a, 15), x"ffffffffeeeeeeee", x"ff");
wait_acks(16);

report "Scattered from middle of line...";
-- Read words 3, 4, 0 and 2 of the pre-filled range, in that order. The
-- number in each failure message is the word index times 8.
clr_acks;
wb_read(add_off(a, 3));
wb_read(add_off(a, 4));
wb_read(add_off(a, 0));
wb_read(add_off(a, 2));
wait_acks(4);
read_data(d);
assert d = x"7777777766666666" report "bad data (24), got " & to_hstring(d) severity failure;
read_data(d);
assert d = x"9999999988888888" report "bad data (32), got " & to_hstring(d) severity failure;
read_data(d);
assert d = x"1111111100000000" report "bad data (0), got " & to_hstring(d) severity failure;
read_data(d);
assert d = x"5555555544444444" report "bad data (16), got " & to_hstring(d) severity failure;

-- All checks passed: end the simulation
std.env.finish;
end process;
end architecture;

File diff suppressed because it is too large Load Diff

@ -0,0 +1,57 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;
use work.crhelpers.all;
use work.ppc_fx_insns.all;

-- 2 cycle ALU
-- We handle rc form instructions here

-- Second execute stage: takes the result record from execute1 and presents
-- a registered record to writeback.
entity execute2 is
port (
clk : in std_ulogic;

-- Result of the first execute stage
e_in : in Execute1ToExecute2Type;
-- Registered output towards writeback
e_out : out Execute2ToWritebackType
);
end execute2;

architecture behave of execute2 is
    -- r is the pipeline register, rin its next-state value
    signal r, rin : Execute2ToWritebackType;
begin
    -- Pipeline register
    execute2_0: process(clk)
    begin
        if rising_edge(clk) then
            r <= rin;
        end if;
    end process;

    -- Next-state and output logic
    execute2_1: process(all)
        variable v : Execute2ToWritebackType;
    begin
        -- Start from the registered state. Seeding v from rin, which this
        -- very process drives, would be a combinational self-feedback path
        -- for any field not explicitly assigned below.
        v := r;

        -- By default pass the execute1 result straight through
        v.valid := e_in.valid;
        v.write_enable := e_in.write_enable;
        v.write_reg := e_in.write_reg;
        v.write_data := e_in.write_data;
        v.write_cr_enable := e_in.write_cr_enable;
        v.write_cr_mask := e_in.write_cr_mask;
        v.write_cr_data := e_in.write_cr_data;

        -- Rc-form instructions additionally write a CR field: the mask is
        -- num_to_fxm(0) and the data is the ppc_cmpi comparison of the
        -- result with zero, padded with 28 zero bits.
        -- NOTE(review): presumably this selects CR field 0 with a signed
        -- compare; confirm against num_to_fxm and ppc_cmpi.
        if e_in.valid = '1' and e_in.rc = '1' then
            v.write_cr_enable := '1';
            v.write_cr_mask := num_to_fxm(0);
            v.write_cr_data := ppc_cmpi('1', e_in.write_data, x"0000") & x"0000000";
        end if;

        -- Update registers
        rin <= v;

        -- Update outputs
        e_out <= r;
    end process;
end;

@ -6,204 +6,68 @@ library work;
use work.common.all;

entity fetch1 is
generic(
RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0');
ALT_RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0');
HAS_BTC : boolean := true
generic(
RESET_ADDRESS : std_logic_vector(63 downto 0)
);
port(
clk : in std_ulogic;
rst : in std_ulogic;
port(
clk : in std_ulogic;
rst : in std_ulogic;

-- Control inputs:
stall_in : in std_ulogic;
flush_in : in std_ulogic;
inval_btc : in std_ulogic;
stop_in : in std_ulogic;
alt_reset_in : in std_ulogic;
-- Control inputs:
stall_in : in std_ulogic;
flush_in : in std_ulogic;

-- redirect from writeback unit
w_in : in WritebackToFetch1Type;
-- redirect from execution unit
e_in : in Execute1ToFetch1Type;

-- redirect from decode1
d_in : in Decode1ToFetch1Type;

-- Request to icache
i_out : out Fetch1ToIcacheType;

-- outputs to logger
log_out : out std_ulogic_vector(42 downto 0)
-- fetch data out
f_out : out Fetch1ToFetch2Type
);
end entity fetch1;

architecture behaviour of fetch1 is
type reg_internal_t is record
mode_32bit: std_ulogic;
rd_is_niap4: std_ulogic;
predicted_taken: std_ulogic;
pred_not_taken: std_ulogic;
predicted_nia: std_ulogic_vector(63 downto 0);
end record;
signal r, r_next : Fetch1ToIcacheType;
signal r_int, r_next_int : reg_internal_t;
signal advance_nia : std_ulogic;
signal log_nia : std_ulogic_vector(42 downto 0);

constant BTC_ADDR_BITS : integer := 10;
constant BTC_TAG_BITS : integer := 62 - BTC_ADDR_BITS;
constant BTC_TARGET_BITS : integer := 62;
constant BTC_SIZE : integer := 2 ** BTC_ADDR_BITS;
constant BTC_WIDTH : integer := BTC_TAG_BITS + BTC_TARGET_BITS + 1;
type btc_mem_type is array (0 to BTC_SIZE - 1) of std_ulogic_vector(BTC_WIDTH - 1 downto 0);

signal btc_rd_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0) := (others => '0');
signal btc_rd_valid : std_ulogic := '0';

type reg_internal_type is record
nia_next : std_ulogic_vector(63 downto 0);
end record;
signal r_int, rin_int : reg_internal_type;
signal r, rin : Fetch1ToFetch2Type;
begin

-- Clocked state update. The mode bits and the NIA/prediction state have
-- separate enable conditions; stop_mark and req are refreshed every cycle.
regs : process(clk)
begin
if rising_edge(clk) then
-- Compressed NIA for the logger: bit 63 followed by bits 43..2
log_nia <= r.nia(63) & r.nia(43 downto 2);
-- Trace message whenever the next state differs from the current one
if r /= r_next then
report "fetch1 rst:" & std_ulogic'image(rst) &
" IR:" & std_ulogic'image(r_next.virt_mode) &
" P:" & std_ulogic'image(r_next.priv_mode) &
" E:" & std_ulogic'image(r_next.big_endian) &
" 32:" & std_ulogic'image(r_next_int.mode_32bit) &
" R:" & std_ulogic'image(w_in.redirect) & std_ulogic'image(d_in.redirect) &
" S:" & std_ulogic'image(stall_in) &
" T:" & std_ulogic'image(stop_in) &
" nia:" & to_hstring(r_next.nia);
end if;
-- Mode bits: updated on reset, on either redirect, or when not stalled
if rst = '1' or w_in.redirect = '1' or d_in.redirect = '1' or stall_in = '0' then
r.virt_mode <= r_next.virt_mode;
r.priv_mode <= r_next.priv_mode;
r.big_endian <= r_next.big_endian;
r_int.mode_32bit <= r_next_int.mode_32bit;
end if;
-- NIA and branch prediction state: updated only when advance_nia is set
if advance_nia = '1' then
r.predicted <= r_next.predicted;
r.pred_ntaken <= r_next.pred_ntaken;
r.nia <= r_next.nia;
r_int.predicted_taken <= r_next_int.predicted_taken;
r_int.pred_not_taken <= r_next_int.pred_not_taken;
r_int.predicted_nia <= r_next_int.predicted_nia;
r_int.rd_is_niap4 <= r_next_int.rd_is_niap4;
end if;
-- always send the up-to-date stop mark and req
r.stop_mark <= stop_in;
r.req <= not rst;
end if;
end process;
log_out <= log_nia;

-- Branch target cache, only built when HAS_BTC is true.
--
-- Entry layout (BTC_WIDTH bits, most significant first):
--   w_in.br_taken | w_in.br_nia(63 downto BTC_ADDR_BITS + 2) | w_in.redirect_nia(63 downto 2)
-- Entries are indexed by br_nia(BTC_ADDR_BITS + 1 downto 2). Valid bits are
-- kept in a separate vector so that they can all be cleared at once.
btc : if HAS_BTC generate
signal btc_memory : btc_mem_type;
-- ram_style attributes are hints for the synthesis tool
attribute ram_style : string;
attribute ram_style of btc_memory : signal is "block";

signal btc_valids : std_ulogic_vector(BTC_SIZE - 1 downto 0);
attribute ram_style of btc_valids : signal is "distributed";

signal btc_wr : std_ulogic;
signal btc_wr_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0);
signal btc_wr_addr : std_ulogic_vector(BTC_ADDR_BITS - 1 downto 0);
begin
btc_wr_data <= w_in.br_taken &
w_in.br_nia(63 downto BTC_ADDR_BITS + 2) &
w_in.redirect_nia(63 downto 2);
btc_wr_addr <= w_in.br_nia(BTC_ADDR_BITS + 1 downto 2);
-- An entry is written whenever writeback flags br_last
btc_wr <= w_in.br_last;

btc_ram : process(clk)
variable raddr : unsigned(BTC_ADDR_BITS - 1 downto 0);
begin
if rising_edge(clk) then
-- Synchronous read, two entries ahead of the index of r.nia.
-- NOTE(review): the +2 presumably accounts for the read and
-- prediction latency relative to r.nia; confirm.
raddr := unsigned(r.nia(BTC_ADDR_BITS + 1 downto 2)) +
to_unsigned(2, BTC_ADDR_BITS);
-- The read registers only move when the NIA advances
if advance_nia = '1' then
btc_rd_data <= btc_memory(to_integer(raddr));
btc_rd_valid <= btc_valids(to_integer(raddr));
end if;
if btc_wr = '1' then
btc_memory(to_integer(unsigned(btc_wr_addr))) <= btc_wr_data;
end if;
-- Invalidation (or reset) clears every valid bit and takes priority
-- over marking the written entry valid
if inval_btc = '1' or rst = '1' then
btc_valids <= (others => '0');
elsif btc_wr = '1' then
btc_valids(to_integer(unsigned(btc_wr_addr))) <= '1';
end if;
end if;
end process;
end generate;

-- Next-NIA selection. Priority, highest first: reset, redirect from
-- writeback, redirect from decode1, previously predicted taken branch,
-- sequential (NIA + 4). Also derives advance_nia and drives the icache
-- request output from the registered state.
comb : process(all)
variable v : Fetch1ToIcacheType;
variable v_int : reg_internal_t;
begin
v := r;
v_int := r_int;
-- Prediction flags default to off and are set again below when applicable
v.predicted := '0';
v.pred_ntaken := '0';
v_int.predicted_taken := '0';
v_int.pred_not_taken := '0';
v_int.rd_is_niap4 := '0';

if rst = '1' then
-- Reset vector, selectable through alt_reset_in
if alt_reset_in = '1' then
v.nia := ALT_RESET_ADDRESS;
else
v.nia := RESET_ADDRESS;
end if;
v.virt_mode := '0';
v.priv_mode := '1';
v.big_endian := '0';
v_int.mode_32bit := '0';
v_int.predicted_nia := (others => '0');
elsif w_in.redirect = '1' then
-- Writeback redirect: word-align the target and take over the mode
-- bits supplied by writeback
v.nia := w_in.redirect_nia(63 downto 2) & "00";
if w_in.mode_32bit = '1' then
v.nia(63 downto 32) := (others => '0');
end if;
v.virt_mode := w_in.virt_mode;
v.priv_mode := w_in.priv_mode;
v.big_endian := w_in.big_endian;
v_int.mode_32bit := w_in.mode_32bit;
elsif d_in.redirect = '1' then
-- Decode1 redirect: only the address changes, the mode bits are kept
v.nia := d_in.redirect_nia(63 downto 2) & "00";
if r_int.mode_32bit = '1' then
v.nia(63 downto 32) := (others => '0');
end if;
elsif r_int.predicted_taken = '1' then
-- Follow the target predicted in the previous cycle
v.nia := r_int.predicted_nia;
v.predicted := '1';
else
-- Sequential fetch
v_int.rd_is_niap4 := '1';
v.pred_ntaken := r_int.pred_not_taken;
v.nia := std_ulogic_vector(unsigned(r.nia) + 4);
if r_int.mode_32bit = '1' then
v.nia(63 downto 32) := x"00000000";
end if;
-- Use the BTC read data only if it is valid, the previous fetch was
-- sequential as well, and its tag matches the new NIA
if btc_rd_valid = '1' and r_int.rd_is_niap4 = '1' and
btc_rd_data(BTC_WIDTH - 2 downto BTC_TARGET_BITS)
= v.nia(BTC_TAG_BITS + BTC_ADDR_BITS + 1 downto BTC_ADDR_BITS + 2) then
v_int.predicted_taken := btc_rd_data(BTC_WIDTH - 1);
v_int.pred_not_taken := not btc_rd_data(BTC_WIDTH - 1);
end if;
end if;
-- Unconditional: this also overrides the clearing of predicted_nia done
-- in the reset branch above
v_int.predicted_nia := btc_rd_data(BTC_TARGET_BITS - 1 downto 0) & "00";

-- If the last NIA value went down with a stop mark, it didn't get
-- executed, and hence we shouldn't increment NIA.
advance_nia <= rst or w_in.redirect or d_in.redirect or (not r.stop_mark and not stall_in);

r_next <= v;
r_next_int <= v_int;

-- Update outputs to the icache
i_out <= r;

end process;
-- State registers: load the next-state values computed by the comb process
-- on every rising clock edge (reset and stall are handled in comb).
regs : process(clk)
begin
if rising_edge(clk) then
r <= rin;
r_int <= rin_int;
end if;
end process;

-- Next-state logic. Three updates are applied in order, so the later ones
-- win: advance when not stalled, then redirect from execute, then reset.
comb : process(all)
    variable v_out  : Fetch1ToFetch2Type;
    variable v_next : reg_internal_type;

    -- Address of the instruction word following addr
    function plus4(addr : std_ulogic_vector) return std_ulogic_vector is
    begin
        return std_ulogic_vector(unsigned(addr) + 4);
    end function;
begin
    v_out  := r;
    v_next := r_int;

    -- Sequential advance
    if stall_in = '0' then
        v_out.nia       := r_int.nia_next;
        v_next.nia_next := plus4(r_int.nia_next);
    end if;

    -- Redirect from the execution unit overrides the advance
    if e_in.redirect = '1' then
        v_out.nia       := e_in.redirect_nia;
        v_next.nia_next := plus4(e_in.redirect_nia);
    end if;

    -- Reset overrides everything
    if rst = '1' then
        v_out.nia       := RESET_ADDRESS;
        v_next.nia_next := plus4(RESET_ADDRESS);
    end if;

    -- Next state for the registers
    rin     <= v_out;
    rin_int <= v_next;

    -- The output is the registered state
    f_out <= r;
end process;

end architecture behaviour;

@ -0,0 +1,65 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;
use work.wishbone_types.all;

-- Second fetch stage: looks up the address from fetch1 in the icache and
-- hands the resulting instruction to decode1.
entity fetch2 is
port(
clk : in std_ulogic;
rst : in std_ulogic;

-- stall_in freezes the output register; stall_out is raised while the
-- icache has not acknowledged the lookup
stall_in : in std_ulogic;
stall_out : out std_ulogic;

-- Invalidates the instruction being registered
flush_in : in std_ulogic;

-- icache response and request
i_in : in IcacheToFetch2Type;
i_out : out Fetch2ToIcacheType;

-- Address from fetch1
f_in : in Fetch1ToFetch2Type;

-- Registered instruction towards decode1
f_out : out Fetch2ToDecode1Type
);
end entity fetch2;

architecture behaviour of fetch2 is
    -- r is the output register, rin its next-state value
    signal r, rin : Fetch2ToDecode1Type;
begin
    regs : process(clk)
    begin
        if rising_edge(clk) then
            -- A stall freezes the output register, except that a reset or
            -- a flush is still let through
            if stall_in = '0' or flush_in = '1' or rst = '1' then
                r <= rin;
            end if;
        end if;
    end process;

    comb : process(all)
        variable nxt : Fetch2ToDecode1Type;
    begin
        nxt := r;

        -- asynchronous icache lookup with the address coming from fetch1
        i_out.addr <= f_in.nia;
        i_out.req  <= '1';

        -- Without an ack from the icache there is nothing to deliver
        stall_out <= not i_in.ack;

        nxt.nia  := f_in.nia;
        nxt.insn := i_in.insn;

        -- The instruction is valid when acked, unless it is being flushed
        if flush_in = '1' then
            nxt.valid := '0';
        else
            nxt.valid := i_in.ack;
        end if;

        -- Next state for the register
        rin <= nxt;

        -- The output is the registered state
        f_out <= r;
    end process;
end architecture behaviour;

@ -1,30 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.glibc_random.all;

-- Random number source with a 64-bit "data" output, a 64-bit "raw" output
-- and an error flag.
entity random is
port (
clk : in std_ulogic;
-- New value on every rising clock edge
data : out std_ulogic_vector(63 downto 0);
-- In this implementation raw carries the same value as data
raw : out std_ulogic_vector(63 downto 0);
-- Error flag (tied low by this implementation)
err : out std_ulogic
);
end entity random;

architecture behaviour of random is
begin
    -- This implementation never signals an error
    err <= '0';

    -- Draw one 64-bit value from pseudorand per rising clock edge and put
    -- the same value on both outputs
    process(clk)
        variable sample : std_ulogic_vector(63 downto 0);
    begin
        if rising_edge(clk) then
            sample := pseudorand(64);
            raw  <= sample;
            data <= sample;
        end if;
    end process;
end behaviour;

@ -1,338 +0,0 @@
################################################################################
# clkin, reset, uart pins...
################################################################################
# NOTE(review): despite the heading, no board reset input is constrained in
# this file.

# clk200:0.p (positive leg of the differential input clock)
set_property LOC J19 [get_ports {clk200_p}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {clk200_p}]

# clk200:0.n (negative leg of the differential input clock)
set_property LOC H19 [get_ports {clk200_n}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {clk200_n}]

################################################################################
# P2 header used as UART
################################################################################

# Unused alternative pin pair, kept for reference
#set_property -dict { PACKAGE_PIN H5 IOSTANDARD LVCMOS33 } [get_ports { p2_io1_n }];
#set_property -dict { PACKAGE_PIN J5 IOSTANDARD LVCMOS33 } [get_ports { p2_io1_p }];
# AIO2_N
set_property -dict { PACKAGE_PIN J2 IOSTANDARD LVCMOS33 } [get_ports { uart_tx }];
# AIO2_P
set_property -dict { PACKAGE_PIN K2 IOSTANDARD LVCMOS33 } [get_ports { uart_rx }];

################################################################################
# DRAM
################################################################################

# ddram:0.a
set_property LOC M15 [get_ports {ddram_a[0]}]
set_property SLEW FAST [get_ports {ddram_a[0]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[0]}]

# ddram:0.a
set_property LOC L21 [get_ports {ddram_a[1]}]
set_property SLEW FAST [get_ports {ddram_a[1]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[1]}]

# ddram:0.a
set_property LOC M16 [get_ports {ddram_a[2]}]
set_property SLEW FAST [get_ports {ddram_a[2]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[2]}]

# ddram:0.a
set_property LOC L18 [get_ports {ddram_a[3]}]
set_property SLEW FAST [get_ports {ddram_a[3]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[3]}]

# ddram:0.a
set_property LOC K21 [get_ports {ddram_a[4]}]
set_property SLEW FAST [get_ports {ddram_a[4]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[4]}]

# ddram:0.a
set_property LOC M18 [get_ports {ddram_a[5]}]
set_property SLEW FAST [get_ports {ddram_a[5]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[5]}]

# ddram:0.a
set_property LOC M21 [get_ports {ddram_a[6]}]
set_property SLEW FAST [get_ports {ddram_a[6]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[6]}]

# ddram:0.a
set_property LOC N20 [get_ports {ddram_a[7]}]
set_property SLEW FAST [get_ports {ddram_a[7]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[7]}]

# ddram:0.a
set_property LOC M20 [get_ports {ddram_a[8]}]
set_property SLEW FAST [get_ports {ddram_a[8]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[8]}]

# ddram:0.a
set_property LOC N19 [get_ports {ddram_a[9]}]
set_property SLEW FAST [get_ports {ddram_a[9]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[9]}]

# ddram:0.a
set_property LOC J21 [get_ports {ddram_a[10]}]
set_property SLEW FAST [get_ports {ddram_a[10]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[10]}]

# ddram:0.a
set_property LOC M22 [get_ports {ddram_a[11]}]
set_property SLEW FAST [get_ports {ddram_a[11]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[11]}]

# ddram:0.a
set_property LOC K22 [get_ports {ddram_a[12]}]
set_property SLEW FAST [get_ports {ddram_a[12]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[12]}]

# ddram:0.a
set_property LOC N18 [get_ports {ddram_a[13]}]
set_property SLEW FAST [get_ports {ddram_a[13]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[13]}]

# ddram:0.a
set_property LOC N22 [get_ports {ddram_a[14]}]
set_property SLEW FAST [get_ports {ddram_a[14]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[14]}]

# ddram:0.a
set_property LOC J22 [get_ports {ddram_a[15]}]
set_property SLEW FAST [get_ports {ddram_a[15]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[15]}]

# ddram:0.ba
set_property LOC L19 [get_ports {ddram_ba[0]}]
set_property SLEW FAST [get_ports {ddram_ba[0]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_ba[0]}]

# ddram:0.ba
set_property LOC J20 [get_ports {ddram_ba[1]}]
set_property SLEW FAST [get_ports {ddram_ba[1]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_ba[1]}]

# ddram:0.ba
set_property LOC L20 [get_ports {ddram_ba[2]}]
set_property SLEW FAST [get_ports {ddram_ba[2]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_ba[2]}]

# ddram:0.ras_n
set_property LOC H20 [get_ports {ddram_ras_n}]
set_property SLEW FAST [get_ports {ddram_ras_n}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_ras_n}]

# ddram:0.cas_n
set_property LOC K18 [get_ports {ddram_cas_n}]
set_property SLEW FAST [get_ports {ddram_cas_n}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_cas_n}]

# ddram:0.we_n
set_property LOC L16 [get_ports {ddram_we_n}]
set_property SLEW FAST [get_ports {ddram_we_n}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_we_n}]

# ddram:0.dm
set_property LOC A19 [get_ports {ddram_dm[0]}]
set_property SLEW FAST [get_ports {ddram_dm[0]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dm[0]}]

# ddram:0.dm
set_property LOC G22 [get_ports {ddram_dm[1]}]
set_property SLEW FAST [get_ports {ddram_dm[1]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dm[1]}]

# ddram:0.dq
set_property LOC D19 [get_ports {ddram_dq[0]}]
set_property SLEW FAST [get_ports {ddram_dq[0]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[0]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[0]}]

# ddram:0.dq
set_property LOC B20 [get_ports {ddram_dq[1]}]
set_property SLEW FAST [get_ports {ddram_dq[1]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[1]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[1]}]

# ddram:0.dq
set_property LOC E19 [get_ports {ddram_dq[2]}]
set_property SLEW FAST [get_ports {ddram_dq[2]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[2]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[2]}]

# ddram:0.dq
set_property LOC A20 [get_ports {ddram_dq[3]}]
set_property SLEW FAST [get_ports {ddram_dq[3]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[3]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[3]}]

# ddram:0.dq
set_property LOC F19 [get_ports {ddram_dq[4]}]
set_property SLEW FAST [get_ports {ddram_dq[4]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[4]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[4]}]

# ddram:0.dq
set_property LOC C19 [get_ports {ddram_dq[5]}]
set_property SLEW FAST [get_ports {ddram_dq[5]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[5]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[5]}]

# ddram:0.dq
set_property LOC F20 [get_ports {ddram_dq[6]}]
set_property SLEW FAST [get_ports {ddram_dq[6]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[6]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[6]}]

# ddram:0.dq
set_property LOC C18 [get_ports {ddram_dq[7]}]
set_property SLEW FAST [get_ports {ddram_dq[7]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[7]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[7]}]

# ddram:0.dq
set_property LOC E22 [get_ports {ddram_dq[8]}]
set_property SLEW FAST [get_ports {ddram_dq[8]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[8]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[8]}]

# ddram:0.dq
set_property LOC G21 [get_ports {ddram_dq[9]}]
set_property SLEW FAST [get_ports {ddram_dq[9]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[9]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[9]}]

# ddram:0.dq
set_property LOC D20 [get_ports {ddram_dq[10]}]
set_property SLEW FAST [get_ports {ddram_dq[10]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[10]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[10]}]

# ddram:0.dq
set_property LOC E21 [get_ports {ddram_dq[11]}]
set_property SLEW FAST [get_ports {ddram_dq[11]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[11]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[11]}]

# ddram:0.dq
set_property LOC C22 [get_ports {ddram_dq[12]}]
set_property SLEW FAST [get_ports {ddram_dq[12]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[12]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[12]}]

# ddram:0.dq
set_property LOC D21 [get_ports {ddram_dq[13]}]
set_property SLEW FAST [get_ports {ddram_dq[13]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[13]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[13]}]

# ddram:0.dq
set_property LOC B22 [get_ports {ddram_dq[14]}]
set_property SLEW FAST [get_ports {ddram_dq[14]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[14]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[14]}]

# ddram:0.dq
set_property LOC D22 [get_ports {ddram_dq[15]}]
set_property SLEW FAST [get_ports {ddram_dq[15]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[15]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[15]}]

# ddram:0.dqs_p
set_property LOC F18 [get_ports {ddram_dqs_p[0]}]
set_property SLEW FAST [get_ports {ddram_dqs_p[0]}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_dqs_p[0]}]

# ddram:0.dqs_p
set_property LOC B21 [get_ports {ddram_dqs_p[1]}]
set_property SLEW FAST [get_ports {ddram_dqs_p[1]}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_dqs_p[1]}]

# ddram:0.dqs_n
set_property LOC E18 [get_ports {ddram_dqs_n[0]}]
set_property SLEW FAST [get_ports {ddram_dqs_n[0]}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_dqs_n[0]}]

# ddram:0.dqs_n
set_property LOC A21 [get_ports {ddram_dqs_n[1]}]
set_property SLEW FAST [get_ports {ddram_dqs_n[1]}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_dqs_n[1]}]

# ddram:0.clk_p
set_property LOC K17 [get_ports {ddram_clk_p}]
set_property SLEW FAST [get_ports {ddram_clk_p}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_clk_p}]

# ddram:0.clk_n
set_property LOC J17 [get_ports {ddram_clk_n}]
set_property SLEW FAST [get_ports {ddram_clk_n}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_clk_n}]

# ddram:0.cke
set_property LOC H22 [get_ports {ddram_cke}]
set_property SLEW FAST [get_ports {ddram_cke}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_cke}]

# ddram:0.odt
set_property LOC K19 [get_ports {ddram_odt}]
set_property SLEW FAST [get_ports {ddram_odt}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_odt}]

# ddram:0.reset_n
set_property LOC K16 [get_ports {ddram_reset_n}]
set_property SLEW FAST [get_ports {ddram_reset_n}]
set_property IOSTANDARD LVCMOS15 [get_ports {ddram_reset_n}]

################################################################################
# LEDs
################################################################################

# Four single LEDs, 3.3 V LVCMOS
set_property -dict { PACKAGE_PIN G3 IOSTANDARD LVCMOS33 } [get_ports { led0 }];
set_property -dict { PACKAGE_PIN H3 IOSTANDARD LVCMOS33 } [get_ports { led1 }];
set_property -dict { PACKAGE_PIN G4 IOSTANDARD LVCMOS33 } [get_ports { led2 }];
set_property -dict { PACKAGE_PIN H4 IOSTANDARD LVCMOS33 } [get_ports { led3 }];

###############################################################################
# SPI Flash
###############################################################################

# NOTE(review): no spi_flash_clk port is constrained in this file; presumably
# the flash clock is driven through the dedicated configuration clock pin.
# Confirm against the top level.
set_property -dict { PACKAGE_PIN T19 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_cs_n }];
set_property -dict { PACKAGE_PIN P22 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_mosi }];
set_property -dict { PACKAGE_PIN R22 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_miso }];
set_property -dict { PACKAGE_PIN P21 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_wp_n }];
set_property -dict { PACKAGE_PIN R21 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_hold_n }];


################################################################################
# Design constraints
################################################################################

# Internal 0.75 V reference for I/O bank 34
# (presumably the bank hosting the SSTL15 DRAM pins; confirm against the pinout)
set_property INTERNAL_VREF 0.750 [get_iobanks 34]
# Configuration from a 4-bit wide SPI flash
set_property CONFIG_MODE SPIx4 [current_design]
set_property BITSTREAM.CONFIG.SPI_BUSWIDTH 4 [current_design]
# Enable the over-temperature power-down option in the bitstream
set_property BITSTREAM.CONFIG.OVERTEMPPOWERDOWN ENABLE [current_design]
# Configuration bank voltage: 3.3 V, CFGBVS tied to VCCO
set_property CONFIG_VOLTAGE 3.3 [current_design]
set_property CFGBVS VCCO [current_design]
# Remaining bitstream options: SPI data on the falling edge, compressed
# bitstream, external master CCLK with divider 1
set_property BITSTREAM.CONFIG.SPI_FALL_EDGE YES [current_design]
set_property BITSTREAM.GENERAL.COMPRESS TRUE [current_design]
set_property BITSTREAM.CONFIG.EXTMASTERCCLK_EN Div-1 [current_design]

################################################################################
# Clock constraints
################################################################################


# Input clock: 5.0 ns period (200 MHz), defined on the net named clk200_p
create_clock -name clk200_p -period 5.0 [get_nets clk200_p]

################################################################################
# False path constraints
################################################################################

# The mr_ff / ars_ff1 / ars_ff2 properties used as filters below are not
# defined in this file; presumably they are attributes attached to
# synchroniser flip-flops by the generated RTL (confirm). All three commands
# use -quiet, so they do not fail when nothing matches.

# No timing through nets tagged mr_ff
set_false_path -quiet -through [get_nets -hierarchical -filter {mr_ff == TRUE}]

# No timing into the PRE pins of cells tagged ars_ff1 or ars_ff2
set_false_path -quiet -to [get_pins -filter {REF_PIN_NAME == PRE} -of_objects [get_cells -hierarchical -filter {ars_ff1 == TRUE || ars_ff2 == TRUE}]]

# At most 2 ns from the clock pin of an ars_ff1 cell to the D pin of an
# ars_ff2 cell
set_max_delay 2 -quiet -from [get_pins -filter {REF_PIN_NAME == C} -of_objects [get_cells -hierarchical -filter {ars_ff1 == TRUE}]] -to [get_pins -filter {REF_PIN_NAME == D} -of_objects [get_cells -hierarchical -filter {ars_ff2 == TRUE}]]

@ -0,0 +1,10 @@
# External clock input: pin E3, constrained to a 10 ns period (100 MHz) with
# a 50% duty cycle
set_property -dict { PACKAGE_PIN E3 IOSTANDARD LVCMOS33 } [get_ports { ext_clk }];
create_clock -add -name sys_clk_pin -period 10.00 -waveform {0 5} [get_ports { ext_clk }];

# External reset input
set_property -dict { PACKAGE_PIN C2 IOSTANDARD LVCMOS33 } [get_ports { ext_rst }];

# UART 0
set_property -dict { PACKAGE_PIN D10 IOSTANDARD LVCMOS33 } [get_ports { uart0_txd }];
set_property -dict { PACKAGE_PIN A9 IOSTANDARD LVCMOS33 } [get_ports { uart0_rxd }];

# Configuration bank voltage: 3.3 V, CFGBVS tied to VCCO
set_property CONFIG_VOLTAGE 3.3 [current_design]
set_property CFGBVS VCCO [current_design]

@ -1,554 +0,0 @@
################################################################################
# clkin, reset, uart pins...
################################################################################

set_property -dict { PACKAGE_PIN E3 IOSTANDARD LVCMOS33 } [get_ports { ext_clk }];

set_property -dict { PACKAGE_PIN C2 IOSTANDARD LVCMOS33 } [get_ports { ext_rst_n }];

set_property -dict { PACKAGE_PIN D10 IOSTANDARD LVCMOS33 } [get_ports { uart_main_tx }];
set_property -dict { PACKAGE_PIN A9 IOSTANDARD LVCMOS33 } [get_ports { uart_main_rx }];

################################################################################
# RGB LEDs
################################################################################

set_property -dict { PACKAGE_PIN E1 IOSTANDARD LVCMOS33 } [get_ports { led0_b }];
set_property -dict { PACKAGE_PIN F6 IOSTANDARD LVCMOS33 } [get_ports { led0_g }];
set_property -dict { PACKAGE_PIN G6 IOSTANDARD LVCMOS33 } [get_ports { led0_r }];
#set_property -dict { PACKAGE_PIN G4 IOSTANDARD LVCMOS33 } [get_ports { led1_b }];
#set_property -dict { PACKAGE_PIN J4 IOSTANDARD LVCMOS33 } [get_ports { led1_g }];
#set_property -dict { PACKAGE_PIN G3 IOSTANDARD LVCMOS33 } [get_ports { led1_r }];
#set_property -dict { PACKAGE_PIN H4 IOSTANDARD LVCMOS33 } [get_ports { led2_b }];
#set_property -dict { PACKAGE_PIN J2 IOSTANDARD LVCMOS33 } [get_ports { led2_g }];
#set_property -dict { PACKAGE_PIN J3 IOSTANDARD LVCMOS33 } [get_ports { led2_r }];
#set_property -dict { PACKAGE_PIN K2 IOSTANDARD LVCMOS33 } [get_ports { led3_b }];
#set_property -dict { PACKAGE_PIN H6 IOSTANDARD LVCMOS33 } [get_ports { led3_g }];
#set_property -dict { PACKAGE_PIN K1 IOSTANDARD LVCMOS33 } [get_ports { led3_r }];

################################################################################
# Normal LEDs
################################################################################

set_property -dict { PACKAGE_PIN H5 IOSTANDARD LVCMOS33 } [get_ports { led4 }];
set_property -dict { PACKAGE_PIN J5 IOSTANDARD LVCMOS33 } [get_ports { led5 }];
set_property -dict { PACKAGE_PIN T9 IOSTANDARD LVCMOS33 } [get_ports { led6 }];
set_property -dict { PACKAGE_PIN T10 IOSTANDARD LVCMOS33 } [get_ports { led7 }];

################################################################################
# SPI Flash
################################################################################

set_property -dict { PACKAGE_PIN L13 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_cs_n }];
set_property -dict { PACKAGE_PIN L16 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_clk }];
set_property -dict { PACKAGE_PIN K17 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_mosi }];
set_property -dict { PACKAGE_PIN K18 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_miso }];
set_property -dict { PACKAGE_PIN L14 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_wp_n }];
set_property -dict { PACKAGE_PIN M14 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_hold_n }];

# Put registers into IOBs to improve timing
set_property IOB true [get_cells -hierarchical -filter {NAME =~*/spi_rxtx/*sck_1*}]
set_property IOB true [get_cells -hierarchical -filter {NAME =~*/spi_rxtx/input_delay_1.dat_i_l*}]

################################################################################
# PMOD header JA (standard, 200 ohm protection resistors)
################################################################################

#set_property -dict { PACKAGE_PIN G13 IOSTANDARD LVCMOS33 } [get_ports { pmod_ja_1 }];
#set_property -dict { PACKAGE_PIN B11 IOSTANDARD LVCMOS33 } [get_ports { pmod_ja_2 }];
#set_property -dict { PACKAGE_PIN A11 IOSTANDARD LVCMOS33 } [get_ports { pmod_ja_3 }];
#set_property -dict { PACKAGE_PIN D12 IOSTANDARD LVCMOS33 } [get_ports { pmod_ja_4 }];
#set_property -dict { PACKAGE_PIN D13 IOSTANDARD LVCMOS33 } [get_ports { pmod_ja_7 }];
#set_property -dict { PACKAGE_PIN B18 IOSTANDARD LVCMOS33 } [get_ports { pmod_ja_8 }];
#set_property -dict { PACKAGE_PIN A18 IOSTANDARD LVCMOS33 } [get_ports { pmod_ja_9 }];
#set_property -dict { PACKAGE_PIN K16 IOSTANDARD LVCMOS33 } [get_ports { pmod_ja_10 }];

# connection to Digilent PmodSD on JA
set_property -dict { PACKAGE_PIN G13 IOSTANDARD LVCMOS33 SLEW FAST PULLUP TRUE } [get_ports { sdcard_data[3] }];
set_property -dict { PACKAGE_PIN B11 IOSTANDARD LVCMOS33 SLEW FAST PULLUP TRUE } [get_ports { sdcard_cmd }];
set_property -dict { PACKAGE_PIN A11 IOSTANDARD LVCMOS33 SLEW FAST PULLUP TRUE } [get_ports { sdcard_data[0] }];
set_property -dict { PACKAGE_PIN D12 IOSTANDARD LVCMOS33 SLEW FAST } [get_ports { sdcard_clk }];
set_property -dict { PACKAGE_PIN D13 IOSTANDARD LVCMOS33 SLEW FAST PULLUP TRUE } [get_ports { sdcard_data[1] }];
set_property -dict { PACKAGE_PIN B18 IOSTANDARD LVCMOS33 SLEW FAST PULLUP TRUE } [get_ports { sdcard_data[2] }];
set_property -dict { PACKAGE_PIN A18 IOSTANDARD LVCMOS33 } [get_ports { sdcard_cd }];
#set_property -dict { PACKAGE_PIN K16 IOSTANDARD LVCMOS33 } [get_ports { sdcard_wp }];

# Put registers into IOBs to improve timing
set_property IOB true [get_cells -hierarchical -filter {NAME =~*.litesdcard/sdcard_*}]

################################################################################
# PMOD header JB (high-speed, no protection resistors)
################################################################################

#set_property -dict { PACKAGE_PIN E15 IOSTANDARD LVCMOS33 } [get_ports { pmod_jb_1 }];
#set_property -dict { PACKAGE_PIN E16 IOSTANDARD LVCMOS33 } [get_ports { pmod_jb_2 }];
#set_property -dict { PACKAGE_PIN D15 IOSTANDARD LVCMOS33 } [get_ports { pmod_jb_3 }];
#set_property -dict { PACKAGE_PIN C15 IOSTANDARD LVCMOS33 } [get_ports { pmod_jb_4 }];
#set_property -dict { PACKAGE_PIN J17 IOSTANDARD LVCMOS33 } [get_ports { pmod_jb_7 }];
#set_property -dict { PACKAGE_PIN J18 IOSTANDARD LVCMOS33 } [get_ports { pmod_jb_8 }];
#set_property -dict { PACKAGE_PIN K15 IOSTANDARD LVCMOS33 } [get_ports { pmod_jb_9 }];
#set_property -dict { PACKAGE_PIN J15 IOSTANDARD LVCMOS33 } [get_ports { pmod_jb_10 }];

# connection to Digilent PmodSD on JB
#set_property -dict { PACKAGE_PIN E15 IOSTANDARD LVCMOS33 SLEW FAST PULLUP TRUE } [get_ports { sdcard_data[3] }];
#set_property -dict { PACKAGE_PIN E16 IOSTANDARD LVCMOS33 SLEW FAST PULLUP TRUE } [get_ports { sdcard_cmd }];
#set_property -dict { PACKAGE_PIN D15 IOSTANDARD LVCMOS33 SLEW FAST PULLUP TRUE } [get_ports { sdcard_data[0] }];
#set_property -dict { PACKAGE_PIN C15 IOSTANDARD LVCMOS33 SLEW FAST } [get_ports { sdcard_clk }];
#set_property -dict { PACKAGE_PIN J17 IOSTANDARD LVCMOS33 SLEW FAST PULLUP TRUE } [get_ports { sdcard_data[1] }];
#set_property -dict { PACKAGE_PIN J18 IOSTANDARD LVCMOS33 SLEW FAST PULLUP TRUE } [get_ports { sdcard_data[2] }];
#set_property -dict { PACKAGE_PIN K15 IOSTANDARD LVCMOS33 } [get_ports { sdcard_cd }];
#set_property -dict { PACKAGE_PIN J15 IOSTANDARD LVCMOS33 } [get_ports { sdcard_wp }];

################################################################################
# PMOD header JC (high-speed, no protection resistors)
################################################################################

#set_property -dict { PACKAGE_PIN U12 IOSTANDARD LVCMOS33 } [get_ports { pmod_jc_1 }];
#set_property -dict { PACKAGE_PIN V12 IOSTANDARD LVCMOS33 } [get_ports { pmod_jc_2 }];
#set_property -dict { PACKAGE_PIN V10 IOSTANDARD LVCMOS33 } [get_ports { pmod_jc_3 }];
#set_property -dict { PACKAGE_PIN V11 IOSTANDARD LVCMOS33 } [get_ports { pmod_jc_4 }];
#set_property -dict { PACKAGE_PIN U14 IOSTANDARD LVCMOS33 } [get_ports { pmod_jc_7 }];
#set_property -dict { PACKAGE_PIN V14 IOSTANDARD LVCMOS33 } [get_ports { pmod_jc_8 }];
#set_property -dict { PACKAGE_PIN T13 IOSTANDARD LVCMOS33 } [get_ports { pmod_jc_9 }];
#set_property -dict { PACKAGE_PIN U13 IOSTANDARD LVCMOS33 } [get_ports { pmod_jc_10 }];

################################################################################
# PMOD header JD (standard, 200 ohm protection resistors)
################################################################################

#set_property -dict { PACKAGE_PIN D4 IOSTANDARD LVCMOS33 } [get_ports { pmod_jd_1 }];
#set_property -dict { PACKAGE_PIN D3 IOSTANDARD LVCMOS33 } [get_ports { pmod_jd_2 }];
#set_property -dict { PACKAGE_PIN F4 IOSTANDARD LVCMOS33 } [get_ports { pmod_jd_3 }];
#set_property -dict { PACKAGE_PIN F3 IOSTANDARD LVCMOS33 } [get_ports { pmod_jd_4 }];
#set_property -dict { PACKAGE_PIN E2 IOSTANDARD LVCMOS33 } [get_ports { pmod_jd_7 }];
#set_property -dict { PACKAGE_PIN D2 IOSTANDARD LVCMOS33 } [get_ports { pmod_jd_8 }];
#set_property -dict { PACKAGE_PIN H2 IOSTANDARD LVCMOS33 } [get_ports { pmod_jd_9 }];
#set_property -dict { PACKAGE_PIN G2 IOSTANDARD LVCMOS33 } [get_ports { pmod_jd_10 }];

################################################################################
# Arduino/chipKIT shield connector
################################################################################

set_property -dict { PACKAGE_PIN V15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[0] }];
set_property -dict { PACKAGE_PIN U16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[1] }];
set_property -dict { PACKAGE_PIN P14 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[2] }];
set_property -dict { PACKAGE_PIN T11 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[3] }];
set_property -dict { PACKAGE_PIN R12 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[4] }];
set_property -dict { PACKAGE_PIN T14 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[5] }];
set_property -dict { PACKAGE_PIN T15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[6] }];
set_property -dict { PACKAGE_PIN T16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[7] }];
set_property -dict { PACKAGE_PIN N15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[8] }];
set_property -dict { PACKAGE_PIN M16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[9] }];
set_property -dict { PACKAGE_PIN V17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[10] }];
set_property -dict { PACKAGE_PIN U18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[11] }];
set_property -dict { PACKAGE_PIN R17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[12] }];
set_property -dict { PACKAGE_PIN P17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[13] }];
set_property -dict { PACKAGE_PIN U11 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[26] }];
set_property -dict { PACKAGE_PIN V16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[27] }];
set_property -dict { PACKAGE_PIN M13 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[28] }];
set_property -dict { PACKAGE_PIN R10 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[29] }];
set_property -dict { PACKAGE_PIN R11 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[30] }];
set_property -dict { PACKAGE_PIN R13 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[31] }];
set_property -dict { PACKAGE_PIN R15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[32] }];
set_property -dict { PACKAGE_PIN P15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[33] }];
set_property -dict { PACKAGE_PIN R16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[34] }];
set_property -dict { PACKAGE_PIN N16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[35] }];
set_property -dict { PACKAGE_PIN N14 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[36] }];
set_property -dict { PACKAGE_PIN U17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[37] }];
set_property -dict { PACKAGE_PIN T18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[38] }];
set_property -dict { PACKAGE_PIN R18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[39] }];
set_property -dict { PACKAGE_PIN P18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[40] }];
set_property -dict { PACKAGE_PIN N17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[41] }];
set_property -dict { PACKAGE_PIN M17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[42] }]; # A
set_property -dict { PACKAGE_PIN L18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[43] }]; # SCL
set_property -dict { PACKAGE_PIN M18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[44] }]; # SDA
#set_property -dict { PACKAGE_PIN C2 IOSTANDARD LVCMOS33 } [get_ports { shield_rst }];

#set_property -dict { PACKAGE_PIN C1 IOSTANDARD LVCMOS33 } [get_ports { spi_hdr_ss }];
#set_property -dict { PACKAGE_PIN F1 IOSTANDARD LVCMOS33 } [get_ports { spi_hdr_clk }];
#set_property -dict { PACKAGE_PIN H1 IOSTANDARD LVCMOS33 } [get_ports { spi_hdr_mosi }];
#set_property -dict { PACKAGE_PIN G1 IOSTANDARD LVCMOS33 } [get_ports { spi_hdr_miso }];

################################################################################
# Ethernet (generated by LiteX)
################################################################################

# eth_ref_clk:0
set_property LOC G18 [get_ports {eth_ref_clk}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_ref_clk}]

# eth_clocks:0.tx
set_property LOC H16 [get_ports {eth_clocks_tx}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_clocks_tx}]

# eth_clocks:0.rx
set_property LOC F15 [get_ports {eth_clocks_rx}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_clocks_rx}]

# eth:0.rst_n
set_property LOC C16 [get_ports {eth_rst_n}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rst_n}]

# eth:0.mdio
set_property LOC K13 [get_ports {eth_mdio}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_mdio}]

# eth:0.mdc
set_property LOC F16 [get_ports {eth_mdc}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_mdc}]

# eth:0.rx_dv
set_property LOC G16 [get_ports {eth_rx_dv}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rx_dv}]

# eth:0.rx_er
set_property LOC C17 [get_ports {eth_rx_er}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rx_er}]

# eth:0.rx_data
set_property LOC D18 [get_ports {eth_rx_data[0]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rx_data[0]}]

# eth:0.rx_data
set_property LOC E17 [get_ports {eth_rx_data[1]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rx_data[1]}]

# eth:0.rx_data
set_property LOC E18 [get_ports {eth_rx_data[2]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rx_data[2]}]

# eth:0.rx_data
set_property LOC G17 [get_ports {eth_rx_data[3]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rx_data[3]}]

# eth:0.tx_en
set_property LOC H15 [get_ports {eth_tx_en}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_tx_en}]

# eth:0.tx_data
set_property LOC H14 [get_ports {eth_tx_data[0]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_tx_data[0]}]

# eth:0.tx_data
set_property LOC J14 [get_ports {eth_tx_data[1]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_tx_data[1]}]

# eth:0.tx_data
set_property LOC J13 [get_ports {eth_tx_data[2]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_tx_data[2]}]

# eth:0.tx_data
set_property LOC H17 [get_ports {eth_tx_data[3]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_tx_data[3]}]

# eth:0.col
set_property LOC D17 [get_ports {eth_col}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_col}]

# eth:0.crs
set_property LOC G14 [get_ports {eth_crs}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_crs}]

################################################################################
# DRAM (generated by LiteX)
################################################################################

# ddram:0.a
set_property LOC R2 [get_ports {ddram_a[0]}]
set_property SLEW FAST [get_ports {ddram_a[0]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[0]}]

# ddram:0.a
set_property LOC M6 [get_ports {ddram_a[1]}]
set_property SLEW FAST [get_ports {ddram_a[1]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[1]}]

# ddram:0.a
set_property LOC N4 [get_ports {ddram_a[2]}]
set_property SLEW FAST [get_ports {ddram_a[2]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[2]}]

# ddram:0.a
set_property LOC T1 [get_ports {ddram_a[3]}]
set_property SLEW FAST [get_ports {ddram_a[3]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[3]}]

# ddram:0.a
set_property LOC N6 [get_ports {ddram_a[4]}]
set_property SLEW FAST [get_ports {ddram_a[4]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[4]}]

# ddram:0.a
set_property LOC R7 [get_ports {ddram_a[5]}]
set_property SLEW FAST [get_ports {ddram_a[5]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[5]}]

# ddram:0.a
set_property LOC V6 [get_ports {ddram_a[6]}]
set_property SLEW FAST [get_ports {ddram_a[6]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[6]}]

# ddram:0.a
set_property LOC U7 [get_ports {ddram_a[7]}]
set_property SLEW FAST [get_ports {ddram_a[7]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[7]}]

# ddram:0.a
set_property LOC R8 [get_ports {ddram_a[8]}]
set_property SLEW FAST [get_ports {ddram_a[8]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[8]}]

# ddram:0.a
set_property LOC V7 [get_ports {ddram_a[9]}]
set_property SLEW FAST [get_ports {ddram_a[9]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[9]}]

# ddram:0.a
set_property LOC R6 [get_ports {ddram_a[10]}]
set_property SLEW FAST [get_ports {ddram_a[10]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[10]}]

# ddram:0.a
set_property LOC U6 [get_ports {ddram_a[11]}]
set_property SLEW FAST [get_ports {ddram_a[11]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[11]}]

# ddram:0.a
set_property LOC T6 [get_ports {ddram_a[12]}]
set_property SLEW FAST [get_ports {ddram_a[12]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[12]}]

# ddram:0.a
set_property LOC T8 [get_ports {ddram_a[13]}]
set_property SLEW FAST [get_ports {ddram_a[13]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[13]}]

# ddram:0.ba
set_property LOC R1 [get_ports {ddram_ba[0]}]
set_property SLEW FAST [get_ports {ddram_ba[0]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_ba[0]}]

# ddram:0.ba
set_property LOC P4 [get_ports {ddram_ba[1]}]
set_property SLEW FAST [get_ports {ddram_ba[1]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_ba[1]}]

# ddram:0.ba
set_property LOC P2 [get_ports {ddram_ba[2]}]
set_property SLEW FAST [get_ports {ddram_ba[2]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_ba[2]}]

# ddram:0.ras_n
set_property LOC P3 [get_ports {ddram_ras_n}]
set_property SLEW FAST [get_ports {ddram_ras_n}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_ras_n}]

# ddram:0.cas_n
set_property LOC M4 [get_ports {ddram_cas_n}]
set_property SLEW FAST [get_ports {ddram_cas_n}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_cas_n}]

# ddram:0.we_n
set_property LOC P5 [get_ports {ddram_we_n}]
set_property SLEW FAST [get_ports {ddram_we_n}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_we_n}]

# ddram:0.cs_n
set_property LOC U8 [get_ports {ddram_cs_n}]
set_property SLEW FAST [get_ports {ddram_cs_n}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_cs_n}]

# ddram:0.dm
set_property LOC L1 [get_ports {ddram_dm[0]}]
set_property SLEW FAST [get_ports {ddram_dm[0]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dm[0]}]

# ddram:0.dm
set_property LOC U1 [get_ports {ddram_dm[1]}]
set_property SLEW FAST [get_ports {ddram_dm[1]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dm[1]}]

# ddram:0.dq
set_property LOC K5 [get_ports {ddram_dq[0]}]
set_property SLEW FAST [get_ports {ddram_dq[0]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[0]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[0]}]

# ddram:0.dq
set_property LOC L3 [get_ports {ddram_dq[1]}]
set_property SLEW FAST [get_ports {ddram_dq[1]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[1]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[1]}]

# ddram:0.dq
set_property LOC K3 [get_ports {ddram_dq[2]}]
set_property SLEW FAST [get_ports {ddram_dq[2]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[2]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[2]}]

# ddram:0.dq
set_property LOC L6 [get_ports {ddram_dq[3]}]
set_property SLEW FAST [get_ports {ddram_dq[3]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[3]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[3]}]

# ddram:0.dq
set_property LOC M3 [get_ports {ddram_dq[4]}]
set_property SLEW FAST [get_ports {ddram_dq[4]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[4]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[4]}]

# ddram:0.dq
set_property LOC M1 [get_ports {ddram_dq[5]}]
set_property SLEW FAST [get_ports {ddram_dq[5]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[5]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[5]}]

# ddram:0.dq
set_property LOC L4 [get_ports {ddram_dq[6]}]
set_property SLEW FAST [get_ports {ddram_dq[6]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[6]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[6]}]

# ddram:0.dq
set_property LOC M2 [get_ports {ddram_dq[7]}]
set_property SLEW FAST [get_ports {ddram_dq[7]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[7]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[7]}]

# ddram:0.dq
set_property LOC V4 [get_ports {ddram_dq[8]}]
set_property SLEW FAST [get_ports {ddram_dq[8]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[8]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[8]}]

# ddram:0.dq
set_property LOC T5 [get_ports {ddram_dq[9]}]
set_property SLEW FAST [get_ports {ddram_dq[9]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[9]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[9]}]

# ddram:0.dq
set_property LOC U4 [get_ports {ddram_dq[10]}]
set_property SLEW FAST [get_ports {ddram_dq[10]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[10]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[10]}]

# ddram:0.dq
set_property LOC V5 [get_ports {ddram_dq[11]}]
set_property SLEW FAST [get_ports {ddram_dq[11]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[11]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[11]}]

# ddram:0.dq
set_property LOC V1 [get_ports {ddram_dq[12]}]
set_property SLEW FAST [get_ports {ddram_dq[12]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[12]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[12]}]

# ddram:0.dq
set_property LOC T3 [get_ports {ddram_dq[13]}]
set_property SLEW FAST [get_ports {ddram_dq[13]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[13]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[13]}]

# ddram:0.dq
set_property LOC U3 [get_ports {ddram_dq[14]}]
set_property SLEW FAST [get_ports {ddram_dq[14]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[14]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[14]}]

# ddram:0.dq
set_property LOC R3 [get_ports {ddram_dq[15]}]
set_property SLEW FAST [get_ports {ddram_dq[15]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[15]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[15]}]

# ddram:0.dqs_p
set_property LOC N2 [get_ports {ddram_dqs_p[0]}]
set_property SLEW FAST [get_ports {ddram_dqs_p[0]}]
set_property IOSTANDARD DIFF_SSTL135 [get_ports {ddram_dqs_p[0]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dqs_p[0]}]

# ddram:0.dqs_p
set_property LOC U2 [get_ports {ddram_dqs_p[1]}]
set_property SLEW FAST [get_ports {ddram_dqs_p[1]}]
set_property IOSTANDARD DIFF_SSTL135 [get_ports {ddram_dqs_p[1]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dqs_p[1]}]

# ddram:0.dqs_n
set_property LOC N1 [get_ports {ddram_dqs_n[0]}]
set_property SLEW FAST [get_ports {ddram_dqs_n[0]}]
set_property IOSTANDARD DIFF_SSTL135 [get_ports {ddram_dqs_n[0]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dqs_n[0]}]

# ddram:0.dqs_n
set_property LOC V2 [get_ports {ddram_dqs_n[1]}]
set_property SLEW FAST [get_ports {ddram_dqs_n[1]}]
set_property IOSTANDARD DIFF_SSTL135 [get_ports {ddram_dqs_n[1]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dqs_n[1]}]

# ddram:0.clk_p
set_property LOC U9 [get_ports {ddram_clk_p}]
set_property SLEW FAST [get_ports {ddram_clk_p}]
set_property IOSTANDARD DIFF_SSTL135 [get_ports {ddram_clk_p}]

# ddram:0.clk_n
set_property LOC V9 [get_ports {ddram_clk_n}]
set_property SLEW FAST [get_ports {ddram_clk_n}]
set_property IOSTANDARD DIFF_SSTL135 [get_ports {ddram_clk_n}]

# ddram:0.cke
set_property LOC N5 [get_ports {ddram_cke}]
set_property SLEW FAST [get_ports {ddram_cke}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_cke}]

# ddram:0.odt
set_property LOC R5 [get_ports {ddram_odt}]
set_property SLEW FAST [get_ports {ddram_odt}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_odt}]

# ddram:0.reset_n
set_property LOC K6 [get_ports {ddram_reset_n}]
set_property SLEW FAST [get_ports {ddram_reset_n}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_reset_n}]

################################################################################
# Design constraints and bitstream attributes
################################################################################

# Internal VREF: generate the input reference voltage inside I/O bank 34.
# 0.675 V is half of 1.35 V, which matches the SSTL135 I/O standard assigned
# to the ddram_* ports in this file.
# NOTE(review): presumably bank 34 is the bank hosting the ddram_* pins -
# confirm against the device pinout.
set_property INTERNAL_VREF 0.675 [get_iobanks 34]

# Configuration bank supply: 3.3 V, with CFGBVS tied to VCCO.
set_property CONFIG_VOLTAGE 3.3 [current_design]
set_property CFGBVS VCCO [current_design]

# Bitstream options: compress the bitstream, use configuration rate setting
# 33, and configure over a 4-bit wide (quad) SPI interface.
set_property BITSTREAM.GENERAL.COMPRESS TRUE [current_design]
set_property BITSTREAM.CONFIG.CONFIGRATE 33 [current_design]
set_property CONFIG_MODE SPIx4 [current_design]

################################################################################
# Clock constraints
################################################################################

# Board clock on ext_clk: 10 ns period (100 MHz).
create_clock -name sys_clk_pin -period 10.00 [get_ports { ext_clk }];

# Ethernet receive clock: 40 ns period (25 MHz).
create_clock -name eth_rx_clk -period 40.0 [get_ports { eth_clocks_rx }]

# Ethernet transmit clock: 40 ns period (25 MHz).
create_clock -name eth_tx_clk -period 40.0 [get_ports { eth_clocks_tx }]

# The three statements below declare the clocks found on the system_clk,
# eth_clocks_rx and eth_clocks_tx nets (including clocks generated from them)
# as pairwise asynchronous, so paths between them are not timed.

# system_clk <-> eth_clocks_rx
set_clock_groups -group [get_clocks -include_generated_clocks -of [get_nets system_clk]] -group [get_clocks -include_generated_clocks -of [get_nets eth_clocks_rx]] -asynchronous

# system_clk <-> eth_clocks_tx
set_clock_groups -group [get_clocks -include_generated_clocks -of [get_nets system_clk]] -group [get_clocks -include_generated_clocks -of [get_nets eth_clocks_tx]] -asynchronous

# eth_clocks_rx <-> eth_clocks_tx
set_clock_groups -group [get_clocks -include_generated_clocks -of [get_nets eth_clocks_rx]] -group [get_clocks -include_generated_clocks -of [get_nets eth_clocks_tx]] -asynchronous

################################################################################
# False path constraints (from LiteX as they relate to LiteDRAM and LiteEth)
################################################################################

# Exclude from timing analysis every path that passes through a net carrying
# the user property mr_ff == TRUE. -quiet keeps the command silent when no
# such net exists in the design.
set_false_path -quiet -through [get_nets -hierarchical -filter {mr_ff == TRUE}]

# Exclude paths that end on the PRE pin of any cell tagged ars_ff1 or ars_ff2.
# NOTE(review): presumably these are the two stages of an asynchronous-reset
# synchronizer - confirm against the generated netlist.
set_false_path -quiet -to [get_pins -filter {REF_PIN_NAME == PRE} -of_objects [get_cells -hierarchical -filter {ars_ff1 == TRUE || ars_ff2 == TRUE}]]

# Bound the path from the clock pin (C) of ars_ff1 cells to the data pin (D)
# of ars_ff2 cells to a maximum delay of 2 (XDC time unit: ns).
set_max_delay 2 -quiet -from [get_pins -filter {REF_PIN_NAME == C} -of_objects [get_cells -hierarchical -filter {ars_ff1 == TRUE}]] -to [get_pins -filter {REF_PIN_NAME == D} -of_objects [get_cells -hierarchical -filter {ars_ff2 == TRUE}]]

@ -2,11 +2,6 @@ library ieee;
use ieee.std_logic_1164.all;

entity clock_generator is
generic (
CLK_INPUT_HZ : positive := 50000000;
CLK_OUTPUT_HZ : positive := 50000000
);

port (
ext_clk : in std_logic;
pll_rst_in : in std_logic;
@ -18,8 +13,8 @@ end entity clock_generator;
architecture bypass of clock_generator is

begin
assert CLK_INPUT_HZ = CLK_OUTPUT_HZ severity FAILURE;

pll_locked_out <= not pll_rst_in;
pll_clk_out <= ext_clk;
pll_locked_out <= not pll_rst_in;
pll_clk_out <= ext_clk;

end architecture bypass;

@ -1,136 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;

-- Clock generator interface: derives the output clock pll_clk_out from the
-- external clock ext_clk and reports PLL lock on pll_locked_out.
entity clock_generator is
generic (
-- Frequency of ext_clk in Hz.
CLK_INPUT_HZ : positive := 12000000;
-- Requested frequency of pll_clk_out in Hz.
CLK_OUTPUT_HZ : positive := 50000000
);

port (
-- External reference clock input.
ext_clk : in std_logic;
-- Reset input, forwarded to the PLL.
pll_rst_in : in std_logic;
-- Generated clock output.
pll_clk_out : out std_logic;
-- PLL lock indication output.
pll_locked_out : out std_logic
);

end entity clock_generator;

-- ECP5 implementation of clock_generator built around one EHXPLLL primitive.
--
-- NOTE(review): the architecture is named "bypass" although it instantiates
-- a PLL; the name is left unchanged because other files may bind to it.
--
-- Frequency plan (as wired in the generic/port maps below):
--   * ext_clk is divided by CLKI_DIV to form the PLL reference (PLL_IN).
--   * The feedback path is CLKOS, so CLKOS runs at PLL_IN * CLKFB_DIV and the
--     internal oscillator at CLKOS * CLKOS_DIV (PLL_OUT).
--   * pll_clk_out is CLKOP, i.e. PLL_OUT divided by CLKOP_DIV.
--
-- All dividers are computed with VHDL integer division, which truncates.
-- The assertions at the top of the statement part catch generics that would
-- otherwise silently yield a zero divider or an inexact output frequency.
architecture bypass of clock_generator is

    -- prototype of ECP5 PLL
    component EHXPLLL is
        generic (
            CLKI_DIV         : integer := 1;
            CLKFB_DIV        : integer := 1;
            CLKOP_DIV        : integer := 8;
            CLKOS_DIV        : integer := 8;
            CLKOS2_DIV       : integer := 8;
            CLKOS3_DIV       : integer := 8;
            CLKOP_ENABLE     : string  := "ENABLED";
            CLKOS_ENABLE     : string  := "DISABLED";
            CLKOS2_ENABLE    : string  := "DISABLED";
            CLKOS3_ENABLE    : string  := "DISABLED";
            CLKOP_CPHASE     : integer := 0;
            CLKOS_CPHASE     : integer := 0;
            CLKOS2_CPHASE    : integer := 0;
            CLKOS3_CPHASE    : integer := 0;
            CLKOP_FPHASE     : integer := 0;
            CLKOS_FPHASE     : integer := 0;
            CLKOS2_FPHASE    : integer := 0;
            CLKOS3_FPHASE    : integer := 0;
            FEEDBK_PATH      : string  := "CLKOP";
            CLKOP_TRIM_POL   : string  := "RISING";
            CLKOP_TRIM_DELAY : integer := 0;
            CLKOS_TRIM_POL   : string  := "RISING";
            CLKOS_TRIM_DELAY : integer := 0;
            OUTDIVIDER_MUXA  : string  := "DIVA";
            OUTDIVIDER_MUXB  : string  := "DIVB";
            OUTDIVIDER_MUXC  : string  := "DIVC";
            OUTDIVIDER_MUXD  : string  := "DIVD";
            PLL_LOCK_MODE    : integer := 0;
            PLL_LOCK_DELAY   : integer := 200;
            STDBY_ENABLE     : string  := "DISABLED";
            REFIN_RESET      : string  := "DISABLED";
            SYNC_ENABLE      : string  := "DISABLED";
            INT_LOCK_STICKY  : string  := "ENABLED";
            DPHASE_SOURCE    : string  := "DISABLED";
            PLLRST_ENA       : string  := "DISABLED";
            INTFB_WAKE       : string  := "DISABLED" );
        port (
            CLKI         : in  std_logic;
            CLKFB        : in  std_logic;
            PHASESEL1    : in  std_logic;
            PHASESEL0    : in  std_logic;
            PHASEDIR     : in  std_logic;
            PHASESTEP    : in  std_logic;
            PHASELOADREG : in  std_logic;
            STDBY        : in  std_logic;
            PLLWAKESYNC  : in  std_logic;
            RST          : in  std_logic;
            ENCLKOP      : in  std_logic;
            ENCLKOS      : in  std_logic;
            ENCLKOS2     : in  std_logic;
            ENCLKOS3     : in  std_logic;
            CLKOP        : out std_logic;
            CLKOS        : out std_logic;
            CLKOS2       : out std_logic;
            CLKOS3       : out std_logic;
            LOCK         : out std_logic;
            INTLOCK      : out std_logic;
            REFCLK       : out std_logic;
            CLKINTFB     : out std_logic );
    end component;

    -- CLKOS is only used as the PLL feedback clock; CLKOP drives pll_clk_out.
    signal clkos : std_ulogic;
    signal clkop : std_logic;
    signal lock  : std_logic;

    -- PLL constants
    -- According to the datasheet, PLL_IN needs to be between 10 and 400 MHz
    -- PLL_OUT needs to be between 400 and 800 MHz
    -- PLL_IN is chosen based on 12 and 48 MHz being common values
    -- for the reference clock.
    constant PLL_IN  : natural := 12000000;
    constant PLL_OUT : natural := 480000000;

    -- Configuration for ECP5 PLL
    constant PLL_CLKOP_DIV : natural := PLL_OUT/CLK_OUTPUT_HZ;
    constant PLL_CLKOS_DIV : natural := 2;
    constant PLL_CLKFB_DIV : natural := PLL_OUT/PLL_CLKOS_DIV/PLL_IN;
    constant PLL_CLKI_DIV  : natural := CLK_INPUT_HZ/PLL_IN;

begin
    -- A zero divider (CLK_INPUT_HZ < PLL_IN, or CLK_OUTPUT_HZ > PLL_OUT) is
    -- never a usable PLL setting, so stop elaboration.
    assert PLL_CLKI_DIV >= 1
        report "clock_generator: CLK_INPUT_HZ is below PLL_IN, CLKI_DIV would be 0"
        severity failure;
    assert PLL_CLKOP_DIV >= 1
        report "clock_generator: CLK_OUTPUT_HZ is above PLL_OUT, CLKOP_DIV would be 0"
        severity failure;

    -- Inexact ratios still elaborate (existing configurations keep working),
    -- but the truncated divider means the real frequencies differ from the
    -- requested ones, so make that visible.
    assert CLK_INPUT_HZ mod PLL_IN = 0
        report "clock_generator: CLK_INPUT_HZ is not a multiple of PLL_IN; " &
               "PLL reference and output frequencies will be off"
        severity warning;
    assert PLL_OUT mod CLK_OUTPUT_HZ = 0
        report "clock_generator: PLL_OUT is not a multiple of CLK_OUTPUT_HZ; " &
               "pll_clk_out will not run at exactly CLK_OUTPUT_HZ"
        severity warning;

    pll_clk_out    <= clkop;
    pll_locked_out <= lock;

    clkgen: EHXPLLL
        generic map(
            CLKOP_DIV    => PLL_CLKOP_DIV,
            CLKOS_ENABLE => "ENABLED",
            CLKOS_DIV    => PLL_CLKOS_DIV,
            CLKFB_DIV    => PLL_CLKFB_DIV,
            CLKI_DIV     => PLL_CLKI_DIV,
            FEEDBK_PATH  => "CLKOS"
            )
        port map (
            CLKI         => ext_clk,
            CLKOP        => clkop,
            CLKOS        => clkos,
            -- CLKOS is looped back as the feedback clock (FEEDBK_PATH above).
            CLKFB        => clkos,
            LOCK         => lock,
            RST          => pll_rst_in,
            -- Dynamic phase adjustment and standby are unused: tie off.
            PHASESEL1    => '0',
            PHASESEL0    => '0',
            PHASEDIR     => '0',
            PHASESTEP    => '0',
            PHASELOADREG => '0',
            STDBY        => '0',
            PLLWAKESYNC  => '0',
            -- Only the CLKOP and CLKOS outputs are enabled.
            ENCLKOP      => '1',
            ENCLKOS      => '1',
            ENCLKOS2     => '0',
            ENCLKOS3     => '0'
            );

end architecture bypass;

@ -6,9 +6,7 @@ use UNISIM.vcomponents.all;

entity clock_generator is
generic (
CLK_INPUT_HZ : positive := 12000000;
CLK_OUTPUT_HZ : positive := 50000000
);
clk_period_hz : positive := 12000000);
port (
ext_clk : in std_logic;
pll_rst_in : in std_logic;
@ -24,66 +22,28 @@ architecture rtl of clock_generator is
clkfbout_mult : real range 2.0 to 64.0;
clkout_divide : real range 1.0 to 128.0;
divclk_divide : integer range 1 to 106;
force_rst : std_ulogic;
end record;

function gen_pll_settings (
constant input_hz : positive;
constant output_hz : positive)
constant freq_hz : positive)
return pll_settings_t is

constant bad_settings : pll_settings_t :=
(clkin_period => 0.0,
clkfbout_mult => 2.0,
clkout_divide => 1.0,
divclk_divide => 1,
force_rst => '1');
begin
case input_hz is
when 100000000 =>
case output_hz is
when 100000000 =>
return (clkin_period => 10.0,
clkfbout_mult => 16.0,
clkout_divide => 16.0,
divclk_divide => 1,
force_rst => '0');
when 50000000 =>
return (clkin_period => 10.0,
clkfbout_mult => 16.0,
clkout_divide => 32.0,
divclk_divide => 1,
force_rst => '0');
when others =>
report "Unsupported output frequency" severity failure;
return bad_settings;
end case;
when 12000000 =>
case output_hz is
when 100000000 =>
return (clkin_period => 83.33,
clkfbout_mult => 50.0,
clkout_divide => 6.0,
divclk_divide => 1,
force_rst => '0');
when 50000000 =>
return (clkin_period => 83.33,
clkfbout_mult => 50.0,
clkout_divide => 12.0,
divclk_divide => 1,
force_rst => '0');
when others =>
report "Unsupported output frequency" severity failure;
return bad_settings;
end case;
when others =>
if freq_hz = 100000000 then
return (clkin_period => 10.0,
clkfbout_mult => 16.0,
clkout_divide => 32.0,
divclk_divide => 1);
elsif freq_hz = 12000000 then
return (clkin_period => 83.33,
clkfbout_mult => 50.0,
clkout_divide => 12.0,
divclk_divide => 1);
else
report "Unsupported input frequency" severity failure;
return bad_settings;
end case;
end if;
end function gen_pll_settings;

constant pll_settings : pll_settings_t := gen_pll_settings(clk_input_hz,
clk_output_hz);
constant pll_settings : pll_settings_t := gen_pll_settings(clk_period_hz);
begin
pll : MMCME2_BASE
generic map (
@ -111,6 +71,6 @@ begin
CLKFBIN => clkfb,
CLKIN1 => ext_clk,
PWRDWN => '0',
RST => pll_rst_in or pll_settings.force_rst
RST => pll_rst_in
);
end architecture rtl;

@ -5,113 +5,67 @@ Library UNISIM;
use UNISIM.vcomponents.all;

entity clock_generator is
generic (
CLK_INPUT_HZ : positive := 100000000;
CLK_OUTPUT_HZ : positive := 100000000
);
port (
ext_clk : in std_logic;
pll_rst_in : in std_logic;
pll_clk_out : out std_logic;
pll_locked_out : out std_logic);
generic (
clk_period_hz : positive := 100000000);
port (
ext_clk : in std_logic;
pll_rst_in : in std_logic;
pll_clk_out : out std_logic;
pll_locked_out : out std_logic);
end entity clock_generator;

architecture rtl of clock_generator is
signal clkfb : std_ulogic;

type pll_settings_t is record
clkin_period : real range 0.000 to 52.631;
clkfbout_mult : integer range 2 to 64;
clkout_divide : integer range 1 to 128;
divclk_divide : integer range 1 to 56;
force_rst : std_ulogic;
end record;
signal clkfb : std_ulogic;

function gen_pll_settings (
constant input_hz : positive;
constant output_hz : positive)
return pll_settings_t is
type pll_settings_t is record
clkin_period : real range 0.000 to 52.631;
clkfbout_mult : integer range 2 to 64;
clkout_divide : integer range 1 to 128;
divclk_divide : integer range 1 to 56;
end record;

constant bad_settings : pll_settings_t :=
(clkin_period => 0.0,
clkfbout_mult => 2,
clkout_divide => 1,
divclk_divide => 1,
force_rst => '1');
begin
case input_hz is
when 200000000 =>
case output_hz is
when 100000000 =>
return (clkin_period => 5.0,
clkfbout_mult => 8,
clkout_divide => 16,
divclk_divide => 1,
force_rst => '0');
when others =>
report "Unsupported output frequency" severity failure;
return bad_settings;
end case;
when 100000000 =>
case output_hz is
when 100000000 =>
return (clkin_period => 10.0,
clkfbout_mult => 16,
clkout_divide => 16,
divclk_divide => 1,
force_rst => '0');
when 50000000 =>
return (clkin_period => 10.0,
clkfbout_mult => 16,
clkout_divide => 32,
divclk_divide => 1,
force_rst => '0');
when others =>
report "Unsupported output frequency" severity failure;
return bad_settings;
end case;
when 50000000 =>
case output_hz is
when 100000000 =>
return (clkin_period => 20.0,
clkfbout_mult => 32,
clkout_divide => 16,
divclk_divide => 1,
force_rst => '0');
when others =>
report "Unsupported output frequency" severity failure;
return bad_settings;
end case;
when others =>
report "Unsupported input frequency" severity failure;
return bad_settings;
end case;
end function gen_pll_settings;
function gen_pll_settings (
constant freq_hz : positive)
return pll_settings_t is
begin
if freq_hz = 100000000 then
return (clkin_period => 10.0,
clkfbout_mult => 16,
clkout_divide => 32,
divclk_divide => 1);
else
report "Unsupported input frequency" severity failure;
-- return (clkin_period => 0.0,
-- clkfbout_mult => 0,
-- clkout_divide => 0,
-- divclk_divide => 0);
end if;
end function gen_pll_settings;

constant pll_settings : pll_settings_t := gen_pll_settings(clk_input_hz,
clk_output_hz);
constant pll_settings : pll_settings_t := gen_pll_settings(clk_period_hz);
begin

pll : PLLE2_BASE
generic map (
BANDWIDTH => "OPTIMIZED",
CLKFBOUT_MULT => pll_settings.clkfbout_mult,
CLKIN1_PERIOD => pll_settings.clkin_period,
CLKOUT0_DIVIDE => pll_settings.clkout_divide,
DIVCLK_DIVIDE => pll_settings.divclk_divide,
STARTUP_WAIT => "FALSE")
port map (
CLKOUT0 => pll_clk_out,
CLKOUT1 => open,
CLKOUT2 => open,
CLKOUT3 => open,
CLKOUT4 => open,
CLKOUT5 => open,
CLKFBOUT => clkfb,
LOCKED => pll_locked_out,
CLKIN1 => ext_clk,
PWRDWN => '0',
RST => pll_rst_in or pll_settings.force_rst,
CLKFBIN => clkfb);
pll : PLLE2_BASE
generic map (
BANDWIDTH => "OPTIMIZED",
CLKFBOUT_MULT => pll_settings.clkfbout_mult,
CLKIN1_PERIOD => pll_settings.clkin_period,
CLKOUT0_DIVIDE => pll_settings.clkout_divide,
DIVCLK_DIVIDE => pll_settings.divclk_divide,
STARTUP_WAIT => "FALSE")
port map (
CLKOUT0 => pll_clk_out,
CLKOUT1 => open,
CLKOUT2 => open,
CLKOUT3 => open,
CLKOUT4 => open,
CLKOUT5 => open,
CLKFBOUT => clkfb,
LOCKED => pll_locked_out,
CLKIN1 => ext_clk,
PWRDWN => '0',
RST => pll_rst_in,
CLKFBIN => clkfb);

end architecture rtl;

@ -1,15 +1,8 @@
## Clock signal 12 MHz
set_property -dict { PACKAGE_PIN L17 IOSTANDARD LVCMOS33 } [get_ports { ext_clk }];
create_clock -name sys_clk_pin -period 83.33 [get_ports {ext_clk}];
create_clock -add -name sys_clk_pin -period 83.33 -waveform {0 41.66} [get_ports {ext_clk}];

set_property -dict { PACKAGE_PIN J18 IOSTANDARD LVCMOS33 } [get_ports { uart0_txd }];
set_property -dict { PACKAGE_PIN J17 IOSTANDARD LVCMOS33 } [get_ports { uart0_rxd }];

set_property -dict { PACKAGE_PIN A18 IOSTANDARD LVCMOS33 } [get_ports { ext_rst }];

set_property CONFIG_VOLTAGE 3.3 [current_design]
set_property CFGBVS VCCO [current_design]

set_property BITSTREAM.GENERAL.COMPRESS TRUE [current_design]
set_property BITSTREAM.CONFIG.CONFIGRATE 33 [current_design]
set_property CONFIG_MODE SPIx4 [current_design]

@ -1,53 +0,0 @@
-- Random number generator for Microwatt
-- Based on https://pdfs.semanticscholar.org/83ac/9e9c1bb3dad5180654984604c8d5d8137412.pdf
-- "High Speed True Random Number Generators in Xilinx FPGAs"
-- by Catalin Baetoniu, Xilinx Inc.

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;

-- Interface of the random number generator.
-- All outputs are driven by the "behaviour" architecture of this entity.
entity random is
port (
-- Sampling clock for the output registers.
clk : in std_ulogic;
-- Conditioned output: current state of the cellular automaton (lhca).
data : out std_ulogic_vector(63 downto 0);
-- Unconditioned output: the ring-oscillator sample, registered twice.
raw : out std_ulogic_vector(63 downto 0);
-- Error flag; the architecture ties it to '0'.
err : out std_ulogic
);
end entity random;

-- Implementation: 64 cross-coupled combinational ring oscillators are sampled
-- on clk and mixed into a 64-bit linear hybrid cellular automaton (LHCA).
-- NOTE(review): none of the signals below has an initial value or a reset, so
-- in simulation they presumably stay 'U'/'X'; the design appears to target
-- synthesis only -- confirm.
architecture behaviour of random is
-- Combinational oscillator nodes (each depends on itself and its neighbours).
signal ringosc : std_ulogic_vector(63 downto 0);
-- ringosc sampled on the rising edge of clk.
signal ro_reg : std_ulogic_vector(63 downto 0);
-- State of the cellular automaton; drives the data output.
signal lhca : std_ulogic_vector(63 downto 0);

-- Per-cell mask selecting which cells feed their own value back into the
-- next state (all cells except bit 2).
constant lhca_diag : std_ulogic_vector(63 downto 0) := x"fffffffffffffffb";

begin
-- Purely combinational process: every node is the XOR of itself and its two
-- neighbours, with wrap-around between bits 0 and 63. This forms
-- intentional combinational loops.
random_osc : process(all)
begin
-- chaotic set of ring oscillators
ringosc(0) <= ringosc(63) xor ringosc(0) xor ringosc(1);
for i in 1 to 62 loop
ringosc(i) <= ringosc(i-1) xor ringosc(i) xor ringosc(i+1);
end loop;
-- Bit 63 is the only inverted node.
ringosc(63) <= not (ringosc(62) xor ringosc(63) xor ringosc(0));
end process;

-- Clocked process: samples the oscillators and advances the automaton.
lhca_update : process(clk)
begin
if rising_edge(clk) then
-- First register stage for the asynchronous oscillator outputs.
ro_reg <= ringosc;
-- raw lags ro_reg by one clock (it sees the previous ro_reg value).
raw <= ro_reg;
-- linear hybrid cellular automaton
-- used to even out the statistics of the ring oscillators
-- Next state = right-shifted state xor masked state xor left-shifted
-- state xor the oscillator sample.
lhca <= ('0' & lhca(63 downto 1)) xor (lhca and lhca_diag) xor
(lhca(62 downto 0) & '0') xor ro_reg;
end if;
end process;

data <= lhca;
-- No error detection is implemented; err is constant.
err <= '0';
end behaviour;

@ -1,3 +0,0 @@
set_property ALLOW_COMBINATORIAL_LOOPS TRUE [get_nets soc0/processor/execute1_0/random_0/ro_reg*]
set_property ALLOW_COMBINATORIAL_LOOPS TRUE [get_nets soc0/processor/execute1_0/random_0/p_*]
set_property ALLOW_COMBINATORIAL_LOOPS TRUE [get_nets soc0/processor/execute1_0/random_0/D*]

@ -1,463 +0,0 @@
#### Genesys-2 Rev.H

## Clock & Reset
# Differential clock input pair on AD11/AD12 (LVDS).
set_property -dict { PACKAGE_PIN AD11 IOSTANDARD LVDS } [get_ports { clk200_n }]
set_property -dict { PACKAGE_PIN AD12 IOSTANDARD LVDS } [get_ports { clk200_p }]
# A 5.000 ns period is 200 MHz, matching the clk200_* port names.
# NOTE(review): the clock objects are named tc_clk100_*, which does not match
# the 200 MHz period -- check for other constraints that reference these names
# before renaming them.
# NOTE(review): a clock is created on both legs of the differential pair;
# confirm this is intended rather than constraining the P leg only.
create_clock -period 5.000 -name tc_clk100_p [get_ports clk200_p]
create_clock -period 5.000 -name tc_clk100_n [get_ports clk200_n]

# External reset input on R19 (polarity is not visible in this file).
set_property -dict { PACKAGE_PIN R19 IOSTANDARD LVCMOS33 } [get_ports { ext_rst }]

## UART
set_property -dict { PACKAGE_PIN Y20 IOSTANDARD LVCMOS33 } [get_ports { uart_main_rx }]
set_property -dict { PACKAGE_PIN Y23 IOSTANDARD LVCMOS33 } [get_ports { uart_main_tx }]

## LEDs
set_property -dict { PACKAGE_PIN T28 IOSTANDARD LVCMOS33 } [get_ports { led0 }]
set_property -dict { PACKAGE_PIN V19 IOSTANDARD LVCMOS33 } [get_ports { led1 }]
set_property -dict { PACKAGE_PIN U30 IOSTANDARD LVCMOS33 } [get_ports { led2 }]
set_property -dict { PACKAGE_PIN U29 IOSTANDARD LVCMOS33 } [get_ports { led3 }]

## QSPI
set_property -dict { PACKAGE_PIN U19 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_cs_n }]
set_property -dict { PACKAGE_PIN P24 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_mosi }]
set_property -dict { PACKAGE_PIN R25 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_miso }]
set_property -dict { PACKAGE_PIN R20 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_wp_n }]
set_property -dict { PACKAGE_PIN R21 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_hold_n }]


## DRAM

# ddram:0.a
set_property LOC AC12 [get_ports {ddram_a[0]}]
set_property SLEW FAST [get_ports {ddram_a[0]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_a[0]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[0]}]

# ddram:0.a
set_property LOC AE8 [get_ports {ddram_a[1]}]
set_property SLEW FAST [get_ports {ddram_a[1]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_a[1]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[1]}]

# ddram:0.a
set_property LOC AD8 [get_ports {ddram_a[2]}]
set_property SLEW FAST [get_ports {ddram_a[2]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_a[2]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[2]}]

# ddram:0.a
set_property LOC AC10 [get_ports {ddram_a[3]}]
set_property SLEW FAST [get_ports {ddram_a[3]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_a[3]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[3]}]

# ddram:0.a
set_property LOC AD9 [get_ports {ddram_a[4]}]
set_property SLEW FAST [get_ports {ddram_a[4]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_a[4]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[4]}]

# ddram:0.a
set_property LOC AA13 [get_ports {ddram_a[5]}]
set_property SLEW FAST [get_ports {ddram_a[5]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_a[5]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[5]}]

# ddram:0.a
set_property LOC AA10 [get_ports {ddram_a[6]}]
set_property SLEW FAST [get_ports {ddram_a[6]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_a[6]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[6]}]

# ddram:0.a
set_property LOC AA11 [get_ports {ddram_a[7]}]
set_property SLEW FAST [get_ports {ddram_a[7]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_a[7]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[7]}]

# ddram:0.a
set_property LOC Y10 [get_ports {ddram_a[8]}]
set_property SLEW FAST [get_ports {ddram_a[8]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_a[8]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[8]}]

# ddram:0.a
set_property LOC Y11 [get_ports {ddram_a[9]}]
set_property SLEW FAST [get_ports {ddram_a[9]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_a[9]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[9]}]

# ddram:0.a
set_property LOC AB8 [get_ports {ddram_a[10]}]
set_property SLEW FAST [get_ports {ddram_a[10]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_a[10]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[10]}]

# ddram:0.a
set_property LOC AA8 [get_ports {ddram_a[11]}]
set_property SLEW FAST [get_ports {ddram_a[11]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_a[11]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[11]}]

# ddram:0.a
set_property LOC AB12 [get_ports {ddram_a[12]}]
set_property SLEW FAST [get_ports {ddram_a[12]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_a[12]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[12]}]

# ddram:0.a
set_property LOC AA12 [get_ports {ddram_a[13]}]
set_property SLEW FAST [get_ports {ddram_a[13]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_a[13]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[13]}]

# ddram:0.a
set_property LOC AH9 [get_ports {ddram_a[14]}]
set_property SLEW FAST [get_ports {ddram_a[14]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_a[14]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[14]}]

# ddram:0.ba
set_property LOC AE9 [get_ports {ddram_ba[0]}]
set_property SLEW FAST [get_ports {ddram_ba[0]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_ba[0]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_ba[0]}]

# ddram:0.ba
set_property LOC AB10 [get_ports {ddram_ba[1]}]
set_property SLEW FAST [get_ports {ddram_ba[1]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_ba[1]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_ba[1]}]

# ddram:0.ba
set_property LOC AC11 [get_ports {ddram_ba[2]}]
set_property SLEW FAST [get_ports {ddram_ba[2]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_ba[2]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_ba[2]}]

# ddram:0.ras_n
set_property LOC AE11 [get_ports {ddram_ras_n}]
set_property SLEW FAST [get_ports {ddram_ras_n}]
set_property VCCAUX_IO HIGH [get_ports {ddram_ras_n}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_ras_n}]

# ddram:0.cas_n
set_property LOC AF11 [get_ports {ddram_cas_n}]
set_property SLEW FAST [get_ports {ddram_cas_n}]
set_property VCCAUX_IO HIGH [get_ports {ddram_cas_n}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_cas_n}]

# ddram:0.we_n
set_property LOC AG13 [get_ports {ddram_we_n}]
set_property SLEW FAST [get_ports {ddram_we_n}]
set_property VCCAUX_IO HIGH [get_ports {ddram_we_n}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_we_n}]

# ddram:0.cs_n
set_property LOC AH12 [get_ports {ddram_cs_n}]
set_property SLEW FAST [get_ports {ddram_cs_n}]
set_property VCCAUX_IO HIGH [get_ports {ddram_cs_n}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_cs_n}]

# ddram:0.dm
set_property LOC AD4 [get_ports {ddram_dm[0]}]
set_property SLEW FAST [get_ports {ddram_dm[0]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dm[0]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dm[0]}]

# ddram:0.dm
set_property LOC AF3 [get_ports {ddram_dm[1]}]
set_property SLEW FAST [get_ports {ddram_dm[1]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dm[1]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dm[1]}]

# ddram:0.dm
set_property LOC AH4 [get_ports {ddram_dm[2]}]
set_property SLEW FAST [get_ports {ddram_dm[2]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dm[2]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dm[2]}]

# ddram:0.dm
set_property LOC AF8 [get_ports {ddram_dm[3]}]
set_property SLEW FAST [get_ports {ddram_dm[3]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dm[3]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dm[3]}]

# ddram:0.dq
set_property LOC AD3 [get_ports {ddram_dq[0]}]
set_property SLEW FAST [get_ports {ddram_dq[0]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[0]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[0]}]

# ddram:0.dq
set_property LOC AC2 [get_ports {ddram_dq[1]}]
set_property SLEW FAST [get_ports {ddram_dq[1]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[1]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[1]}]

# ddram:0.dq
set_property LOC AC1 [get_ports {ddram_dq[2]}]
set_property SLEW FAST [get_ports {ddram_dq[2]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[2]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[2]}]

# ddram:0.dq
set_property LOC AC5 [get_ports {ddram_dq[3]}]
set_property SLEW FAST [get_ports {ddram_dq[3]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[3]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[3]}]

# ddram:0.dq
set_property LOC AC4 [get_ports {ddram_dq[4]}]
set_property SLEW FAST [get_ports {ddram_dq[4]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[4]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[4]}]

# ddram:0.dq
set_property LOC AD6 [get_ports {ddram_dq[5]}]
set_property SLEW FAST [get_ports {ddram_dq[5]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[5]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[5]}]

# ddram:0.dq
set_property LOC AE6 [get_ports {ddram_dq[6]}]
set_property SLEW FAST [get_ports {ddram_dq[6]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[6]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[6]}]

# ddram:0.dq
set_property LOC AC7 [get_ports {ddram_dq[7]}]
set_property SLEW FAST [get_ports {ddram_dq[7]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[7]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[7]}]

# ddram:0.dq
set_property LOC AF2 [get_ports {ddram_dq[8]}]
set_property SLEW FAST [get_ports {ddram_dq[8]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[8]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[8]}]

# ddram:0.dq
set_property LOC AE1 [get_ports {ddram_dq[9]}]
set_property SLEW FAST [get_ports {ddram_dq[9]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[9]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[9]}]

# ddram:0.dq
set_property LOC AF1 [get_ports {ddram_dq[10]}]
set_property SLEW FAST [get_ports {ddram_dq[10]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[10]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[10]}]

# ddram:0.dq
set_property LOC AE4 [get_ports {ddram_dq[11]}]
set_property SLEW FAST [get_ports {ddram_dq[11]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[11]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[11]}]

# ddram:0.dq
set_property LOC AE3 [get_ports {ddram_dq[12]}]
set_property SLEW FAST [get_ports {ddram_dq[12]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[12]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[12]}]

# ddram:0.dq
set_property LOC AE5 [get_ports {ddram_dq[13]}]
set_property SLEW FAST [get_ports {ddram_dq[13]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[13]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[13]}]

# ddram:0.dq
set_property LOC AF5 [get_ports {ddram_dq[14]}]
set_property SLEW FAST [get_ports {ddram_dq[14]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[14]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[14]}]

# ddram:0.dq
set_property LOC AF6 [get_ports {ddram_dq[15]}]
set_property SLEW FAST [get_ports {ddram_dq[15]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[15]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[15]}]

# ddram:0.dq
set_property LOC AJ4 [get_ports {ddram_dq[16]}]
set_property SLEW FAST [get_ports {ddram_dq[16]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[16]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[16]}]

# ddram:0.dq
set_property LOC AH6 [get_ports {ddram_dq[17]}]
set_property SLEW FAST [get_ports {ddram_dq[17]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[17]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[17]}]

# ddram:0.dq
set_property LOC AH5 [get_ports {ddram_dq[18]}]
set_property SLEW FAST [get_ports {ddram_dq[18]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[18]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[18]}]

# ddram:0.dq
set_property LOC AH2 [get_ports {ddram_dq[19]}]
set_property SLEW FAST [get_ports {ddram_dq[19]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[19]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[19]}]

# ddram:0.dq
set_property LOC AJ2 [get_ports {ddram_dq[20]}]
set_property SLEW FAST [get_ports {ddram_dq[20]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[20]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[20]}]

# ddram:0.dq
set_property LOC AJ1 [get_ports {ddram_dq[21]}]
set_property SLEW FAST [get_ports {ddram_dq[21]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[21]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[21]}]

# ddram:0.dq
set_property LOC AK1 [get_ports {ddram_dq[22]}]
set_property SLEW FAST [get_ports {ddram_dq[22]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[22]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[22]}]

# ddram:0.dq
set_property LOC AJ3 [get_ports {ddram_dq[23]}]
set_property SLEW FAST [get_ports {ddram_dq[23]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[23]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[23]}]

# ddram:0.dq
set_property LOC AF7 [get_ports {ddram_dq[24]}]
set_property SLEW FAST [get_ports {ddram_dq[24]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[24]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[24]}]

# ddram:0.dq
set_property LOC AG7 [get_ports {ddram_dq[25]}]
set_property SLEW FAST [get_ports {ddram_dq[25]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[25]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[25]}]

# ddram:0.dq
set_property LOC AJ6 [get_ports {ddram_dq[26]}]
set_property SLEW FAST [get_ports {ddram_dq[26]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[26]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[26]}]

# ddram:0.dq
set_property LOC AK6 [get_ports {ddram_dq[27]}]
set_property SLEW FAST [get_ports {ddram_dq[27]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[27]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[27]}]

# ddram:0.dq
set_property LOC AJ8 [get_ports {ddram_dq[28]}]
set_property SLEW FAST [get_ports {ddram_dq[28]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[28]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[28]}]

# ddram:0.dq
set_property LOC AK8 [get_ports {ddram_dq[29]}]
set_property SLEW FAST [get_ports {ddram_dq[29]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[29]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[29]}]

# ddram:0.dq
set_property LOC AK5 [get_ports {ddram_dq[30]}]
set_property SLEW FAST [get_ports {ddram_dq[30]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[30]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[30]}]

# ddram:0.dq
set_property LOC AK4 [get_ports {ddram_dq[31]}]
set_property SLEW FAST [get_ports {ddram_dq[31]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dq[31]}]
set_property IOSTANDARD SSTL15_T_DCI [get_ports {ddram_dq[31]}]

# ddram:0.dqs_p
set_property LOC AD2 [get_ports {ddram_dqs_p[0]}]
set_property SLEW FAST [get_ports {ddram_dqs_p[0]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dqs_p[0]}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_dqs_p[0]}]

# ddram:0.dqs_p
set_property LOC AG4 [get_ports {ddram_dqs_p[1]}]
set_property SLEW FAST [get_ports {ddram_dqs_p[1]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dqs_p[1]}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_dqs_p[1]}]

# ddram:0.dqs_p
set_property LOC AG2 [get_ports {ddram_dqs_p[2]}]
set_property SLEW FAST [get_ports {ddram_dqs_p[2]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dqs_p[2]}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_dqs_p[2]}]

# ddram:0.dqs_p
set_property LOC AH7 [get_ports {ddram_dqs_p[3]}]
set_property SLEW FAST [get_ports {ddram_dqs_p[3]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dqs_p[3]}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_dqs_p[3]}]

# ddram:0.dqs_n
set_property LOC AD1 [get_ports {ddram_dqs_n[0]}]
set_property SLEW FAST [get_ports {ddram_dqs_n[0]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dqs_n[0]}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_dqs_n[0]}]

# ddram:0.dqs_n
set_property LOC AG3 [get_ports {ddram_dqs_n[1]}]
set_property SLEW FAST [get_ports {ddram_dqs_n[1]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dqs_n[1]}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_dqs_n[1]}]

# ddram:0.dqs_n
set_property LOC AH1 [get_ports {ddram_dqs_n[2]}]
set_property SLEW FAST [get_ports {ddram_dqs_n[2]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dqs_n[2]}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_dqs_n[2]}]

# ddram:0.dqs_n
set_property LOC AJ7 [get_ports {ddram_dqs_n[3]}]
set_property SLEW FAST [get_ports {ddram_dqs_n[3]}]
set_property VCCAUX_IO HIGH [get_ports {ddram_dqs_n[3]}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_dqs_n[3]}]

# ddram:0.clk_p
set_property LOC AB9 [get_ports {ddram_clk_p}]
set_property SLEW FAST [get_ports {ddram_clk_p}]
set_property VCCAUX_IO HIGH [get_ports {ddram_clk_p}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_clk_p}]

# ddram:0.clk_n
set_property LOC AC9 [get_ports {ddram_clk_n}]
set_property SLEW FAST [get_ports {ddram_clk_n}]
set_property VCCAUX_IO HIGH [get_ports {ddram_clk_n}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_clk_n}]

# ddram:0.cke
set_property LOC AJ9 [get_ports {ddram_cke}]
set_property SLEW FAST [get_ports {ddram_cke}]
set_property VCCAUX_IO HIGH [get_ports {ddram_cke}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_cke}]

# ddram:0.odt
set_property LOC AK9 [get_ports {ddram_odt}]
set_property SLEW FAST [get_ports {ddram_odt}]
set_property VCCAUX_IO HIGH [get_ports {ddram_odt}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_odt}]

# ddram:0.reset_n
set_property LOC AG5 [get_ports {ddram_reset_n}]
set_property SLEW FAST [get_ports {ddram_reset_n}]
set_property VCCAUX_IO HIGH [get_ports {ddram_reset_n}]
set_property IOSTANDARD LVCMOS15 [get_ports {ddram_reset_n}]


set_property INTERNAL_VREF 0.750 [get_iobanks 34]

# False path constraints
set_false_path -quiet -through [get_nets -hierarchical -filter {mr_ff == TRUE}]
set_false_path -quiet -to [get_pins -filter {REF_PIN_NAME == PRE} -of_objects [get_cells -hierarchical -filter {ars_ff1 == TRUE || ars_ff2 == TRUE}]]
set_max_delay 2 -quiet -from [get_pins -filter {REF_PIN_NAME == C} -of_objects [get_cells -hierarchical -filter {ars_ff1 == TRUE}]] -to [get_pins -filter {REF_PIN_NAME == D} -of_objects [get_cells -hierarchical -filter {ars_ff2 == TRUE}]]

@ -1,11 +1,23 @@
000000004800012c
0000000000000000
4800002408000048
01006b69a600607d
a602487d05009f42
a64b5a7d14004a39
2402004ca64b7b7d
00000000480000f4
3c20000048000004
782107c660210000
60212f0064210000
618c00003d800000
658c0000798c07c6
7d8903a6618c113c
480000004e800421
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
@ -30,18 +42,6 @@ a64b5a7d14004a39
0000000000000000
0000000000000000
0000000000000000
4800002408000048
01006b69a600607d
a602487d05009f42
a64b5a7d14004a39
2402004ca64b7b7d
3c20000048000004
782107c660210000
60211f0064210000
618c00003d800000
658c0000798c07c6
7d8903a6618c1014
480000004e800421
0000000000000000
0000000000000000
0000000000000000
@ -513,74 +513,74 @@ a64b5a7d14004a39
e8010010ebc1fff0
7c0803a6ebe1fff8
3c4000014e800020
7c0802a63842a000
3fe2fffffbe1fff8
f80100103bff7240
48000051f821ffd1
7fe3fb7860000000
60000000480001d5
7fe3fb787c641b78
600000004800017d
60000000480000ed
480001295463063e
4bffffec60000000
0100000000000000
3c40000100000180
3d20c0003842a000
6129200060000000
f922800079290020
612900203d20c000
7c0004ac79290020
3d40001c7d204eea
7d295392614a2000
394a0018e9428000
7c0004ac3929ffff
4e8000207d2057ea
3940001a3d20c000
7929002061292018
4e800020f9490000
0000000000000000
3c40000100000000
600000003842a000
394000ffe9228000
7c0004ac39290020
4e8000207d404fea
0000000000000000
3c40000100000000
600000003842a000
39400000e9228000
7c0004ac39290020
4e8000207d404fea
0000000000000000
3c40000100000000
600000003842a000
39290010e9228000
7d204eea7c0004ac
4082ffe871290001
38630008e8628000
7c601eea7c0004ac
390a0010e9428000
71290001e9280000
e86a00084082fff8
4e8000205463063e
0000000000000000
3c40000100000000
600000003842a000
39290010e9228000
7d204eea7c0004ac
4082ffe871290008
7c0004ace9228000
4e8000207c604fea
390a0010e9428000
71290008e9280000
f86a00004082fff8
000000004e800020
0000000000000000
3842a0003c400001
fbc1fff07c0802a6
7fc32214fbe1fff8
f80100107c7f1b78
7fbff040f821ffd1
38210030409e000c
887f00004bffff10
4bffff993bff0001
000000004bffffe4
0000028001000000
7d4348ae39200000
409e000c2f8a0000
4e8000207d234b78
4bffffe839290001
0000000000000000
3c40000100000000
7c0802a63842a000
fbe1fff8fbc1fff0
7c7f1b787fc32214
f821ffd1f8010010
409e000c7fbff040
4bfffe0c38210030
3bff0001887f0000
4bffffe44bffff8d
0100000000000000
3920000000000280
2f8a00007d4348ae
7d234b78409e000c
392900014e800020
000000004bffffe8
0000000000000000
3fe2fffffbe1fff8
f80100103bff7190
4bfffec1f821ffd1
4bffffad7fe3fb78
7fe3fb787c641b78
4bfffee94bffff59
4bffff195463063e
000000004bfffff4
0000018001000000
6f57206f6c6c6548
0000000a0d646c72
0000000000000010
0141780400527a01
0000001000010c1b
fffffe5800000018
0000000000000040
0000002c00000010
00000038fffffe84
0000001000000000
fffffea800000040
0000000000000034
0000005400000028
00000050fffffec8
9f029e0041094500
437e4111300e4401
4106dedf41000e0a
000000100000000b
fffffeec00000080
000000000000002c
000000940000001c
00000054ffffff04
44019f0041094400
0000007e4111300e

@ -1,82 +0,0 @@
-- Single port Block RAM with one cycle output buffer

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use std.textio.all;

library work;

-- Interface of the single-port block RAM.
entity main_bram is
generic(
-- Data width in bits; the architecture splits it into WIDTH/8 byte lanes.
WIDTH : natural := 64;
-- Width of the addr port in bits.
-- NOTE(review): the default of 1024 address bits looks unintended;
-- presumably every instantiation overrides it -- confirm.
HEIGHT_BITS : natural := 1024;
-- Memory size in bytes (the array depth is MEMORY_SIZE / (WIDTH / 8)).
MEMORY_SIZE : natural := 65536;
-- Name of the text file used to initialise the memory contents.
RAM_INIT_FILE : string
);
port(
clk : in std_logic;
-- Word address (indexes whole WIDTH-bit words).
addr : in std_logic_vector(HEIGHT_BITS - 1 downto 0) ;
-- Write data.
din : in std_logic_vector(WIDTH-1 downto 0);
-- Read data, registered.
dout : out std_logic_vector(WIDTH-1 downto 0);
-- Byte-lane write enables, one bit per byte of din.
sel : in std_logic_vector((WIDTH/8)-1 downto 0);
-- Read enable.
re : in std_ulogic;
-- Write enable.
we : in std_ulogic
);
end entity main_bram;

-- Behavioural block-RAM template for main_bram.
--
-- Writes are byte-granular (one "sel" bit per byte lane). Reads are
-- registered twice: "obuf" captures the addressed word when re = '1', and
-- "dout" follows "obuf" one clock later.
architecture behaviour of main_bram is

    -- Number of byte lanes in one memory word.
    constant WIDTH_BYTES : natural := WIDTH / 8;

    -- RAM type definition: MEMORY_SIZE bytes organised as WIDTH-bit words.
    type ram_t is array(0 to (MEMORY_SIZE / WIDTH_BYTES) - 1) of std_logic_vector(WIDTH-1 downto 0);

    -- RAM loading.
    -- Reads one hexadecimal word per line from the text file "name", starting
    -- at word 0. Loading stops at end of file or when the array is full;
    -- words that are not covered by the file stay all-zero.
    impure function init_ram(name : STRING) return ram_t is
        file ram_file : text open read_mode is name;
        variable ram_line : line;
        variable temp_word : std_logic_vector(WIDTH-1 downto 0);
        variable temp_ram : ram_t := (others => (others => '0'));
    begin
        for i in 0 to (MEMORY_SIZE / WIDTH_BYTES) - 1 loop
            exit when endfile(ram_file);
            readline(ram_file, ram_line);
            hread(ram_line, temp_word);
            temp_ram(i) := temp_word;
        end loop;

        return temp_ram;
    end function;

    -- RAM instance, initialised from RAM_INIT_FILE at elaboration.
    signal memory : ram_t := init_ram(RAM_INIT_FILE);
    attribute ram_style : string;
    attribute ram_style of memory : signal is "block";
    attribute ram_decomp : string;
    attribute ram_decomp of memory : signal is "power";

    -- Others
    -- First read register stage; dout is the second.
    signal obuf : std_logic_vector(WIDTH-1 downto 0);
begin

    -- Actual RAM template
    memory_0: process(clk)
    begin
        if rising_edge(clk) then
            if we = '1' then
                -- Iterate over every byte lane of the configured WIDTH.
                -- The bound used to be a literal 7, which only matched
                -- WIDTH = 64: narrower words indexed sel/din out of range
                -- and wider words never wrote their upper lanes.
                for i in 0 to WIDTH_BYTES - 1 loop
                    if sel(i) = '1' then
                        memory(to_integer(unsigned(addr)))((i + 1) * 8 - 1 downto i * 8) <=
                            din((i + 1) * 8 - 1 downto i * 8);
                    end if;
                end loop;
            end if;
            -- "memory" is a signal, so a read in the same clock edge as a
            -- write to the same address returns the old contents.
            if re = '1' then
                obuf <= memory(to_integer(unsigned(addr)));
            end if;
            -- Unconditional second stage: dout lags obuf by one clock.
            dout <= obuf;
        end if;
    end process;

end architecture behaviour;

@ -0,0 +1,106 @@
-- Based on:
-- The Potato Processor - A simple processor for FPGAs
-- (c) Kristian Klomsten Skordal 2014 - 2015 <kristian.skordal@wafflemail.net>

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use std.textio.all;

library work;
use work.wishbone_types.all;

use work.pp_utilities.all;

--! @brief Simple memory module for use in Wishbone-based systems.
entity mw_soc_memory is
generic(
MEMORY_SIZE : natural := 4096; --! Memory size in bytes.
RAM_INIT_FILE : string --! Text file used to initialise the memory contents.
);
port(
clk : in std_logic;
--! Reset, sampled on the rising edge of clk (synchronous).
rst : in std_logic;

-- Wishbone interface:
--! Request signals from the bus master (cyc, stb, we, sel, adr, dat are used).
wishbone_in : in wishbone_master_out;
--! Response signals to the bus master (ack and dat are driven).
wishbone_out : out wishbone_slave_out
);
end entity mw_soc_memory;

--! Implementation: a 64-bit wide RAM array behind a two-state Wishbone
--! handshake. Each access takes one clock in IDLE (the write or read is
--! performed) and one clock in ACK (the acknowledge is visible).
architecture behaviour of mw_soc_memory is
-- Low-order bits of the Wishbone address that fall inside this memory.
-- NOTE(review): log2 comes from work.pp_utilities; presumably it returns the
-- number of address bits needed for MEMORY_SIZE bytes -- confirm.
signal wb_adr_in : std_logic_vector(log2(MEMORY_SIZE) - 1 downto 0);
-- MEMORY_SIZE bytes organised as 64-bit words.
type ram_t is array(0 to (MEMORY_SIZE / 8) - 1) of std_logic_vector(63 downto 0);

-- Reads one hexadecimal 64-bit word per line from the text file "name",
-- starting at word 0. Loading stops at end of file or when the array is
-- full; words not covered by the file stay all-zero.
impure function init_ram(name : STRING) return ram_t is
file ram_file : text open read_mode is name;
variable ram_line : line;
variable temp_word : std_logic_vector(63 downto 0);
variable temp_ram : ram_t := (others => (others => '0'));
begin
for i in 0 to (MEMORY_SIZE/8)-1 loop
exit when endfile(ram_file);
readline(ram_file, ram_line);
hread(ram_line, temp_word);
temp_ram(i) := temp_word;
end loop;

return temp_ram;
end function;

-- Storage array, initialised from RAM_INIT_FILE at elaboration.
signal memory : ram_t := init_ram(RAM_INIT_FILE);

attribute ram_style : string;
attribute ram_style of memory : signal is "block";

attribute ram_decomp : string;
attribute ram_decomp of memory : signal is "power";

-- IDLE: waiting for a strobe; ACK: the access was performed last cycle.
type state_type is (IDLE, ACK);
signal state : state_type;

-- Registered acknowledge; despite the name it is set for writes as well.
signal read_ack : std_logic;

begin

wb_adr_in <= wishbone_in.adr(log2(MEMORY_SIZE) - 1 downto 0);

-- The acknowledge is gated with the live strobe, so it drops as soon as the
-- master deasserts stb.
wishbone_out.ack <= read_ack and wishbone_in.stb;

memory_0: process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
read_ack <= '0';
state <= IDLE;
else
if wishbone_in.cyc = '1' then
case state is
when IDLE =>
if wishbone_in.stb = '1' and wishbone_in.we = '1' then
-- Write: update only the byte lanes selected by sel. The word
-- index drops the three low (byte-offset) address bits.
for i in 0 to 7 loop
if wishbone_in.sel(i) = '1' then
memory(to_integer(unsigned(wb_adr_in(wb_adr_in'left downto 3))))(((i + 1) * 8) - 1 downto i * 8)
<= wishbone_in.dat(((i + 1) * 8) - 1 downto i * 8);
end if;
end loop;
read_ack <= '1';
state <= ACK;
elsif wishbone_in.stb = '1' then
-- Read: register the whole addressed word (sel is ignored).
wishbone_out.dat <= memory(to_integer(unsigned(wb_adr_in(wb_adr_in'left downto 3))));
read_ack <= '1';
state <= ACK;
end if;
when ACK =>
-- The acknowledge has been visible for one clock; finish the access.
read_ack <= '0';
state <= IDLE;
end case;
else
-- No bus cycle in progress: return to IDLE and drop the acknowledge.
state <= IDLE;
read_ack <= '0';
end if;
end if;
end if;
end process;

end architecture behaviour;

@ -1,421 +1,29 @@
################################################################################
# clkin, reset, uart pins...
################################################################################

set_property -dict {PACKAGE_PIN R4 IOSTANDARD LVCMOS33} [get_ports ext_clk]

set_property -dict {PACKAGE_PIN G4 IOSTANDARD LVCMOS15} [get_ports ext_rst_n]

set_property -dict {PACKAGE_PIN AA19 IOSTANDARD LVCMOS33} [get_ports uart_main_tx]
set_property -dict {PACKAGE_PIN V18 IOSTANDARD LVCMOS33} [get_ports uart_main_rx]

################################################################################
# Pmod Header JC: UART (bottom)
################################################################################

#set_property -dict { PACKAGE_PIN Y21 IOSTANDARD LVCMOS33 } [get_ports { uart_pmod_cts_n }];
#set_property -dict { PACKAGE_PIN AA21 IOSTANDARD LVCMOS33 } [get_ports { uart_pmod_tx }];
#set_property -dict { PACKAGE_PIN AA20 IOSTANDARD LVCMOS33 } [get_ports { uart_pmod_rx }];
#set_property -dict { PACKAGE_PIN AA18 IOSTANDARD LVCMOS33 } [get_ports { uart_pmod_rts_n }];

################################################################################
# LEDs
################################################################################

set_property -dict { PACKAGE_PIN T14 IOSTANDARD LVCMOS25 } [get_ports { led0 }];
set_property -dict { PACKAGE_PIN T15 IOSTANDARD LVCMOS25 } [get_ports { led1 }];
set_property -dict { PACKAGE_PIN T16 IOSTANDARD LVCMOS25 } [get_ports { led2 }];
set_property -dict { PACKAGE_PIN U16 IOSTANDARD LVCMOS25 } [get_ports { led3 }];
set_property -dict { PACKAGE_PIN V15 IOSTANDARD LVCMOS25 } [get_ports { led4 }];
set_property -dict { PACKAGE_PIN W16 IOSTANDARD LVCMOS25 } [get_ports { led5 }];
set_property -dict { PACKAGE_PIN W15 IOSTANDARD LVCMOS25 } [get_ports { led6 }];
set_property -dict { PACKAGE_PIN Y13 IOSTANDARD LVCMOS25 } [get_ports { led7 }];

################################################################################
# SPI Flash
################################################################################

set_property -dict { PACKAGE_PIN T19 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_cs_n }];
set_property -dict { PACKAGE_PIN P22 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_mosi }];
set_property -dict { PACKAGE_PIN R22 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_miso }];
set_property -dict { PACKAGE_PIN P21 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_wp_n }];
set_property -dict { PACKAGE_PIN R21 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_hold_n }];

################################################################################
# SD card
################################################################################

set_property -dict { PACKAGE_PIN W19 IOSTANDARD LVCMOS33 SLEW FAST } [get_ports { sdcard_clk }]
set_property -dict { PACKAGE_PIN T18 IOSTANDARD LVCMOS33 } [get_ports { sdcard_cd }]
set_property -dict { PACKAGE_PIN W20 IOSTANDARD LVCMOS33 SLEW FAST } [get_ports { sdcard_cmd }]
set_property -dict { PACKAGE_PIN V19 IOSTANDARD LVCMOS33 SLEW FAST } [get_ports { sdcard_data[0] }]
set_property -dict { PACKAGE_PIN T21 IOSTANDARD LVCMOS33 SLEW FAST } [get_ports { sdcard_data[1] }]
set_property -dict { PACKAGE_PIN T20 IOSTANDARD LVCMOS33 SLEW FAST } [get_ports { sdcard_data[2] }]
set_property -dict { PACKAGE_PIN U18 IOSTANDARD LVCMOS33 SLEW FAST } [get_ports { sdcard_data[3] }]
set_property -dict { PACKAGE_PIN V20 IOSTANDARD LVCMOS33 } [get_ports { sdcard_reset }]

# Put registers into IOBs to improve timing
set_property IOB true [get_cells -hierarchical -filter {NAME =~*.litesdcard/sdcard_*}]

################################################################################
# Ethernet (generated by LiteX)
################################################################################

# eth_clocks:0.tx
set_property LOC AA14 [get_ports {eth_clocks_tx}]
set_property IOSTANDARD LVCMOS25 [get_ports {eth_clocks_tx}]

# eth_clocks:0.rx
set_property LOC V13 [get_ports {eth_clocks_rx}]
set_property IOSTANDARD LVCMOS25 [get_ports {eth_clocks_rx}]

# eth:0.rst_n
set_property LOC U7 [get_ports {eth_rst_n}]
set_property IOSTANDARD LVCMOS25 [get_ports {eth_rst_n}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rst_n}]

# eth:0.int_n
set_property LOC Y14 [get_ports {eth_int_n}]
set_property IOSTANDARD LVCMOS25 [get_ports {eth_int_n}]

# eth:0.mdio
set_property LOC Y16 [get_ports {eth_mdio}]
set_property IOSTANDARD LVCMOS25 [get_ports {eth_mdio}]

# eth:0.mdc
set_property LOC AA16 [get_ports {eth_mdc}]
set_property IOSTANDARD LVCMOS25 [get_ports {eth_mdc}]

# eth:0.rx_ctl
set_property LOC W10 [get_ports {eth_rx_ctl}]
set_property IOSTANDARD LVCMOS25 [get_ports {eth_rx_ctl}]

# eth:0.rx_data
set_property LOC AB16 [get_ports {eth_rx_data[0]}]
set_property IOSTANDARD LVCMOS25 [get_ports {eth_rx_data[0]}]

# eth:0.rx_data
set_property LOC AA15 [get_ports {eth_rx_data[1]}]
set_property IOSTANDARD LVCMOS25 [get_ports {eth_rx_data[1]}]

# eth:0.rx_data
set_property LOC AB15 [get_ports {eth_rx_data[2]}]
set_property IOSTANDARD LVCMOS25 [get_ports {eth_rx_data[2]}]

# eth:0.rx_data
set_property LOC AB11 [get_ports {eth_rx_data[3]}]
set_property IOSTANDARD LVCMOS25 [get_ports {eth_rx_data[3]}]

# eth:0.tx_ctl
set_property LOC V10 [get_ports {eth_tx_ctl}]
set_property IOSTANDARD LVCMOS25 [get_ports {eth_tx_ctl}]

# eth:0.tx_data
set_property LOC Y12 [get_ports {eth_tx_data[0]}]
set_property IOSTANDARD LVCMOS25 [get_ports {eth_tx_data[0]}]

# eth:0.tx_data
set_property LOC W12 [get_ports {eth_tx_data[1]}]
set_property IOSTANDARD LVCMOS25 [get_ports {eth_tx_data[1]}]

# eth:0.tx_data
set_property LOC W11 [get_ports {eth_tx_data[2]}]
set_property IOSTANDARD LVCMOS25 [get_ports {eth_tx_data[2]}]

# eth:0.tx_data
set_property LOC Y11 [get_ports {eth_tx_data[3]}]
set_property IOSTANDARD LVCMOS25 [get_ports {eth_tx_data[3]}]

################################################################################
# DRAM (generated by LiteX)
################################################################################

# ddram:0.a
set_property LOC M2 [get_ports {ddram_a[0]}]
set_property SLEW FAST [get_ports {ddram_a[0]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[0]}]

# ddram:0.a
set_property LOC M5 [get_ports {ddram_a[1]}]
set_property SLEW FAST [get_ports {ddram_a[1]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[1]}]

# ddram:0.a
set_property LOC M3 [get_ports {ddram_a[2]}]
set_property SLEW FAST [get_ports {ddram_a[2]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[2]}]

# ddram:0.a
set_property LOC M1 [get_ports {ddram_a[3]}]
set_property SLEW FAST [get_ports {ddram_a[3]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[3]}]

# ddram:0.a
set_property LOC L6 [get_ports {ddram_a[4]}]
set_property SLEW FAST [get_ports {ddram_a[4]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[4]}]

# ddram:0.a
set_property LOC P1 [get_ports {ddram_a[5]}]
set_property SLEW FAST [get_ports {ddram_a[5]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[5]}]

# ddram:0.a
set_property LOC N3 [get_ports {ddram_a[6]}]
set_property SLEW FAST [get_ports {ddram_a[6]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[6]}]

# ddram:0.a
set_property LOC N2 [get_ports {ddram_a[7]}]
set_property SLEW FAST [get_ports {ddram_a[7]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[7]}]

# ddram:0.a
set_property LOC M6 [get_ports {ddram_a[8]}]
set_property SLEW FAST [get_ports {ddram_a[8]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[8]}]

# ddram:0.a
set_property LOC R1 [get_ports {ddram_a[9]}]
set_property SLEW FAST [get_ports {ddram_a[9]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[9]}]

# ddram:0.a
set_property LOC L5 [get_ports {ddram_a[10]}]
set_property SLEW FAST [get_ports {ddram_a[10]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[10]}]

# ddram:0.a
set_property LOC N5 [get_ports {ddram_a[11]}]
set_property SLEW FAST [get_ports {ddram_a[11]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[11]}]

# ddram:0.a
set_property LOC N4 [get_ports {ddram_a[12]}]
set_property SLEW FAST [get_ports {ddram_a[12]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[12]}]

# ddram:0.a
set_property LOC P2 [get_ports {ddram_a[13]}]
set_property SLEW FAST [get_ports {ddram_a[13]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[13]}]

# ddram:0.a
set_property LOC P6 [get_ports {ddram_a[14]}]
set_property SLEW FAST [get_ports {ddram_a[14]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_a[14]}]

# ddram:0.ba
set_property LOC L3 [get_ports {ddram_ba[0]}]
set_property SLEW FAST [get_ports {ddram_ba[0]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_ba[0]}]

# ddram:0.ba
set_property LOC K6 [get_ports {ddram_ba[1]}]
set_property SLEW FAST [get_ports {ddram_ba[1]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_ba[1]}]

# ddram:0.ba
set_property LOC L4 [get_ports {ddram_ba[2]}]
set_property SLEW FAST [get_ports {ddram_ba[2]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_ba[2]}]

# ddram:0.ras_n
set_property LOC J4 [get_ports {ddram_ras_n}]
set_property SLEW FAST [get_ports {ddram_ras_n}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_ras_n}]

# ddram:0.cas_n
set_property LOC K3 [get_ports {ddram_cas_n}]
set_property SLEW FAST [get_ports {ddram_cas_n}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_cas_n}]

# ddram:0.we_n
set_property LOC L1 [get_ports {ddram_we_n}]
set_property SLEW FAST [get_ports {ddram_we_n}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_we_n}]

# ddram:0.dm
set_property LOC G3 [get_ports {ddram_dm[0]}]
set_property SLEW FAST [get_ports {ddram_dm[0]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dm[0]}]

# ddram:0.dm
set_property LOC F1 [get_ports {ddram_dm[1]}]
set_property SLEW FAST [get_ports {ddram_dm[1]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dm[1]}]

# ddram:0.dq
set_property LOC G2 [get_ports {ddram_dq[0]}]
set_property SLEW FAST [get_ports {ddram_dq[0]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[0]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[0]}]

# ddram:0.dq
set_property LOC H4 [get_ports {ddram_dq[1]}]
set_property SLEW FAST [get_ports {ddram_dq[1]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[1]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[1]}]

# ddram:0.dq
set_property LOC H5 [get_ports {ddram_dq[2]}]
set_property SLEW FAST [get_ports {ddram_dq[2]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[2]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[2]}]

# ddram:0.dq
set_property LOC J1 [get_ports {ddram_dq[3]}]
set_property SLEW FAST [get_ports {ddram_dq[3]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[3]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[3]}]

# ddram:0.dq
set_property LOC K1 [get_ports {ddram_dq[4]}]
set_property SLEW FAST [get_ports {ddram_dq[4]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[4]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[4]}]

# ddram:0.dq
set_property LOC H3 [get_ports {ddram_dq[5]}]
set_property SLEW FAST [get_ports {ddram_dq[5]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[5]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[5]}]

# ddram:0.dq
set_property LOC H2 [get_ports {ddram_dq[6]}]
set_property SLEW FAST [get_ports {ddram_dq[6]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[6]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[6]}]

# ddram:0.dq
set_property LOC J5 [get_ports {ddram_dq[7]}]
set_property SLEW FAST [get_ports {ddram_dq[7]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[7]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[7]}]

# ddram:0.dq
set_property LOC E3 [get_ports {ddram_dq[8]}]
set_property SLEW FAST [get_ports {ddram_dq[8]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[8]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[8]}]

# ddram:0.dq
set_property LOC B2 [get_ports {ddram_dq[9]}]
set_property SLEW FAST [get_ports {ddram_dq[9]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[9]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[9]}]

# ddram:0.dq
set_property LOC F3 [get_ports {ddram_dq[10]}]
set_property SLEW FAST [get_ports {ddram_dq[10]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[10]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[10]}]

# ddram:0.dq
set_property LOC D2 [get_ports {ddram_dq[11]}]
set_property SLEW FAST [get_ports {ddram_dq[11]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[11]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[11]}]

# ddram:0.dq
set_property LOC C2 [get_ports {ddram_dq[12]}]
set_property SLEW FAST [get_ports {ddram_dq[12]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[12]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[12]}]

# ddram:0.dq
set_property LOC A1 [get_ports {ddram_dq[13]}]
set_property SLEW FAST [get_ports {ddram_dq[13]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[13]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[13]}]

# ddram:0.dq
set_property LOC E2 [get_ports {ddram_dq[14]}]
set_property SLEW FAST [get_ports {ddram_dq[14]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[14]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[14]}]

# ddram:0.dq
set_property LOC B1 [get_ports {ddram_dq[15]}]
set_property SLEW FAST [get_ports {ddram_dq[15]}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_dq[15]}]
set_property IN_TERM UNTUNED_SPLIT_50 [get_ports {ddram_dq[15]}]

# ddram:0.dqs_p
set_property LOC K2 [get_ports {ddram_dqs_p[0]}]
set_property SLEW FAST [get_ports {ddram_dqs_p[0]}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_dqs_p[0]}]

# ddram:0.dqs_p
set_property LOC E1 [get_ports {ddram_dqs_p[1]}]
set_property SLEW FAST [get_ports {ddram_dqs_p[1]}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_dqs_p[1]}]

# ddram:0.dqs_n
set_property LOC J2 [get_ports {ddram_dqs_n[0]}]
set_property SLEW FAST [get_ports {ddram_dqs_n[0]}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_dqs_n[0]}]

# ddram:0.dqs_n
set_property LOC D1 [get_ports {ddram_dqs_n[1]}]
set_property SLEW FAST [get_ports {ddram_dqs_n[1]}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_dqs_n[1]}]

# ddram:0.clk_p
set_property LOC P5 [get_ports {ddram_clk_p}]
set_property SLEW FAST [get_ports {ddram_clk_p}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_clk_p}]

# ddram:0.clk_n
set_property LOC P4 [get_ports {ddram_clk_n}]
set_property SLEW FAST [get_ports {ddram_clk_n}]
set_property IOSTANDARD DIFF_SSTL15 [get_ports {ddram_clk_n}]

# ddram:0.cke
set_property LOC J6 [get_ports {ddram_cke}]
set_property SLEW FAST [get_ports {ddram_cke}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_cke}]

# ddram:0.odt
set_property LOC K4 [get_ports {ddram_odt}]
set_property SLEW FAST [get_ports {ddram_odt}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_odt}]

# ddram:0.reset_n
set_property LOC G1 [get_ports {ddram_reset_n}]
set_property SLEW FAST [get_ports {ddram_reset_n}]
set_property IOSTANDARD SSTL15 [get_ports {ddram_reset_n}]

################################################################################
# Design constraints and bitstream attributes
################################################################################

#Internal VREF
set_property INTERNAL_VREF 0.750 [get_iobanks 35]
create_clock -period 10.000 -name sys_clk_pin -waveform {0.000 5.000} -add [get_ports ext_clk]

set_property -dict {PACKAGE_PIN G4 IOSTANDARD LVCMOS15} [get_ports ext_rst]

set_property -dict {PACKAGE_PIN AA19 IOSTANDARD LVCMOS33} [get_ports uart0_txd]
set_property -dict {PACKAGE_PIN V18 IOSTANDARD LVCMOS33} [get_ports uart0_rxd]

set_property -dict { PACKAGE_PIN AB22 IOSTANDARD LVCMOS33 } [get_ports { ja[0] }]; #IO_L10N_T1_D15_14 Sch=ja[1]
set_property -dict { PACKAGE_PIN AB21 IOSTANDARD LVCMOS33 } [get_ports { ja[1] }]; #IO_L10P_T1_D14_14 Sch=ja[2]
set_property -dict { PACKAGE_PIN AB20 IOSTANDARD LVCMOS33 } [get_ports { ja[2] }]; #IO_L15N_T2_DQS_DOUT_CSO_B_14 Sch=ja[3]
set_property -dict { PACKAGE_PIN AB18 IOSTANDARD LVCMOS33 } [get_ports { ja[3] }]; #IO_L17N_T2_A13_D29_14 Sch=ja[4]
set_property -dict { PACKAGE_PIN Y21 IOSTANDARD LVCMOS33 } [get_ports { ja[4] }]; #IO_L9P_T1_DQS_14 Sch=ja[7]
set_property -dict { PACKAGE_PIN AA21 IOSTANDARD LVCMOS33 } [get_ports { ja[5] }]; #IO_L8N_T1_D12_14 Sch=ja[8]
set_property -dict { PACKAGE_PIN AA20 IOSTANDARD LVCMOS33 } [get_ports { ja[6] }]; #IO_L8P_T1_D11_14 Sch=ja[9]
set_property -dict { PACKAGE_PIN AA18 IOSTANDARD LVCMOS33 } [get_ports { ja[7] }]; #IO_L17P_T2_A14_D30_14 Sch=ja[10]

set_property -dict { PACKAGE_PIN V9 IOSTANDARD LVCMOS33 } [get_ports { jb[0] }]; #IO_L21P_T3_DQS_34 Sch=jb_p[1]
set_property -dict { PACKAGE_PIN V8 IOSTANDARD LVCMOS33 } [get_ports { jb[1] }]; #IO_L21N_T3_DQS_34 Sch=jb_n[1]
set_property -dict { PACKAGE_PIN V7 IOSTANDARD LVCMOS33 } [get_ports { jb[2] }]; #IO_L19P_T3_34 Sch=jb_p[2]
set_property -dict { PACKAGE_PIN W7 IOSTANDARD LVCMOS33 } [get_ports { jb[3] }]; #IO_L19N_T3_VREF_34 Sch=jb_n[2]
set_property -dict { PACKAGE_PIN W9 IOSTANDARD LVCMOS33 } [get_ports { jb[4] }]; #IO_L24P_T3_34 Sch=jb_p[3]
set_property -dict { PACKAGE_PIN Y9 IOSTANDARD LVCMOS33 } [get_ports { jb[5] }]; #IO_L24N_T3_34 Sch=jb_n[3]
set_property -dict { PACKAGE_PIN Y8 IOSTANDARD LVCMOS33 } [get_ports { jb[6] }]; #IO_L23P_T3_34 Sch=jb_p[4]
set_property -dict { PACKAGE_PIN Y7 IOSTANDARD LVCMOS33 } [get_ports { jb[7] }]; #IO_L23N_T3_34 Sch=jb_n[4]

set_property CONFIG_VOLTAGE 3.3 [current_design]
set_property CFGBVS VCCO [current_design]

set_property BITSTREAM.GENERAL.COMPRESS TRUE [current_design]
set_property BITSTREAM.CONFIG.CONFIGRATE 33 [current_design]
set_property CONFIG_MODE SPIx4 [current_design]

################################################################################
# Clock constraints
################################################################################

create_clock -name sys_clk_pin -period 10.00 [get_ports { ext_clk }];

create_clock -name eth_clocks_rx -period 8.0 [get_ports { eth_clocks_rx }]

set_clock_groups -asynchronous -group [get_clocks sys_clk_pin -include_generated_clocks] -group [get_clocks eth_clocks_rx -include_generated_clocks]

################################################################################
# False path constraints (from LiteX as they relate to LiteDRAM and LiteEth)
################################################################################

set_false_path -quiet -through [get_nets -hierarchical -filter {mr_ff == TRUE}]

set_false_path -quiet -to [get_pins -filter {REF_PIN_NAME == PRE} -of_objects [get_cells -hierarchical -filter {ars_ff1 == TRUE || ars_ff2 == TRUE}]]

set_max_delay 2 -quiet -from [get_pins -filter {REF_PIN_NAME == C} -of_objects [get_cells -hierarchical -filter {ars_ff1 == TRUE}]] -to [get_pins -filter {REF_PIN_NAME == D} -of_objects [get_cells -hierarchical -filter {ars_ff2 == TRUE}]]

@ -1,5 +1,5 @@
set_property -dict {PACKAGE_PIN E3 IOSTANDARD LVCMOS33} [get_ports ext_clk]
create_clock -period 10.000 -name sys_clk_pin [get_ports ext_clk]
create_clock -period 10.000 -name sys_clk_pin -waveform {0.000 5.000} -add [get_ports ext_clk]

set_property -dict {PACKAGE_PIN C12 IOSTANDARD LVCMOS33} [get_ports ext_rst]

@ -8,7 +8,3 @@ set_property -dict {PACKAGE_PIN C4 IOSTANDARD LVCMOS33} [get_ports uart0_rxd]

set_property CONFIG_VOLTAGE 3.3 [current_design]
set_property CFGBVS VCCO [current_design]

set_property BITSTREAM.GENERAL.COMPRESS TRUE [current_design]
set_property BITSTREAM.CONFIG.CONFIGRATE 33 [current_design]
set_property CONFIG_MODE SPIx4 [current_design]

@ -0,0 +1,216 @@
[PATCH] Hack out ppc64le gcc fixed point divide instructions

This is a pretty horrible short term hack that removes hardware fixed
point divides from ppc64le gcc. It breaks VMX/VSX, but we aren't using
either on microwatt. We'll implement a hardware divide shortly and this
can go away. Please don't tell my toolchain team.

The firmware.hex file in this directory is a build of micropython using
a recent mainline gcc with this patch.

Signed-off-by: Anton Blanchard <anton@linux.ibm.com>
---

diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 0a2bdb79e15..02e325b73a9 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1581,7 +1581,6 @@ BU_VSX_2 (VEC_MERGEH_V2DF, "mergeh_2df", CONST, vsx_mergeh_v2df)
BU_VSX_2 (VEC_MERGEH_V2DI, "mergeh_2di", CONST, vsx_mergeh_v2di)
BU_VSX_2 (XXSPLTD_V2DF, "xxspltd_2df", CONST, vsx_xxspltd_v2df)
BU_VSX_2 (XXSPLTD_V2DI, "xxspltd_2di", CONST, vsx_xxspltd_v2di)
-BU_VSX_2 (DIV_V2DI, "div_2di", CONST, vsx_div_v2di)
BU_VSX_2 (UDIV_V2DI, "udiv_2di", CONST, vsx_udiv_v2di)
BU_VSX_2 (MUL_V2DI, "mul_2di", CONST, vsx_mul_v2di)
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 7f0cdc73d9b..ad0a8a74e63 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -1459,8 +1459,6 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVDP,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
- { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_DIV_V2DI,
- RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
{ VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_UDIV_V2DI,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ VSX_BUILTIN_VEC_DOUBLE, VSX_BUILTIN_XVCVSXDDP,
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 832eda7cbad..1c5245c781b 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -5445,7 +5445,6 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
gsi_replace (gsi, g, true);
return true;
/* Flavors of vec_div (Integer). */
- case VSX_BUILTIN_DIV_V2DI:
case VSX_BUILTIN_UDIV_V2DI:
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 9a7a1da987f..c443c2fe579 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -3071,45 +3071,6 @@
"maddld %0,%1,%2,%3"
[(set_attr "type" "mul")])
-(define_insn "udiv<mode>3"
- [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
- (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
- (match_operand:GPR 2 "gpc_reg_operand" "r")))]
- ""
- "div<wd>u %0,%1,%2"
- [(set_attr "type" "div")
- (set_attr "size" "<bits>")])
-
-
-;; For powers of two we can do sra[wd]i/addze for divide and then adjust for
-;; modulus. If it isn't a power of two, force operands into register and do
-;; a normal divide.
-(define_expand "div<mode>3"
- [(set (match_operand:GPR 0 "gpc_reg_operand")
- (div:GPR (match_operand:GPR 1 "gpc_reg_operand")
- (match_operand:GPR 2 "reg_or_cint_operand")))]
- ""
-{
- if (CONST_INT_P (operands[2])
- && INTVAL (operands[2]) > 0
- && exact_log2 (INTVAL (operands[2])) >= 0)
- {
- emit_insn (gen_div<mode>3_sra (operands[0], operands[1], operands[2]));
- DONE;
- }
-
- operands[2] = force_reg (<MODE>mode, operands[2]);
-})
-
-(define_insn "*div<mode>3"
- [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
- (div:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
- (match_operand:GPR 2 "gpc_reg_operand" "r")))]
- ""
- "div<wd> %0,%1,%2"
- [(set_attr "type" "div")
- (set_attr "size" "<bits>")])
-
(define_insn "div<mode>3_sra"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(div:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
@@ -3170,37 +3131,6 @@
(set_attr "length" "8,12")
(set_attr "cell_micro" "not")])
-(define_expand "mod<mode>3"
- [(set (match_operand:GPR 0 "gpc_reg_operand")
- (mod:GPR (match_operand:GPR 1 "gpc_reg_operand")
- (match_operand:GPR 2 "reg_or_cint_operand")))]
- ""
-{
- int i;
- rtx temp1;
- rtx temp2;
-
- if (!CONST_INT_P (operands[2])
- || INTVAL (operands[2]) <= 0
- || (i = exact_log2 (INTVAL (operands[2]))) < 0)
- {
- if (!TARGET_MODULO)
- FAIL;
-
- operands[2] = force_reg (<MODE>mode, operands[2]);
- }
- else
- {
- temp1 = gen_reg_rtx (<MODE>mode);
- temp2 = gen_reg_rtx (<MODE>mode);
-
- emit_insn (gen_div<mode>3 (temp1, operands[1], operands[2]));
- emit_insn (gen_ashl<mode>3 (temp2, temp1, GEN_INT (i)));
- emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
- DONE;
- }
-})
-
;; In order to enable using a peephole2 for combining div/mod to eliminate the
;; mod, prefer putting the result of mod into a different register
(define_insn "*mod<mode>3"
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 7633171df9c..1a2ac66bd43 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1602,53 +1602,6 @@
"xvdiv<sd>p %x0,%x1,%x2"
[(set_attr "type" "<VStype_div>")])
-; Emulate vector with scalar for vec_div in V2DImode
-(define_insn_and_split "vsx_div_v2di"
- [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
- (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
- (match_operand:V2DI 2 "vsx_register_operand" "wa")]
- UNSPEC_VSX_DIVSD))]
- "VECTOR_MEM_VSX_P (V2DImode)"
- "#"
- "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
- [(const_int 0)]
-{
- rtx op0 = operands[0];
- rtx op1 = operands[1];
- rtx op2 = operands[2];
- rtx op3 = gen_reg_rtx (DImode);
- rtx op4 = gen_reg_rtx (DImode);
- rtx op5 = gen_reg_rtx (DImode);
- emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
- emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
- if (TARGET_POWERPC64)
- emit_insn (gen_divdi3 (op5, op3, op4));
- else
- {
- rtx libfunc = optab_libfunc (sdiv_optab, DImode);
- rtx target = emit_library_call_value (libfunc,
- op5, LCT_NORMAL, DImode,
- op3, DImode,
- op4, DImode);
- emit_move_insn (op5, target);
- }
- emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
- emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
- if (TARGET_POWERPC64)
- emit_insn (gen_divdi3 (op3, op3, op4));
- else
- {
- rtx libfunc = optab_libfunc (sdiv_optab, DImode);
- rtx target = emit_library_call_value (libfunc,
- op3, LCT_NORMAL, DImode,
- op3, DImode,
- op4, DImode);
- emit_move_insn (op3, target);
- }
- emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
- DONE;
-}
- [(set_attr "type" "div")])
(define_insn_and_split "vsx_udiv_v2di"
[(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
@@ -1668,9 +1621,6 @@
rtx op5 = gen_reg_rtx (DImode);
emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
- if (TARGET_POWERPC64)
- emit_insn (gen_udivdi3 (op5, op3, op4));
- else
{
rtx libfunc = optab_libfunc (udiv_optab, DImode);
rtx target = emit_library_call_value (libfunc,
@@ -1681,9 +1631,6 @@
}
emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
- if (TARGET_POWERPC64)
- emit_insn (gen_udivdi3 (op3, op3, op4));
- else
{
rtx libfunc = optab_libfunc (udiv_optab, DImode);
rtx target = emit_library_call_value (libfunc,

@ -7,85 +7,85 @@ use ieee.std_logic_1164.all;
--! @brief A generic FIFO module.
--! Adopted from the FIFO module in <https://github.com/skordal/smallthings>.
entity pp_fifo is
generic(
DEPTH : natural := 64;
WIDTH : natural := 32
);
port(
-- Control lines:
clk : in std_logic;
reset : in std_logic;
generic(
DEPTH : natural := 64;
WIDTH : natural := 32
);
port(
-- Control lines:
clk : in std_logic;
reset : in std_logic;

-- Status lines:
full : out std_logic;
empty : out std_logic;
-- Status lines:
full : out std_logic;
empty : out std_logic;

-- Data in:
data_in : in std_logic_vector(WIDTH - 1 downto 0);
data_out : out std_logic_vector(WIDTH - 1 downto 0);
push, pop : in std_logic
);
-- Data in:
data_in : in std_logic_vector(WIDTH - 1 downto 0);
data_out : out std_logic_vector(WIDTH - 1 downto 0);
push, pop : in std_logic
);
end entity pp_fifo;

architecture behaviour of pp_fifo is

type memory_array is array(0 to DEPTH - 1) of std_logic_vector(WIDTH - 1 downto 0);
signal memory : memory_array := (others => (others => '0'));
type memory_array is array(0 to DEPTH - 1) of std_logic_vector(WIDTH - 1 downto 0);
shared variable memory : memory_array := (others => (others => '0'));

subtype index_type is integer range 0 to DEPTH - 1;
signal top, bottom : index_type;
subtype index_type is integer range 0 to DEPTH - 1;
signal top, bottom : index_type;

type fifo_op is (FIFO_POP, FIFO_PUSH);
signal prev_op : fifo_op := FIFO_POP;
type fifo_op is (FIFO_POP, FIFO_PUSH);
signal prev_op : fifo_op := FIFO_POP;

begin

empty <= '1' when top = bottom and prev_op = FIFO_POP else '0';
full <= '1' when top = bottom and prev_op = FIFO_PUSH else '0';
empty <= '1' when top = bottom and prev_op = FIFO_POP else '0';
full <= '1' when top = bottom and prev_op = FIFO_PUSH else '0';

-- Read port of the FIFO, clocked on the rising edge of clk.
-- A synchronous reset returns the read index (bottom) to 0. Otherwise, when
-- pop is asserted, the entry at the read index is registered onto data_out
-- and the index advances, wrapping at DEPTH.
read: process(clk)
begin
    if rising_edge(clk) then
        if reset = '1' then
            bottom <= 0;
        elsif pop = '1' then
            data_out <= memory(bottom);
            bottom <= (bottom + 1) mod DEPTH;
        end if;
    end if;
end process read;
-- FIFO read side. On each rising clock edge: reset (synchronous) clears the
-- read index; a pop copies memory(bottom) to data_out and moves the read
-- index forward by one, modulo DEPTH. With neither asserted nothing changes.
read: process(clk)
begin
    if rising_edge(clk) then
        if reset = '1' then
            bottom <= 0;
        elsif pop = '1' then
            data_out <= memory(bottom);
            bottom <= (bottom + 1) mod DEPTH;
        end if;
    end if;
end process read;

-- Write port of the FIFO, clocked on the rising edge of clk.
-- A synchronous reset returns the write index (top) to 0. Otherwise, when
-- push is asserted, data_in is stored at the write index and the index
-- advances, wrapping at DEPTH. The full flag is not checked here.
write: process(clk)
begin
    if rising_edge(clk) then
        if reset = '1' then
            top <= 0;
        elsif push = '1' then
            memory(top) <= data_in;
            top <= (top + 1) mod DEPTH;
        end if;
    end if;
end process write;
-- FIFO write side. On each rising clock edge: reset (synchronous) clears the
-- write index; a push stores data_in at memory(top) and moves the write
-- index forward by one, modulo DEPTH. The full flag is not checked here.
--
-- NOTE(review): the store uses variable assignment (:=), so it takes effect
-- immediately rather than at the end of the delta cycle. The read process
-- samples memory on the same clock edge, so a same-index push/pop may be
-- simulation-order dependent -- confirm this is intended.
write: process(clk)
begin
    if rising_edge(clk) then
        if reset = '1' then
            top <= 0;
        elsif push = '1' then
            memory(top) := data_in;
            top <= (top + 1) mod DEPTH;
        end if;
    end if;
end process write;

-- Tracks the most recent FIFO operation so that full and empty can be told
-- apart when the read and write indices are equal.
-- Reset selects FIFO_POP. A push without a pop records FIFO_PUSH, a pop
-- without a push records FIFO_POP, and a simultaneous push and pop leaves
-- prev_op unchanged.
set_prev_op: process(clk)
begin
    if rising_edge(clk) then
        if reset = '1' then
            prev_op <= FIFO_POP;
        elsif push = '1' and pop /= '1' then
            prev_op <= FIFO_PUSH;
        elsif pop = '1' and push /= '1' then
            prev_op <= FIFO_POP;
        end if;
    end if;
end process set_prev_op;
-- Tracks the most recent FIFO operation. The empty/full outputs use it to
-- disambiguate the top = bottom case: FIFO_POP means empty, FIFO_PUSH means
-- full.
--
-- Reset selects FIFO_POP. A lone push records FIFO_PUSH and a lone pop
-- records FIFO_POP.
--
-- A simultaneous push and pop advances both indices by one, so the fill
-- level does not change and prev_op must be left alone. Forcing FIFO_POP
-- here would make a full FIFO (top = bottom) report empty after a combined
-- push/pop, even though it still holds DEPTH entries.
set_prev_op: process(clk)
begin
    if rising_edge(clk) then
        if reset = '1' then
            prev_op <= FIFO_POP;
        else
            if push = '1' and pop = '1' then
                -- Fill level unchanged: keep the previous value of prev_op.
                null;
            elsif push = '1' then
                prev_op <= FIFO_PUSH;
            elsif pop = '1' then
                prev_op <= FIFO_POP;
            end if;
        end if;
    end if;
end process set_prev_op;

end architecture behaviour;

@ -34,362 +34,351 @@ use ieee.numeric_std.all;
--! - Bit 0: data received (receive buffer not empty)
--! - Bit 1: ready to send data (transmit buffer empty)
entity pp_soc_uart is
generic(
FIFO_DEPTH : natural := 64 --! Depth of the input and output FIFOs.
generic(
FIFO_DEPTH : natural := 64 --! Depth of the input and output FIFOs.
);
port(
clk : in std_logic;
reset : in std_logic;

-- UART ports:
txd : out std_logic;
rxd : in std_logic;

-- Interrupt signal:
irq : out std_logic;

-- Wishbone ports:
wb_adr_in : in std_logic_vector(11 downto 0);
wb_dat_in : in std_logic_vector( 7 downto 0);
wb_dat_out : out std_logic_vector( 7 downto 0);
wb_we_in : in std_logic;
wb_cyc_in : in std_logic;
wb_stb_in : in std_logic;
wb_ack_out : out std_logic
port(
clk : in std_logic;
reset : in std_logic;

-- UART ports:
txd : out std_logic;
rxd : in std_logic;

-- Interrupt signal:
irq : out std_logic;

-- Wishbone ports:
wb_adr_in : in std_logic_vector(11 downto 0);
wb_dat_in : in std_logic_vector( 7 downto 0);
wb_dat_out : out std_logic_vector( 7 downto 0);
wb_we_in : in std_logic;
wb_cyc_in : in std_logic;
wb_stb_in : in std_logic;
wb_ack_out : out std_logic
);
end entity pp_soc_uart;

architecture behaviour of pp_soc_uart is

subtype bitnumber is natural range 0 to 7; --! Type representing the index of a bit.

-- UART sample clock signals:
signal sample_clk : std_logic;
signal sample_clk_divisor : std_logic_vector(7 downto 0);
signal sample_clk_counter : std_logic_vector(sample_clk_divisor'range);

-- UART receive process signals:
type rx_state_type is (IDLE, RECEIVE, STARTBIT, STOPBIT);
signal rx_state : rx_state_type;
signal rx_byte : std_logic_vector(7 downto 0);
signal rx_current_bit : bitnumber;

subtype rx_sample_counter_type is natural range 0 to 15;
signal rx_sample_counter : rx_sample_counter_type;
signal rx_sample_value : rx_sample_counter_type;

subtype rx_sample_delay_type is natural range 0 to 7;
signal rx_sample_delay : rx_sample_delay_type;

-- UART transmit process signals:
type tx_state_type is (IDLE, TRANSMIT, STOPBIT);
signal tx_state : tx_state_type;
signal tx_byte : std_logic_vector(7 downto 0);
signal tx_current_bit : bitnumber;

-- UART transmit clock:
subtype uart_tx_counter_type is natural range 0 to 15;
signal uart_tx_counter : uart_tx_counter_type := 0;
signal uart_tx_clk : std_logic;

-- Buffer signals:
signal send_buffer_full, send_buffer_empty : std_logic;
signal recv_buffer_full, recv_buffer_empty : std_logic;
signal send_buffer_input, send_buffer_output : std_logic_vector(7 downto 0);
signal recv_buffer_input, recv_buffer_output : std_logic_vector(7 downto 0);
signal send_buffer_push, send_buffer_pop : std_logic := '0';
signal recv_buffer_push, recv_buffer_pop : std_logic := '0';

-- IRQ enable signals:
signal irq_recv_enable, irq_tx_ready_enable : std_logic := '0';

-- Wishbone signals:
type wb_state_type is (IDLE, WRITE_ACK, READ_ACK);
signal wb_state : wb_state_type;

signal rxd2 : std_logic := '1';
signal rxd3 : std_logic := '1';
signal txd2 : std_ulogic := '1';
subtype bitnumber is natural range 0 to 7; --! Type representing the index of a bit.

-- UART sample clock signals:
signal sample_clk : std_logic;
signal sample_clk_divisor : std_logic_vector(7 downto 0);
signal sample_clk_counter : std_logic_vector(sample_clk_divisor'range);

-- UART receive process signals:
type rx_state_type is (IDLE, RECEIVE, STARTBIT, STOPBIT);
signal rx_state : rx_state_type;
signal rx_byte : std_logic_vector(7 downto 0);
signal rx_current_bit : bitnumber;

subtype rx_sample_counter_type is natural range 0 to 15;
signal rx_sample_counter : rx_sample_counter_type;
signal rx_sample_value : rx_sample_counter_type;

subtype rx_sample_delay_type is natural range 0 to 7;
signal rx_sample_delay : rx_sample_delay_type;

-- UART transmit process signals:
type tx_state_type is (IDLE, TRANSMIT, STOPBIT);
signal tx_state : tx_state_type;
signal tx_byte : std_logic_vector(7 downto 0);
signal tx_current_bit : bitnumber;

-- UART transmit clock:
subtype uart_tx_counter_type is natural range 0 to 15;
signal uart_tx_counter : uart_tx_counter_type := 0;
signal uart_tx_clk : std_logic;

-- Buffer signals:
signal send_buffer_full, send_buffer_empty : std_logic;
signal recv_buffer_full, recv_buffer_empty : std_logic;
signal send_buffer_input, send_buffer_output : std_logic_vector(7 downto 0);
signal recv_buffer_input, recv_buffer_output : std_logic_vector(7 downto 0);
signal send_buffer_push, send_buffer_pop : std_logic := '0';
signal recv_buffer_push, recv_buffer_pop : std_logic := '0';

-- IRQ enable signals:
signal irq_recv_enable, irq_tx_ready_enable : std_logic := '0';

-- Wishbone signals:
type wb_state_type is (IDLE, WRITE_ACK, READ_ACK);
signal wb_state : wb_state_type;

signal wb_ack : std_logic; --! Wishbone acknowledge signal

begin

irq <= (irq_recv_enable and (not recv_buffer_empty))
or (irq_tx_ready_enable and send_buffer_empty);

---------- UART receive ----------

recv_buffer_input <= rx_byte;

-- Add a few FFs on the RX input to avoid metastability issues
process (clk) is
begin
    -- Two-stage register chain on the asynchronous rxd input:
    -- rxd -> rxd2 -> rxd3. Signal assignments take effect after the
    -- process suspends, so rxd3 still receives the previous rxd2 value.
    if rising_edge(clk) then
        rxd2 <= rxd;
        rxd3 <= rxd2;
    end if;
end process;
txd <= txd2;

uart_receive: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
rx_state <= IDLE;
recv_buffer_push <= '0';
else
case rx_state is
when IDLE =>
if recv_buffer_push = '1' then
recv_buffer_push <= '0';
end if;

if sample_clk = '1' and rxd3 = '0' then
rx_sample_value <= rx_sample_counter;
rx_sample_delay <= 0;
rx_current_bit <= 0;
rx_state <= STARTBIT;
end if;
when STARTBIT =>
if sample_clk = '1' then
if rx_sample_delay = 7 then
rx_state <= RECEIVE;
rx_sample_value <= rx_sample_counter;
rx_sample_delay <= 0;
irq <= (irq_recv_enable and (not recv_buffer_empty))
or (irq_tx_ready_enable and send_buffer_empty);

---------- UART receive ----------

recv_buffer_input <= rx_byte;

uart_receive: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
rx_state <= IDLE;
recv_buffer_push <= '0';
else
rx_sample_delay <= rx_sample_delay + 1;
case rx_state is
when IDLE =>
if recv_buffer_push = '1' then
recv_buffer_push <= '0';
end if;

if sample_clk = '1' and rxd = '0' then
rx_sample_value <= rx_sample_counter;
rx_sample_delay <= 0;
rx_current_bit <= 0;
rx_state <= STARTBIT;
end if;
when STARTBIT =>
if sample_clk = '1' then
if rx_sample_delay = 7 then
rx_state <= RECEIVE;
rx_sample_value <= rx_sample_counter;
rx_sample_delay <= 0;
else
rx_sample_delay <= rx_sample_delay + 1;
end if;
end if;
when RECEIVE =>
if sample_clk = '1' and rx_sample_counter = rx_sample_value then
if rx_current_bit /= 7 then
rx_byte(rx_current_bit) <= rxd;
rx_current_bit <= rx_current_bit + 1;
else
rx_byte(rx_current_bit) <= rxd;
rx_state <= STOPBIT;
end if;
end if;
when STOPBIT =>
if sample_clk = '1' and rx_sample_counter = rx_sample_value then
rx_state <= IDLE;

if recv_buffer_full = '0' then
recv_buffer_push <= '1';
end if;
end if;
end case;
end if;
end if;
when RECEIVE =>
if sample_clk = '1' and rx_sample_counter = rx_sample_value then
if rx_current_bit /= 7 then
rx_byte(rx_current_bit) <= rxd3;
rx_current_bit <= rx_current_bit + 1;
else
rx_byte(rx_current_bit) <= rxd3;
rx_state <= STOPBIT;
end if;
end process uart_receive;

sample_counter: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
rx_sample_counter <= 0;
elsif sample_clk = '1' then
if rx_sample_counter = 15 then
rx_sample_counter <= 0;
else
rx_sample_counter <= rx_sample_counter + 1;
end if;
end if;
end if;
when STOPBIT =>
if sample_clk = '1' and rx_sample_counter = rx_sample_value then
rx_state <= IDLE;
end if;
end process sample_counter;

---------- UART transmit ----------

if recv_buffer_full = '0' then
recv_buffer_push <= '1';
tx_byte <= send_buffer_output;

uart_transmit: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
txd <= '1';
tx_state <= IDLE;
send_buffer_pop <= '0';
tx_current_bit <= 0;
else
case tx_state is
when IDLE =>
if send_buffer_empty = '0' and uart_tx_clk = '1' then
txd <= '0';
send_buffer_pop <= '1';
tx_current_bit <= 0;
tx_state <= TRANSMIT;
elsif uart_tx_clk = '1' then
txd <= '1';
end if;
when TRANSMIT =>
if send_buffer_pop = '1' then
send_buffer_pop <= '0';
elsif uart_tx_clk = '1' and tx_current_bit = 7 then
txd <= tx_byte(tx_current_bit);
tx_state <= STOPBIT;
elsif uart_tx_clk = '1' then
txd <= tx_byte(tx_current_bit);
tx_current_bit <= tx_current_bit + 1;
end if;
when STOPBIT =>
if uart_tx_clk = '1' then
txd <= '1';
tx_state <= IDLE;
end if;
end case;
end if;
end if;
end case;
end if;
end if;
end process uart_receive;

-- Free-running receive sample counter: advances once per sample_clk pulse
-- and wraps from the top of its range (15) back to 0. Synchronous reset.
sample_counter: process(clk)
begin
    if rising_edge(clk) then
        if reset = '1' then
            rx_sample_counter <= 0;
        elsif sample_clk = '1' then
            if rx_sample_counter < rx_sample_counter_type'high then
                rx_sample_counter <= rx_sample_counter + 1;
            else
                -- Reached the last sample slot: start over.
                rx_sample_counter <= 0;
            end if;
        end if;
    end if;
end process sample_counter;

---------- UART transmit ----------

tx_byte <= send_buffer_output;

-- Transmit state machine. All line changes happen on uart_tx_clk pulses:
--   IDLE     : line held high; when the send FIFO has data, emit the start
--              bit (low) and pop the FIFO.
--   TRANSMIT : first clears the pop strobe, then shifts out tx_byte bit 0
--              through bit 7, one bit per uart_tx_clk pulse.
--   STOPBIT  : drives the line high again and returns to IDLE.
uart_transmit: process(clk)
begin
    if rising_edge(clk) then
        if reset = '1' then
            tx_state        <= IDLE;
            tx_current_bit  <= 0;
            send_buffer_pop <= '0';
            txd2            <= '1';
        else
            case tx_state is
                when IDLE =>
                    if uart_tx_clk = '1' then
                        if send_buffer_empty = '0' then
                            -- Start bit, and request the next byte.
                            txd2            <= '0';
                            send_buffer_pop <= '1';
                            tx_current_bit  <= 0;
                            tx_state        <= TRANSMIT;
                        else
                            -- Nothing to send: keep the line idle-high.
                            txd2 <= '1';
                        end if;
                    end if;

                when TRANSMIT =>
                    if send_buffer_pop = '1' then
                        -- Pop strobe lasts a single clk cycle.
                        send_buffer_pop <= '0';
                    elsif uart_tx_clk = '1' then
                        txd2 <= tx_byte(tx_current_bit);
                        if tx_current_bit = 7 then
                            tx_state <= STOPBIT;
                        else
                            tx_current_bit <= tx_current_bit + 1;
                        end if;
                    end if;

                when STOPBIT =>
                    if uart_tx_clk = '1' then
                        txd2     <= '1';
                        tx_state <= IDLE;
                    end if;
            end case;
        end if;
    end if;
end process uart_transmit;

uart_tx_clock_generator: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
uart_tx_counter <= 0;
uart_tx_clk <= '0';
else
if sample_clk = '1' then
if uart_tx_counter = 15 then
uart_tx_counter <= 0;
uart_tx_clk <= '1';
else
uart_tx_counter <= uart_tx_counter + 1;
uart_tx_clk <= '0';
end if;
else
uart_tx_clk <= '0';
end process uart_transmit;

uart_tx_clock_generator: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
uart_tx_counter <= 0;
uart_tx_clk <= '0';
else
if sample_clk = '1' then
if uart_tx_counter = 15 then
uart_tx_counter <= 0;
uart_tx_clk <= '1';
else
uart_tx_counter <= uart_tx_counter + 1;
uart_tx_clk <= '0';
end if;
else
uart_tx_clk <= '0';
end if;
end if;
end if;
end if;
end if;
end process uart_tx_clock_generator;

---------- Sample clock generator ----------

sample_clock_generator: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
sample_clk_counter <= (others => '0');
sample_clk <= '0';
else
if sample_clk_divisor /= x"00" then
if sample_clk_counter = sample_clk_divisor then
sample_clk_counter <= (others => '0');
sample_clk <= '1';
else
sample_clk_counter <= std_logic_vector(unsigned(sample_clk_counter) + 1);
sample_clk <= '0';
end if;
end process uart_tx_clock_generator;

---------- Sample clock generator ----------

sample_clock_generator: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
sample_clk_counter <= (others => '0');
sample_clk <= '0';
else
if sample_clk_divisor /= x"00" then
if sample_clk_counter = sample_clk_divisor then
sample_clk_counter <= (others => '0');
sample_clk <= '1';
else
sample_clk_counter <= std_logic_vector(unsigned(sample_clk_counter) + 1);
sample_clk <= '0';
end if;
end if;
end if;
end if;
end if;
end if;
end process sample_clock_generator;

---------- Data Buffers ----------

send_buffer: entity work.pp_fifo
generic map(
DEPTH => FIFO_DEPTH,
WIDTH => 8
) port map(
clk => clk,
reset => reset,
full => send_buffer_full,
empty => send_buffer_empty,
data_in => send_buffer_input,
data_out => send_buffer_output,
push => send_buffer_push,
pop => send_buffer_pop
end process sample_clock_generator;

---------- Data Buffers ----------

-- Transmit FIFO: bytes written over Wishbone are pushed here and popped
-- by the uart_transmit state machine.
send_buffer: entity work.pp_fifo
    generic map(
        WIDTH => 8,
        DEPTH => FIFO_DEPTH
    )
    port map(
        clk      => clk,
        reset    => reset,
        -- Write side
        push     => send_buffer_push,
        data_in  => send_buffer_input,
        -- Read side
        pop      => send_buffer_pop,
        data_out => send_buffer_output,
        -- Status flags
        full     => send_buffer_full,
        empty    => send_buffer_empty
    );

recv_buffer: entity work.pp_fifo
generic map(
DEPTH => FIFO_DEPTH,
WIDTH => 8
) port map(
clk => clk,
reset => reset,
full => recv_buffer_full,
empty => recv_buffer_empty,
data_in => recv_buffer_input,
data_out => recv_buffer_output,
push => recv_buffer_push,
pop => recv_buffer_pop
-- Receive FIFO: bytes assembled by the uart_receive state machine are
-- pushed here and popped by Wishbone reads.
recv_buffer: entity work.pp_fifo
    generic map(
        WIDTH => 8,
        DEPTH => FIFO_DEPTH
    )
    port map(
        clk      => clk,
        reset    => reset,
        -- Write side
        push     => recv_buffer_push,
        data_in  => recv_buffer_input,
        -- Read side
        pop      => recv_buffer_pop,
        data_out => recv_buffer_output,
        -- Status flags
        full     => recv_buffer_full,
        empty    => recv_buffer_empty
    );

---------- Wishbone Interface ----------

wishbone: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
wb_ack_out <= '0';
wb_state <= IDLE;
send_buffer_push <= '0';
recv_buffer_pop <= '0';
sample_clk_divisor <= (others => '0');
irq_recv_enable <= '0';
irq_tx_ready_enable <= '0';
else
case wb_state is
when IDLE =>
if wb_cyc_in = '1' and wb_stb_in = '1' then
if wb_we_in = '1' then -- Write to register
if wb_adr_in = x"000" then
send_buffer_input <= wb_dat_in;
send_buffer_push <= '1';
elsif wb_adr_in = x"018" then
sample_clk_divisor <= wb_dat_in;
elsif wb_adr_in = x"020" then
irq_recv_enable <= wb_dat_in(0);
irq_tx_ready_enable <= wb_dat_in(1);
end if;

-- Invalid writes are acked and ignored.
wb_ack_out <= '1';
wb_state <= WRITE_ACK;
else -- Read from register
if wb_adr_in = x"008" then
recv_buffer_pop <= '1';
elsif wb_adr_in = x"010" then
wb_dat_out <= x"0" & send_buffer_full & recv_buffer_full &
send_buffer_empty & recv_buffer_empty;
wb_ack_out <= '1';
elsif wb_adr_in = x"018" then
wb_dat_out <= sample_clk_divisor;
wb_ack_out <= '1';
elsif wb_adr_in = x"020" then
wb_dat_out <= (0 => irq_recv_enable,
1 => irq_tx_ready_enable,
others => '0');
wb_ack_out <= '1';
else
wb_dat_out <= (others => '0');
wb_ack_out <= '1';
end if;
wb_state <= READ_ACK;
---------- Wishbone Interface ----------

wb_ack_out <= wb_ack and wb_cyc_in and wb_stb_in;

wishbone: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
wb_ack <= '0';
wb_state <= IDLE;
send_buffer_push <= '0';
recv_buffer_pop <= '0';
sample_clk_divisor <= (others => '0');
irq_recv_enable <= '0';
irq_tx_ready_enable <= '0';
else
case wb_state is
when IDLE =>
if wb_cyc_in = '1' and wb_stb_in = '1' then
if wb_we_in = '1' then -- Write to register
if wb_adr_in = x"000" then
send_buffer_input <= wb_dat_in;
send_buffer_push <= '1';
elsif wb_adr_in = x"018" then
sample_clk_divisor <= wb_dat_in;
elsif wb_adr_in = x"020" then
irq_recv_enable <= wb_dat_in(0);
irq_tx_ready_enable <= wb_dat_in(1);
end if;

-- Invalid writes are acked and ignored.

wb_ack <= '1';
wb_state <= WRITE_ACK;
else -- Read from register
if wb_adr_in = x"008" then
recv_buffer_pop <= '1';
elsif wb_adr_in = x"010" then
wb_dat_out <= x"0" & send_buffer_full & recv_buffer_full & send_buffer_empty & recv_buffer_empty;
wb_ack <= '1';
elsif wb_adr_in = x"018" then
wb_dat_out <= sample_clk_divisor;
wb_ack <= '1';
elsif wb_adr_in = x"020" then
wb_dat_out <= (0 => irq_recv_enable, 1 => irq_tx_ready_enable, others => '0');
wb_ack <= '1';
else
wb_dat_out <= (others => '0');
wb_ack <= '1';
end if;
wb_state <= READ_ACK;
end if;
end if;
when WRITE_ACK =>
send_buffer_push <= '0';

if wb_stb_in = '0' then
wb_ack <= '0';
wb_state <= IDLE;
end if;
when READ_ACK =>
if recv_buffer_pop = '1' then
recv_buffer_pop <= '0';
else
wb_dat_out <= recv_buffer_output;
wb_ack <= '1';
end if;

if wb_stb_in = '0' then
wb_ack <= '0';
wb_state <= IDLE;
end if;
end case;
end if;
end if;
when WRITE_ACK =>
send_buffer_push <= '0';

if wb_stb_in = '0' then
wb_ack_out <= '0';
wb_state <= IDLE;
end if;
when READ_ACK =>
if recv_buffer_pop = '1' then
recv_buffer_pop <= '0';
else
wb_dat_out <= recv_buffer_output;
wb_ack_out <= '1';
end if;

if wb_stb_in = '0' then
wb_ack_out <= '0';
wb_state <= IDLE;
end if;
end case;
end if;
end if;
end process wishbone;
end if;
end process wishbone;

end architecture behaviour;

@ -1,11 +1,10 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

entity soc_reset is
generic (
PLL_RESET_BITS : integer := 5;
SOC_RESET_BITS : integer := 5;
PLL_RESET_CLOCKS : integer := 32;
SOC_RESET_CLOCKS : integer := 32;
RESET_LOW : boolean := true
);
port (
@ -21,38 +20,26 @@ entity soc_reset is
end soc_reset;

architecture rtl of soc_reset is
signal ext_rst0_n : std_ulogic;
signal ext_rst1_n : std_ulogic := '0';
signal ext_rst2_n : std_ulogic := '0';
signal rst0_n : std_ulogic;
signal rst1_n : std_ulogic := '0';
signal rst2_n : std_ulogic := '0';
signal pll_rst_cnt : std_ulogic_vector(PLL_RESET_BITS downto 0) := (others => '0');
signal soc_rst_cnt : std_ulogic_vector(SOC_RESET_BITS downto 0) := (others => '0');
signal ext_rst_n : std_ulogic;
signal rst_n : std_ulogic;
signal pll_rst_reg : std_ulogic_vector(PLL_RESET_CLOCKS downto 0) := (others => '1');
signal soc_rst_reg : std_ulogic_vector(SOC_RESET_CLOCKS downto 0) := (others => '1');
begin
ext_rst0_n <= ext_rst_in when RESET_LOW else not ext_rst_in;
rst0_n <= ext_rst0_n and pll_locked_in and not pll_rst_out;
ext_rst_n <= ext_rst_in when RESET_LOW else not ext_rst_in;
rst_n <= ext_rst_n and pll_locked_in;

-- PLL reset is active high
pll_rst_out <= not pll_rst_cnt(pll_rst_cnt'left);
pll_rst_out <= pll_rst_reg(0);
-- Pass active high reset around
rst_out <= not soc_rst_cnt(soc_rst_cnt'left);
rst_out <= soc_rst_reg(0);

-- Wait for external clock to become stable before starting the PLL
-- By the time the FPGA has been loaded the clock should be well and
-- truly stable, but let's give it a few cycles to be sure.
--
-- [BenH] Some designs seem to require a lot more..
pll_reset_0 : process(ext_clk)
begin
if (rising_edge(ext_clk)) then
ext_rst1_n <= ext_rst0_n;
ext_rst2_n <= ext_rst1_n;
if (ext_rst2_n = '0') then
pll_rst_cnt <= (others => '0');
elsif (pll_rst_cnt(pll_rst_cnt'left) = '0') then
pll_rst_cnt <= std_ulogic_vector(unsigned(pll_rst_cnt) + 1);
end if;
pll_rst_reg <= '0' & pll_rst_reg(pll_rst_reg'length-1 downto 1);
end if;
end process;

@ -62,12 +49,10 @@ begin
soc_reset_0 : process(pll_clk)
begin
if (rising_edge(pll_clk)) then
rst1_n <= rst0_n;
rst2_n <= rst1_n;
if (rst2_n = '0') then
soc_rst_cnt <= (others => '0');
elsif (soc_rst_cnt(soc_rst_cnt'left) = '0') then
soc_rst_cnt <= std_ulogic_vector(unsigned(soc_rst_cnt) + 1);
if (rst_n = '0') then
soc_rst_reg <= (others => '1');
else
soc_rst_reg <= '0' & soc_rst_reg(soc_rst_reg'length-1 downto 1);
end if;
end if;
end process;

@ -12,14 +12,16 @@ architecture behave of soc_reset_tb is
signal ext_rst_in : std_ulogic;

signal pll_rst_out : std_ulogic;
signal pll_rst_out_expected : std_ulogic;
signal rst_out : std_ulogic;
signal rst_out_expected : std_ulogic;

constant clk_period : time := 10 ns;

type test_vector is record
pll_locked_in : std_ulogic;
ext_rst_in : std_ulogic;
pll_rst_out : std_ulogic;
pll_rst_out : std_ulogic;
rst_out : std_ulogic;
end record;

@ -30,8 +32,6 @@ architecture behave of soc_reset_tb is
('0', '1', '1', '1'),
('0', '1', '1', '1'),
('0', '1', '1', '1'),
('0', '1', '1', '1'),
('0', '1', '1', '1'),
-- Reset is removed from the PLL
('0', '1', '0', '1'),
('0', '1', '0', '1'),
@ -41,27 +41,15 @@ architecture behave of soc_reset_tb is
('1', '1', '0', '1'),
('1', '1', '0', '1'),
('1', '1', '0', '1'),
('1', '1', '0', '1'),
('1', '1', '0', '1'),
-- Finally SOC comes out of reset
('1', '1', '0', '0'),
('1', '1', '0', '0'),

-- PLL locked, reset button pressed
('1', '0', '0', '0'),
('1', '0', '0', '0'),
('1', '0', '0', '0'),
('1', '0', '1', '1'),
('1', '0', '0', '1'),
('1', '0', '0', '1'),
('1', '0', '0', '1'),
-- PLL locked, reset button released
('1', '1', '1', '1'),
('1', '1', '1', '1'),
('1', '1', '1', '1'),
('1', '1', '1', '1'),
('1', '1', '1', '1'),
('1', '1', '1', '1'),
('1', '1', '0', '1'),
('1', '1', '0', '1'),
('1', '1', '0', '1'),
('1', '1', '0', '1'),
('1', '1', '0', '1'),
('1', '1', '0', '1'),
@ -71,8 +59,8 @@ architecture behave of soc_reset_tb is
begin
soc_reset_0: entity work.soc_reset
generic map (
PLL_RESET_BITS => 2,
SOC_RESET_BITS => 2,
PLL_RESET_CLOCKS => 4,
SOC_RESET_CLOCKS => 4,
RESET_LOW => true
)
port map (
@ -95,35 +83,24 @@ begin
end process clock;

stim: process
variable tv : test_vector;
begin
-- skew us a bit
wait for clk_period/4;

for i in test_vectors'range loop
tv := test_vectors(i);

pll_locked_in <= tv.pll_locked_in;
ext_rst_in <= tv.ext_rst_in;
(pll_locked_in, ext_rst_in, pll_rst_out_expected, rst_out_expected) <= test_vectors(i);

report " ** STEP " & integer'image(i);
report "pll_locked_in " & std_ulogic'image(pll_locked_in);
report "ext_rst_in " & std_ulogic'image(ext_rst_in);
report "pll_rst_out " & std_ulogic'image(pll_rst_out);
report "rst_out" & std_ulogic'image(rst_out);
--report "pll_locked_in " & std_ulogic'image(pll_locked_in);
--report "ext_rst_in " & std_ulogic'image(ext_rst_in);
--report "pll_rst_out " & std_ulogic'image(pll_rst_out);
--report "rst_out" & std_ulogic'image(rst_out);

assert tv.pll_rst_out = pll_rst_out report
"pll_rst_out bad exp=" & std_ulogic'image(tv.pll_rst_out) &
" got=" & std_ulogic'image(pll_rst_out);
assert tv.rst_out = rst_out report
"rst_out bad exp=" & std_ulogic'image(tv.rst_out) &
" got=" & std_ulogic'image(rst_out);
assert pll_rst_out_expected = pll_rst_out report "pll_rst_out bad";
assert rst_out_expected = rst_out report "rst_out bad";

wait for clk_period;
end loop;

wait for clk_period;

std.env.finish;
assert false report "end of test" severity failure;
wait;
end process;
end behave;

@ -1,327 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library unisim;
use unisim.vcomponents.all;

library work;
use work.wishbone_types.all;

-- Board top level: differential 200 MHz clock input, one UART, four LEDs,
-- quad SPI flash data pins and a 16-bit DDR memory interface.
entity toplevel is
    generic (
        MEMORY_SIZE          : integer  := 16384;
        RAM_INIT_FILE        : string   := "firmware.hex";
        CLK_FREQUENCY        : positive := 100000000;
        USE_LITEDRAM,
        NO_BRAM,
        DISABLE_FLATTEN_CORE : boolean  := false;
        SPI_FLASH_OFFSET     : integer  := 10485760;
        SPI_FLASH_DEF_CKDV   : natural  := 1;
        SPI_FLASH_DEF_QUAD   : boolean  := true;
        LOG_LENGTH           : natural  := 2048;
        UART_IS_16550        : boolean  := true
    );
    port (
        -- Differential board clock
        clk200_p, clk200_n : in std_ulogic;

        -- P2 signals used as UART
        uart_rx : in  std_ulogic;
        uart_tx : out std_ulogic;

        -- LEDs
        led0, led1, led2, led3 : out std_logic;

        -- SPI flash (the SPI clock is not a port here)
        spi_flash_cs_n   : out   std_ulogic;
        spi_flash_mosi,
        spi_flash_miso,
        spi_flash_wp_n,
        spi_flash_hold_n : inout std_ulogic;

        -- DRAM wires
        ddram_a       : out   std_logic_vector(15 downto 0);
        ddram_ba      : out   std_logic_vector(2 downto 0);
        ddram_ras_n,
        ddram_cas_n,
        ddram_we_n    : out   std_logic;
        ddram_dm      : out   std_logic_vector(1 downto 0);
        ddram_dq      : inout std_logic_vector(15 downto 0);
        ddram_dqs_p,
        ddram_dqs_n   : inout std_logic_vector(1 downto 0);
        ddram_clk_p,
        ddram_clk_n,
        ddram_cke,
        ddram_odt,
        ddram_reset_n : out   std_logic
    );
end entity toplevel;

-- Structural top level: instantiates the SoC, wires the SPI flash pins
-- (clock via STARTUPE2), buffers the differential input clock, and then
-- selects one of two generate branches depending on USE_LITEDRAM:
--   nodram   : soc_reset + clock_generator, DRAM clock pins driven to '0'
--   has_dram : soc_reset + litedram_wrapper
architecture behaviour of toplevel is

-- Internal clock
signal ext_clk : std_ulogic;

-- Reset signals:
signal soc_rst : std_ulogic;
signal pll_rst : std_ulogic;

-- Internal clock signals:
signal system_clk : std_ulogic;
signal system_clk_locked : std_ulogic;

-- DRAM main data wishbone connection
signal wb_dram_in : wishbone_master_out;
signal wb_dram_out : wishbone_slave_out;

-- DRAM control wishbone connection
signal wb_ext_io_in : wb_io_master_out;
signal wb_ext_io_out : wb_io_slave_out;
signal wb_ext_is_dram_csr : std_ulogic;
signal wb_ext_is_dram_init : std_ulogic;

-- Control/status
signal core_alt_reset : std_ulogic;

-- SPI flash
signal spi_sck : std_ulogic;
signal spi_cs_n : std_ulogic;
signal spi_sdat_o : std_ulogic_vector(3 downto 0);
signal spi_sdat_oe : std_ulogic_vector(3 downto 0);
signal spi_sdat_i : std_ulogic_vector(3 downto 0);

-- ddram clock signals as vectors
signal ddram_clk_p_vec : std_logic_vector(0 downto 0);
signal ddram_clk_n_vec : std_logic_vector(0 downto 0);

-- Fixup various memory sizes based on generics
-- Block RAM size given to the SoC: 0 when LiteDRAM is used without BRAM,
-- MEMORY_SIZE otherwise.
function get_bram_size return natural is
begin
if USE_LITEDRAM and NO_BRAM then
return 0;
else
return MEMORY_SIZE;
end if;
end function;

-- Size of the DRAM init payload: MEMORY_SIZE when LiteDRAM is used without
-- BRAM, 0 otherwise (the exact complement of get_bram_size).
function get_payload_size return natural is
begin
if USE_LITEDRAM and NO_BRAM then
return MEMORY_SIZE;
else
return 0;
end if;
end function;

constant BRAM_SIZE : natural := get_bram_size;
constant PAYLOAD_SIZE : natural := get_payload_size;
begin

-- Main SoC
soc0: entity work.soc
generic map(
MEMORY_SIZE => BRAM_SIZE,
RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_DRAM => USE_LITEDRAM,
DRAM_SIZE => 1024 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE,
HAS_SPI_FLASH => true,
SPI_FLASH_DLINES => 4,
SPI_FLASH_OFFSET => SPI_FLASH_OFFSET,
SPI_FLASH_DEF_CKDV => SPI_FLASH_DEF_CKDV,
SPI_FLASH_DEF_QUAD => SPI_FLASH_DEF_QUAD,
LOG_LENGTH => LOG_LENGTH,
UART0_IS_16550 => UART_IS_16550
)
port map (
-- System signals
system_clk => system_clk,
rst => soc_rst,

-- UART signals
uart0_txd => uart_tx,
uart0_rxd => uart_rx,

-- SPI signals
spi_flash_sck => spi_sck,
spi_flash_cs_n => spi_cs_n,
spi_flash_sdat_o => spi_sdat_o,
spi_flash_sdat_oe => spi_sdat_oe,
spi_flash_sdat_i => spi_sdat_i,

-- DRAM wishbone
wb_dram_in => wb_dram_in,
wb_dram_out => wb_dram_out,
wb_ext_io_in => wb_ext_io_in,
wb_ext_io_out => wb_ext_io_out,
wb_ext_is_dram_csr => wb_ext_is_dram_csr,
wb_ext_is_dram_init => wb_ext_is_dram_init,
alt_reset => core_alt_reset
);

-- SPI Flash. The SPI clk needs to be fed through the STARTUPE2
-- primitive of the FPGA as it's not a normal pin
--
-- Each data pin is driven from spi_sdat_o(n) only while spi_sdat_oe(n) is
-- '1' and is high-impedance otherwise; the pin level is always fed back
-- on spi_sdat_i(n). Pin order: 0 = mosi, 1 = miso, 2 = wp_n, 3 = hold_n.
spi_flash_cs_n <= spi_cs_n;
spi_flash_mosi <= spi_sdat_o(0) when spi_sdat_oe(0) = '1' else 'Z';
spi_flash_miso <= spi_sdat_o(1) when spi_sdat_oe(1) = '1' else 'Z';
spi_flash_wp_n <= spi_sdat_o(2) when spi_sdat_oe(2) = '1' else 'Z';
spi_flash_hold_n <= spi_sdat_o(3) when spi_sdat_oe(3) = '1' else 'Z';
spi_sdat_i(0) <= spi_flash_mosi;
spi_sdat_i(1) <= spi_flash_miso;
spi_sdat_i(2) <= spi_flash_wp_n;
spi_sdat_i(3) <= spi_flash_hold_n;

-- spi_sck goes out through USRCCLKO; every other input is tied to a
-- constant.
STARTUPE2_INST: STARTUPE2
port map (
CLK => '0',
GSR => '0',
GTS => '0',
KEYCLEARB => '0',
PACK => '0',
USRCCLKO => spi_sck,
USRCCLKTS => '0',
USRDONEO => '1',
USRDONETS => '0'
);

-- Differential input buffer: clk200_p/clk200_n -> single-ended ext_clk.
clk200: IBUFDS
port map (
i => clk200_p,
ib => clk200_n,
o => ext_clk
);

-- Configuration without LiteDRAM: local reset controller and clock
-- generator; the DRAM clock pins are held at a constant.
nodram: if not USE_LITEDRAM generate
signal ddram_clk_dummy : std_ulogic;
begin
-- ext_rst_in is tied to '0' with RESET_LOW => false, i.e. there is no
-- external reset source connected in this design.
reset_controller: entity work.soc_reset
generic map(
RESET_LOW => false
)
port map(
ext_clk => ext_clk,
pll_clk => system_clk,
pll_locked_in => system_clk_locked,
ext_rst_in => '0',
pll_rst_out => pll_rst,
rst_out => soc_rst
);

-- Derives system_clk (CLK_FREQUENCY) from the 200 MHz ext_clk.
clkgen: entity work.clock_generator
generic map(
CLK_INPUT_HZ => 200000000,
CLK_OUTPUT_HZ => CLK_FREQUENCY
)
port map(
ext_clk => ext_clk,
pll_rst_in => pll_rst,
pll_clk_out => system_clk,
pll_locked_out => system_clk_locked
);

-- Status LEDs: SoC reset, PLL reset, PLL not locked; led3 unused.
led0 <= soc_rst;
led1 <= pll_rst;
led2 <= not system_clk_locked;
led3 <= '0';
core_alt_reset <= '0';

-- Vivado barfs on those differential signals if left
-- unconnected. So instantiate a diff. buffer and feed
-- it a constant '0'.
dummy_dram_clk: OBUFDS
port map (
O => ddram_clk_p,
OB => ddram_clk_n,
I => ddram_clk_dummy
);
ddram_clk_dummy <= '0';

end generate;

-- Configuration with LiteDRAM: the litedram_wrapper instance is connected
-- to system_clk, soc_rst, core_alt_reset and system_clk_locked.
has_dram: if USE_LITEDRAM generate
signal dram_init_done : std_ulogic;
signal dram_init_error : std_ulogic;
-- NOTE(review): dram_sys_rst is not referenced anywhere in this generate
-- body; it looks like a leftover - confirm before removing.
signal dram_sys_rst : std_ulogic;
begin

-- Eventually dig out the frequency from the generator
-- but for now, assert it's 100Mhz
assert CLK_FREQUENCY = 100000000;

-- NOTE(review): these two assignments drive the *_vec signals FROM the
-- ddram_clk_p/n output ports, while the same *_vec signals are also the
-- actuals of the dram instance's ddram_clk_p/n ports below. If those
-- wrapper ports are outputs (their direction is not visible here), the
-- intended direction is presumably ddram_clk_p <= ddram_clk_p_vec(0)
-- (and likewise for _n) - confirm against litedram_wrapper.
ddram_clk_p_vec <= (others => ddram_clk_p);
ddram_clk_n_vec <= (others => ddram_clk_n);

-- Only pll_rst is taken from this reset controller: pll_locked_in is
-- tied to '1' and rst_out is left open. soc_rst is connected to the dram
-- instance's system_reset port instead.
reset_controller: entity work.soc_reset
generic map(
RESET_LOW => false,
PLL_RESET_BITS => 18,
SOC_RESET_BITS => 1
)
port map(
ext_clk => ext_clk,
pll_clk => system_clk,
pll_locked_in => '1',
ext_rst_in => '0',
pll_rst_out => pll_rst,
rst_out => open
);

dram: entity work.litedram_wrapper
generic map(
DRAM_ABITS => 26,
DRAM_ALINES => 16,
DRAM_DLINES => 16,
DRAM_CKLINES => 1,
DRAM_PORT_WIDTH => 128,
PAYLOAD_FILE => RAM_INIT_FILE,
PAYLOAD_SIZE => PAYLOAD_SIZE
)
port map(
clk_in => ext_clk,
rst => pll_rst,
system_clk => system_clk,
system_reset => soc_rst,
core_alt_reset => core_alt_reset,
pll_locked => system_clk_locked,

wb_in => wb_dram_in,
wb_out => wb_dram_out,
wb_ctrl_in => wb_ext_io_in,
wb_ctrl_out => wb_ext_io_out,
wb_ctrl_is_csr => wb_ext_is_dram_csr,
wb_ctrl_is_init => wb_ext_is_dram_init,

init_done => dram_init_done,
init_error => dram_init_error,

ddram_a => ddram_a,
ddram_ba => ddram_ba,
ddram_ras_n => ddram_ras_n,
ddram_cas_n => ddram_cas_n,
ddram_we_n => ddram_we_n,
ddram_cs_n => open,
ddram_dm => ddram_dm,
ddram_dq => ddram_dq,
ddram_dqs_p => ddram_dqs_p,
ddram_dqs_n => ddram_dqs_n,
ddram_clk_p => ddram_clk_p_vec,
ddram_clk_n => ddram_clk_n_vec,
ddram_cke => ddram_cke,
ddram_odt => ddram_odt,
ddram_reset_n => ddram_reset_n
);

-- Status LEDs: SoC reset, PLL reset, DRAM init not done or failed,
-- and the inverse of the DRAM init error flag.
led0 <= soc_rst;
led1 <= pll_rst;
led2 <= not dram_init_done or dram_init_error;
led3 <= not dram_init_error; -- Make it blink ?
end generate;
end architecture behaviour;

@ -1,770 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library unisim;
use unisim.vcomponents.all;

library work;
use work.wishbone_types.all;

-- Board top level: single-ended clock with active-low reset pin, main UART,
-- RGB + plain LEDs, SPI flash, shield GPIO header, Ethernet, SD card and a
-- 16-bit DDR memory interface.
entity toplevel is
    generic (
        MEMORY_SIZE          : integer  := 16384;
        RAM_INIT_FILE        : string   := "firmware.hex";
        RESET_LOW            : boolean  := true;
        CLK_FREQUENCY        : positive := 100000000;
        HAS_FPU,
        HAS_BTC              : boolean  := true;
        HAS_SHORT_MULT,
        USE_LITEDRAM,
        NO_BRAM,
        DISABLE_FLATTEN_CORE,
        SCLK_STARTUPE2       : boolean  := false;
        SPI_FLASH_OFFSET     : integer  := 4194304;
        SPI_FLASH_DEF_CKDV   : natural  := 1;
        SPI_FLASH_DEF_QUAD   : boolean  := true;
        LOG_LENGTH           : natural  := 512;
        USE_LITEETH,
        UART_IS_16550        : boolean  := false;
        HAS_UART1            : boolean  := true;
        USE_LITESDCARD       : boolean  := false;
        HAS_GPIO             : boolean  := true;
        NGPIO                : natural  := 32
    );
    port (
        -- Clock and reset pins
        ext_clk,
        ext_rst_n : in std_ulogic;

        -- UART0 signals:
        uart_main_tx : out std_ulogic;
        uart_main_rx : in  std_ulogic;

        -- LEDs
        led0_b, led0_g, led0_r,
        led4, led5, led6, led7 : out std_ulogic;

        -- SPI
        spi_flash_cs_n,
        spi_flash_clk    : out   std_ulogic;
        spi_flash_mosi,
        spi_flash_miso,
        spi_flash_wp_n,
        spi_flash_hold_n : inout std_ulogic;

        -- GPIO
        shield_io : inout std_ulogic_vector(44 downto 0);

        -- Ethernet
        eth_ref_clk   : out   std_ulogic;
        eth_clocks_tx,
        eth_clocks_rx : in    std_ulogic;
        eth_rst_n     : out   std_ulogic;
        eth_mdio      : inout std_ulogic;
        eth_mdc       : out   std_ulogic;
        eth_rx_dv,
        eth_rx_er     : in    std_ulogic;
        eth_rx_data   : in    std_ulogic_vector(3 downto 0);
        eth_tx_en     : out   std_ulogic;
        eth_tx_data   : out   std_ulogic_vector(3 downto 0);
        eth_col,
        eth_crs       : in    std_ulogic;

        -- SD card
        sdcard_data : inout std_ulogic_vector(3 downto 0);
        sdcard_cmd  : inout std_ulogic;
        sdcard_clk  : out   std_ulogic;
        sdcard_cd   : in    std_ulogic;

        -- DRAM wires
        ddram_a       : out   std_ulogic_vector(13 downto 0);
        ddram_ba      : out   std_ulogic_vector(2 downto 0);
        ddram_ras_n,
        ddram_cas_n,
        ddram_we_n,
        ddram_cs_n    : out   std_ulogic;
        ddram_dm      : out   std_ulogic_vector(1 downto 0);
        ddram_dq      : inout std_ulogic_vector(15 downto 0);
        ddram_dqs_p,
        ddram_dqs_n   : inout std_ulogic_vector(1 downto 0);
        ddram_clk_p,
        ddram_clk_n,
        ddram_cke,
        ddram_odt,
        ddram_reset_n : out   std_ulogic
    );
end entity toplevel;

architecture behaviour of toplevel is

-- Reset signals:
signal soc_rst : std_ulogic;
signal pll_rst : std_ulogic;

-- Internal clock signals:
signal system_clk : std_ulogic;
signal system_clk_locked : std_ulogic;
signal eth_clk_locked : std_ulogic;

-- External IOs from the SoC
signal wb_ext_io_in : wb_io_master_out;
signal wb_ext_io_out : wb_io_slave_out;
signal wb_ext_is_dram_csr : std_ulogic;
signal wb_ext_is_dram_init : std_ulogic;
signal wb_ext_is_eth : std_ulogic;
signal wb_ext_is_sdcard : std_ulogic;

-- DRAM main data wishbone connection
signal wb_dram_in : wishbone_master_out;
signal wb_dram_out : wishbone_slave_out;

-- DRAM control wishbone connection
signal wb_dram_ctrl_out : wb_io_slave_out := wb_io_slave_out_init;

-- LiteEth connection
signal ext_irq_eth : std_ulogic;
signal wb_eth_out : wb_io_slave_out := wb_io_slave_out_init;

-- LiteSDCard connection
signal ext_irq_sdcard : std_ulogic := '0';
signal wb_sdcard_out : wb_io_slave_out := wb_io_slave_out_init;
signal wb_sddma_out : wb_io_master_out := wb_io_master_out_init;
signal wb_sddma_in : wb_io_slave_out;
signal wb_sddma_nr : wb_io_master_out;
signal wb_sddma_ir : wb_io_slave_out;
-- for conversion from non-pipelined wishbone to pipelined
signal wb_sddma_stb_sent : std_ulogic;

-- Control/status
signal core_alt_reset : std_ulogic;

-- Status LED
signal led0_b_pwm : std_ulogic;
signal led0_r_pwm : std_ulogic;
signal led0_g_pwm : std_ulogic;

-- Dumb PWM for the LEDs, those RGB LEDs are too bright otherwise
signal pwm_counter : std_ulogic_vector(8 downto 0);

-- SPI flash
signal spi_sck : std_ulogic;
signal spi_cs_n : std_ulogic;
signal spi_sdat_o : std_ulogic_vector(3 downto 0);
signal spi_sdat_oe : std_ulogic_vector(3 downto 0);
signal spi_sdat_i : std_ulogic_vector(3 downto 0);

-- GPIO
signal gpio_in : std_ulogic_vector(NGPIO - 1 downto 0);
signal gpio_out : std_ulogic_vector(NGPIO - 1 downto 0);
signal gpio_dir : std_ulogic_vector(NGPIO - 1 downto 0);

-- ddram clock signals as vectors
signal ddram_clk_p_vec : std_logic_vector(0 downto 0);
signal ddram_clk_n_vec : std_logic_vector(0 downto 0);

-- Fixup various memory sizes based on generics
-- Size of the block RAM given to the SoC.
-- Returns MEMORY_SIZE, except when both USE_LITEDRAM and NO_BRAM are
-- set, in which case no block RAM is requested (0).
function get_bram_size return natural is
begin
    if not (USE_LITEDRAM and NO_BRAM) then
        return MEMORY_SIZE;
    end if;
    return 0;
end function;

-- Size of the payload passed as DRAM_INIT_SIZE / PAYLOAD_SIZE.
-- Returns MEMORY_SIZE only when both USE_LITEDRAM and NO_BRAM are set;
-- otherwise there is no payload (0).
function get_payload_size return natural is
begin
    if not (USE_LITEDRAM and NO_BRAM) then
        return 0;
    end if;
    return MEMORY_SIZE;
end function;
constant BRAM_SIZE : natural := get_bram_size;
constant PAYLOAD_SIZE : natural := get_payload_size;
begin

-- Main SoC
--
-- Instantiates work.soc and wires it to the board-level signals of this
-- architecture: clock/reset, UART0, SPI flash, GPIO, the DRAM and external
-- IO wishbones, the SD card DMA wishbone and the external interrupts.
soc0: entity work.soc
generic map(
-- BRAM_SIZE is 0 when USE_LITEDRAM and NO_BRAM are both set (see
-- get_bram_size); in that case PAYLOAD_SIZE carries MEMORY_SIZE instead.
MEMORY_SIZE => BRAM_SIZE,
RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC,
HAS_SHORT_MULT => HAS_SHORT_MULT,
HAS_DRAM => USE_LITEDRAM,
-- 256 MiB
DRAM_SIZE => 256 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE,
HAS_SPI_FLASH => true,
-- Four data lines, matching the 4-bit spi_sdat_* vectors below
SPI_FLASH_DLINES => 4,
SPI_FLASH_OFFSET => SPI_FLASH_OFFSET,
SPI_FLASH_DEF_CKDV => SPI_FLASH_DEF_CKDV,
SPI_FLASH_DEF_QUAD => SPI_FLASH_DEF_QUAD,
LOG_LENGTH => LOG_LENGTH,
HAS_LITEETH => USE_LITEETH,
UART0_IS_16550 => UART_IS_16550,
HAS_UART1 => HAS_UART1,
HAS_SD_CARD => USE_LITESDCARD,
HAS_GPIO => HAS_GPIO,
NGPIO => NGPIO
)
port map (
-- System signals
system_clk => system_clk,
rst => soc_rst,

-- UART signals
uart0_txd => uart_main_tx,
uart0_rxd => uart_main_rx,

-- UART1 signals
-- NOTE(review): HAS_UART1 is forwarded above but the UART1 pins are
-- left unconnected here; confirm this is intended for this build.
--uart1_txd => uart_pmod_tx,
--uart1_rxd => uart_pmod_rx,

-- SPI signals
spi_flash_sck => spi_sck,
spi_flash_cs_n => spi_cs_n,
spi_flash_sdat_o => spi_sdat_o,
spi_flash_sdat_oe => spi_sdat_oe,
spi_flash_sdat_i => spi_sdat_i,

-- GPIO signals
gpio_in => gpio_in,
gpio_out => gpio_out,
gpio_dir => gpio_dir,

-- External interrupts
ext_irq_eth => ext_irq_eth,
ext_irq_sdcard => ext_irq_sdcard,

-- DRAM wishbone
wb_dram_in => wb_dram_in,
wb_dram_out => wb_dram_out,

-- IO wishbone; the wb_ext_is_* signals act as per-device selects and
-- are used below to gate cyc and to mux the responses
wb_ext_io_in => wb_ext_io_in,
wb_ext_io_out => wb_ext_io_out,
wb_ext_is_dram_csr => wb_ext_is_dram_csr,
wb_ext_is_dram_init => wb_ext_is_dram_init,
wb_ext_is_eth => wb_ext_is_eth,
wb_ext_is_sdcard => wb_ext_is_sdcard,

-- DMA wishbone (driven by the SD card DMA conversion process)
wishbone_dma_in => wb_sddma_in,
wishbone_dma_out => wb_sddma_out,

alt_reset => core_alt_reset
);

--uart_pmod_rts_n <= '0';

-- SPI Flash
--
-- Each of the four data pins is driven from spi_sdat_o only while the
-- matching spi_sdat_oe bit is set and is tri-stated otherwise; the pin
-- values are always fed back to the SoC through spi_sdat_i.
--
-- Note: Unlike many other boards, the SPI flash on the Arty has
-- an actual pin to generate the clock and does not require the use of
-- the STARTUPE2 primitive.  SCLK_STARTUPE2 selects between the two
-- options below.
--
spi_flash_cs_n <= spi_cs_n;
spi_flash_mosi <= spi_sdat_o(0) when spi_sdat_oe(0) = '1' else 'Z';
spi_flash_miso <= spi_sdat_o(1) when spi_sdat_oe(1) = '1' else 'Z';
spi_flash_wp_n <= spi_sdat_o(2) when spi_sdat_oe(2) = '1' else 'Z';
spi_flash_hold_n <= spi_sdat_o(3) when spi_sdat_oe(3) = '1' else 'Z';
spi_sdat_i(0) <= spi_flash_mosi;
spi_sdat_i(1) <= spi_flash_miso;
spi_sdat_i(2) <= spi_flash_wp_n;
spi_sdat_i(3) <= spi_flash_hold_n;

-- Clock routed through STARTUPE2: the regular clock pin is left
-- tri-stated and spi_sck is fed to USRCCLKO instead.
spi_sclk_startupe2: if SCLK_STARTUPE2 generate
spi_flash_clk <= 'Z';

STARTUPE2_INST: STARTUPE2
port map (
CLK => '0',
GSR => '0',
GTS => '0',
KEYCLEARB => '0',
PACK => '0',
USRCCLKO => spi_sck,
USRCCLKTS => '0',
USRDONEO => '1',
USRDONETS => '0'
);
end generate;

-- Clock driven directly on the spi_flash_clk pin
spi_direct_sclk: if not SCLK_STARTUPE2 generate
spi_flash_clk <= spi_sck;
end generate;

-- Build without LiteDRAM: the system clock comes from a local clock
-- generator and the reset controller drives the SoC reset directly.
nodram: if not USE_LITEDRAM generate
signal ddram_clk_dummy : std_ulogic;
begin
reset_controller: entity work.soc_reset
generic map(
RESET_LOW => RESET_LOW
)
port map(
ext_clk => ext_clk,
pll_clk => system_clk,
-- Both the system clock and the ethernet clock must report lock
pll_locked_in => system_clk_locked and eth_clk_locked,
ext_rst_in => ext_rst_n,
pll_rst_out => pll_rst,
rst_out => soc_rst
);

-- Derives system_clk (CLK_FREQUENCY) from the 100 MHz ext_clk input
clkgen: entity work.clock_generator
generic map(
CLK_INPUT_HZ => 100000000,
CLK_OUTPUT_HZ => CLK_FREQUENCY
)
port map(
ext_clk => ext_clk,
pll_rst_in => pll_rst,
pll_clk_out => system_clk,
pll_locked_out => system_clk_locked
);

-- Fixed status LED pattern (blue and red requested, green not);
-- the values are gated by the leds_pwm process
led0_b_pwm <= '1';
led0_r_pwm <= '1';
led0_g_pwm <= '0';
-- No DRAM wrapper to request an alternate reset
core_alt_reset <= '0';

-- Vivado barfs on those differential signals if left
-- unconnected. So instantiate a diff. buffer and feed
-- it a constant '0'.
dummy_dram_clk: OBUFDS
port map (
O => ddram_clk_p,
OB => ddram_clk_n,
I => ddram_clk_dummy
);
ddram_clk_dummy <= '0';

end generate;

-- Build with LiteDRAM: the litedram wrapper takes ext_clk and pll_rst and
-- is connected to system_clk, the DRAM-side reset (dram_sys_rst),
-- core_alt_reset and system_clk_locked.  The SoC reset is generated
-- locally from the DRAM reset and the clock lock indications.
has_dram: if USE_LITEDRAM generate
    signal dram_init_done  : std_ulogic;
    signal dram_init_error : std_ulogic;
    signal dram_sys_rst    : std_ulogic;
    -- Output of the reset controller; not used by anything else in
    -- this generate block.
    signal rst_gen_rst     : std_ulogic;
begin

    -- Eventually dig out the frequency from the generator
    -- but for now, assert it's 100 MHz
    assert CLK_FREQUENCY = 100000000;

    -- Only the PLL reset output of the reset controller is used here;
    -- the SoC reset is produced by soc_rst_gen below.
    reset_controller: entity work.soc_reset
        generic map(
            RESET_LOW => RESET_LOW,
            PLL_RESET_BITS => 18,
            SOC_RESET_BITS => 1
        )
        port map(
            ext_clk => ext_clk,
            pll_clk => system_clk,
            pll_locked_in => eth_clk_locked,
            ext_rst_in => ext_rst_n,
            pll_rst_out => pll_rst,
            rst_out => rst_gen_rst
        );

    -- Generate SoC reset
    --
    -- Asserted asynchronously while ext_rst_n is low, otherwise a
    -- registered OR of the DRAM system reset and the "not locked"
    -- indications.  ext_rst_n must be in the sensitivity list because it
    -- is tested outside the clock edge: without it, simulation only
    -- re-evaluates the asynchronous reset on system_clk events and can
    -- disagree with the synthesised flip-flop.
    -- NOTE(review): the polarity is fixed to active-low here and does
    -- not follow RESET_LOW; confirm that is intended.
    soc_rst_gen: process(system_clk, ext_rst_n)
    begin
        if ext_rst_n = '0' then
            soc_rst <= '1';
        elsif rising_edge(system_clk) then
            soc_rst <= dram_sys_rst or not eth_clk_locked or not system_clk_locked;
        end if;
    end process;

    -- NOTE(review): these assignments read ddram_clk_p/ddram_clk_n into
    -- the one-bit vectors that are also associated with the wrapper's
    -- ddram_clk_p/ddram_clk_n ports below, and nothing in this block
    -- assigns ddram_clk_p/ddram_clk_n themselves.  Confirm the intended
    -- direction against litedram_wrapper.
    ddram_clk_p_vec <= (others => ddram_clk_p);
    ddram_clk_n_vec <= (others => ddram_clk_n);

    dram: entity work.litedram_wrapper
        generic map(
            DRAM_ABITS => 24,
            DRAM_ALINES => 14,
            DRAM_DLINES => 16,
            DRAM_CKLINES => 1,
            DRAM_PORT_WIDTH => 128,
            PAYLOAD_FILE => RAM_INIT_FILE,
            PAYLOAD_SIZE => PAYLOAD_SIZE
        )
        port map(
            clk_in => ext_clk,
            rst => pll_rst,
            system_clk => system_clk,
            system_reset => dram_sys_rst,
            core_alt_reset => core_alt_reset,
            pll_locked => system_clk_locked,

            -- Main data wishbone
            wb_in => wb_dram_in,
            wb_out => wb_dram_out,
            -- Control wishbone, selected by the two wb_ext_is_dram_* signals
            wb_ctrl_in => wb_ext_io_in,
            wb_ctrl_out => wb_dram_ctrl_out,
            wb_ctrl_is_csr => wb_ext_is_dram_csr,
            wb_ctrl_is_init => wb_ext_is_dram_init,

            init_done => dram_init_done,
            init_error => dram_init_error,

            ddram_a => ddram_a,
            ddram_ba => ddram_ba,
            ddram_ras_n => ddram_ras_n,
            ddram_cas_n => ddram_cas_n,
            ddram_we_n => ddram_we_n,
            ddram_cs_n => ddram_cs_n,
            ddram_dm => ddram_dm,
            ddram_dq => ddram_dq,
            ddram_dqs_p => ddram_dqs_p,
            ddram_dqs_n => ddram_dqs_n,
            ddram_clk_p => ddram_clk_p_vec,
            ddram_clk_n => ddram_clk_n_vec,
            ddram_cke => ddram_cke,
            ddram_odt => ddram_odt,
            ddram_reset_n => ddram_reset_n
        );

    -- Status LED requests (gated by leds_pwm): blue until DRAM init is
    -- done, red on init error, green once init is done without error.
    led0_b_pwm <= not dram_init_done;
    led0_r_pwm <= dram_init_error;
    led0_g_pwm <= dram_init_done and not dram_init_error;

end generate;

-- Ethernet: instantiates the liteeth_core component, generates the PHY
-- reference clock and hooks the core onto the external IO wishbone.
has_liteeth : if USE_LITEETH generate

-- Component declaration for the LiteEth core; the implementation is
-- not part of this file.
component liteeth_core port (
sys_clock : in std_ulogic;
sys_reset : in std_ulogic;
mii_eth_clocks_tx : in std_ulogic;
mii_eth_clocks_rx : in std_ulogic;
mii_eth_rst_n : out std_ulogic;
-- NOTE(review): MDIO is declared as an input only here; confirm against
-- the core's actual port direction.
mii_eth_mdio : in std_ulogic;
mii_eth_mdc : out std_ulogic;
mii_eth_rx_dv : in std_ulogic;
mii_eth_rx_er : in std_ulogic;
mii_eth_rx_data : in std_ulogic_vector(3 downto 0);
mii_eth_tx_en : out std_ulogic;
mii_eth_tx_data : out std_ulogic_vector(3 downto 0);
mii_eth_col : in std_ulogic;
mii_eth_crs : in std_ulogic;
wishbone_adr : in std_ulogic_vector(29 downto 0);
wishbone_dat_w : in std_ulogic_vector(31 downto 0);
wishbone_dat_r : out std_ulogic_vector(31 downto 0);
wishbone_sel : in std_ulogic_vector(3 downto 0);
wishbone_cyc : in std_ulogic;
wishbone_stb : in std_ulogic;
wishbone_ack : out std_ulogic;
wishbone_we : in std_ulogic;
wishbone_cti : in std_ulogic_vector(2 downto 0);
wishbone_bte : in std_ulogic_vector(1 downto 0);
wishbone_err : out std_ulogic;
interrupt : out std_ulogic
);
end component;

signal wb_eth_cyc : std_ulogic;
signal wb_eth_adr : std_ulogic_vector(29 downto 0);

-- Change this to use a PLL instead of a BUFR to generate the 25 MHz
-- reference clock to the PHY.
constant USE_PLL : boolean := false;
begin
-- PLL option: a 10 ns (100 MHz) ext_clk multiplied by 16 and divided by
-- 64 gives the 25 MHz reference; eth_clk_locked follows the PLL's
-- LOCKED output.
eth_use_pll: if USE_PLL generate
signal eth_clk_25 : std_ulogic;
signal eth_clkfb : std_ulogic;
begin
pll_eth : PLLE2_BASE
generic map (
BANDWIDTH => "OPTIMIZED",
CLKFBOUT_MULT => 16,
CLKIN1_PERIOD => 10.0,
CLKOUT0_DIVIDE => 64,
DIVCLK_DIVIDE => 1,
STARTUP_WAIT => "FALSE")
port map (
CLKOUT0 => eth_clk_25,
CLKOUT1 => open,
CLKOUT2 => open,
CLKOUT3 => open,
CLKOUT4 => open,
CLKOUT5 => open,
CLKFBOUT => eth_clkfb,
LOCKED => eth_clk_locked,
CLKIN1 => ext_clk,
PWRDWN => '0',
RST => pll_rst,
CLKFBIN => eth_clkfb);

eth_clk_buf: BUFG
port map (
I => eth_clk_25,
O => eth_ref_clk
);
end generate;

-- BUFR option: system_clk divided by 4.  There is no lock indication in
-- this case, so eth_clk_locked is tied high.
eth_use_bufr: if not USE_PLL generate
eth_clk_div: BUFR
generic map (
BUFR_DIVIDE => "4"
)
port map (
I => system_clk,
O => eth_ref_clk,
CE => '1',
CLR => '0'
);
eth_clk_locked <= '1';
end generate;

liteeth : liteeth_core
port map(
sys_clock => system_clk,
sys_reset => soc_rst,
mii_eth_clocks_tx => eth_clocks_tx,
mii_eth_clocks_rx => eth_clocks_rx,
mii_eth_rst_n => eth_rst_n,
mii_eth_mdio => eth_mdio,
mii_eth_mdc => eth_mdc,
mii_eth_rx_dv => eth_rx_dv,
mii_eth_rx_er => eth_rx_er,
mii_eth_rx_data => eth_rx_data,
mii_eth_tx_en => eth_tx_en,
mii_eth_tx_data => eth_tx_data,
mii_eth_col => eth_col,
mii_eth_crs => eth_crs,
wishbone_adr => wb_eth_adr,
wishbone_dat_w => wb_ext_io_in.dat,
wishbone_dat_r => wb_eth_out.dat,
wishbone_sel => wb_ext_io_in.sel,
wishbone_cyc => wb_eth_cyc,
wishbone_stb => wb_ext_io_in.stb,
wishbone_ack => wb_eth_out.ack,
wishbone_we => wb_ext_io_in.we,
-- Burst signalling is not used: cti/bte are tied to zero
wishbone_cti => "000",
wishbone_bte => "00",
wishbone_err => open,
interrupt => ext_irq_eth
);

-- Gate cyc with "chip select" from soc
wb_eth_cyc <= wb_ext_io_in.cyc and wb_ext_is_eth;

-- Remove top address bits as liteeth decoder doesn't know about them:
-- only the low 15 address bits are kept, zero-extended to 30 bits
wb_eth_adr <= x"000" & "000" & wb_ext_io_in.adr(14 downto 0);

-- LiteETH isn't pipelined: stall the master until the ack arrives
wb_eth_out.stall <= not wb_eth_out.ack;

end generate;

-- Without LiteEth there is no ethernet clock or interrupt source:
-- keep the interrupt line inactive and report the clock as locked so
-- it does not hold anything in reset.
no_liteeth : if not USE_LITEETH generate
    ext_irq_eth    <= '0';
    eth_clk_locked <= '1';
end generate;

-- SD card pmod
--
-- Instantiates the litesdcard_core component, connects its control
-- wishbone to the external IO bus and converts its DMA wishbone master
-- into the form handed to the SoC's DMA port.
has_sdcard : if USE_LITESDCARD generate
-- Component declaration for the LiteSDCard core; the implementation is
-- not part of this file.
component litesdcard_core port (
clk : in std_ulogic;
rst : in std_ulogic;
-- wishbone for accessing control registers
wb_ctrl_adr : in std_ulogic_vector(29 downto 0);
wb_ctrl_dat_w : in std_ulogic_vector(31 downto 0);
wb_ctrl_dat_r : out std_ulogic_vector(31 downto 0);
wb_ctrl_sel : in std_ulogic_vector(3 downto 0);
wb_ctrl_cyc : in std_ulogic;
wb_ctrl_stb : in std_ulogic;
wb_ctrl_ack : out std_ulogic;
wb_ctrl_we : in std_ulogic;
wb_ctrl_cti : in std_ulogic_vector(2 downto 0);
wb_ctrl_bte : in std_ulogic_vector(1 downto 0);
wb_ctrl_err : out std_ulogic;
-- wishbone for SD card core to use for DMA
wb_dma_adr : out std_ulogic_vector(29 downto 0);
wb_dma_dat_w : out std_ulogic_vector(31 downto 0);
wb_dma_dat_r : in std_ulogic_vector(31 downto 0);
wb_dma_sel : out std_ulogic_vector(3 downto 0);
wb_dma_cyc : out std_ulogic;
wb_dma_stb : out std_ulogic;
wb_dma_ack : in std_ulogic;
wb_dma_we : out std_ulogic;
wb_dma_cti : out std_ulogic_vector(2 downto 0);
wb_dma_bte : out std_ulogic_vector(1 downto 0);
wb_dma_err : in std_ulogic;
-- connections to SD card
sdcard_data : inout std_ulogic_vector(3 downto 0);
sdcard_cmd : inout std_ulogic;
sdcard_clk : out std_ulogic;
sdcard_cd : in std_ulogic;
irq : out std_ulogic
);
end component;

signal wb_sdcard_cyc : std_ulogic;
signal wb_sdcard_adr : std_ulogic_vector(29 downto 0);

begin
litesdcard : litesdcard_core
port map (
clk => system_clk,
rst => soc_rst,
wb_ctrl_adr => wb_sdcard_adr,
wb_ctrl_dat_w => wb_ext_io_in.dat,
wb_ctrl_dat_r => wb_sdcard_out.dat,
wb_ctrl_sel => wb_ext_io_in.sel,
wb_ctrl_cyc => wb_sdcard_cyc,
wb_ctrl_stb => wb_ext_io_in.stb,
wb_ctrl_ack => wb_sdcard_out.ack,
wb_ctrl_we => wb_ext_io_in.we,
wb_ctrl_cti => "000",
wb_ctrl_bte => "00",
wb_ctrl_err => open,
-- DMA master side: "nr" carries the core's raw (non-registered) outputs,
-- "ir" the registered copy of the bus response (see the process below)
wb_dma_adr => wb_sddma_nr.adr,
wb_dma_dat_w => wb_sddma_nr.dat,
wb_dma_dat_r => wb_sddma_ir.dat,
wb_dma_sel => wb_sddma_nr.sel,
wb_dma_cyc => wb_sddma_nr.cyc,
wb_dma_stb => wb_sddma_nr.stb,
wb_dma_ack => wb_sddma_ir.ack,
wb_dma_we => wb_sddma_nr.we,
wb_dma_cti => open,
wb_dma_bte => open,
wb_dma_err => '0',
sdcard_data => sdcard_data,
sdcard_cmd => sdcard_cmd,
sdcard_clk => sdcard_clk,
sdcard_cd => sdcard_cd,
irq => ext_irq_sdcard
);

-- Gate cyc with chip select from SoC
wb_sdcard_cyc <= wb_ext_io_in.cyc and wb_ext_is_sdcard;

-- Only the low 14 address bits are passed on, zero-extended to 30 bits
wb_sdcard_adr <= x"0000" & wb_ext_io_in.adr(13 downto 0);

-- Control port is not pipelined: stall the master until the ack arrives
wb_sdcard_out.stall <= not wb_sdcard_out.ack;

-- Convert non-pipelined DMA wishbone to pipelined by suppressing
-- non-acknowledged strobes
--
-- Statement order matters here: the whole-record assignment to
-- wb_sddma_out comes first and the later assignment to its stb field
-- overrides it within the same clock edge.
process(system_clk)
begin
if rising_edge(system_clk) then
-- Register the core's master outputs
wb_sddma_out <= wb_sddma_nr;
-- Drop the strobe if it was already recorded as sent, or if the
-- strobe currently presented is being accepted (bus not stalling)
if wb_sddma_stb_sent = '1' or
(wb_sddma_out.stb = '1' and wb_sddma_in.stall = '0') then
wb_sddma_out.stb <= '0';
end if;
-- Forget the "sent" state when the core ends the cycle or the
-- registered ack is seen; otherwise, while the bus is not stalling,
-- track the core's strobe
if wb_sddma_nr.cyc = '0' or wb_sddma_ir.ack = '1' then
wb_sddma_stb_sent <= '0';
elsif wb_sddma_in.stall = '0' then
wb_sddma_stb_sent <= wb_sddma_nr.stb;
end if;
-- Register the bus response on its way back to the core
wb_sddma_ir <= wb_sddma_in;
end if;
end process;

end generate;

-- Mux WB response on the IO bus
--
-- Ethernet has priority over the SD card; when neither select is active
-- the DRAM control response is returned.
process(wb_ext_is_eth, wb_ext_is_sdcard,
        wb_eth_out, wb_sdcard_out, wb_dram_ctrl_out)
begin
    if wb_ext_is_eth = '1' then
        wb_ext_io_out <= wb_eth_out;
    elsif wb_ext_is_sdcard = '1' then
        wb_ext_io_out <= wb_sdcard_out;
    else
        wb_ext_io_out <= wb_dram_ctrl_out;
    end if;
end process;

-- Crude PWM for the RGB status LED: the requested colours are passed
-- through only while the top five bits of the 9-bit counter are zero,
-- i.e. for 16 out of every 512 system_clk cycles; otherwise all three
-- outputs are held at '0'.
leds_pwm : process(system_clk)
begin
    if rising_edge(system_clk) then
        -- Default for this cycle: all colours off
        led0_b <= '0';
        led0_r <= '0';
        led0_g <= '0';

        -- "On" window of the PWM period
        if pwm_counter(8 downto 4) = "00000" then
            led0_b <= led0_b_pwm;
            led0_r <= led0_r_pwm;
            led0_g <= led0_g_pwm;
        end if;

        -- Free-running counter, wraps around after 511
        pwm_counter <= std_ulogic_vector(unsigned(pwm_counter) + 1);
    end if;
end process;

-- Plain status LEDs: system clock locked, ethernet clock locked, and
-- SoC out of reset
led4 <= system_clk_locked;
led5 <= eth_clk_locked;
led6 <= not soc_rst;

-- GPIO
--
-- Each of the 32 SoC GPIO bits is tied to one shield_io pin.  The pin is
-- always readable through gpio_in, and is driven from gpio_out only
-- while the matching gpio_dir bit is '1' (tri-stated otherwise).
shield_gpio_map: block
    -- shield_io index used by each GPIO bit:
    --   GPIO  0..13 -> shield_io  0..13
    --   GPIO 14..29 -> shield_io 26..41
    --   GPIO 30, 31 -> shield_io 43, 44
    type shield_pin_map_t is array (0 to 31) of natural;
    constant SHIELD_PIN : shield_pin_map_t := (
         0,  1,  2,  3,  4,  5,  6,  7,
         8,  9, 10, 11, 12, 13,
        26, 27, 28, 29, 30, 31, 32, 33,
        34, 35, 36, 37, 38, 39, 40, 41,
        43, 44
    );
begin
    gpio_pins: for i in SHIELD_PIN'range generate
        gpio_in(i) <= shield_io(SHIELD_PIN(i));
        shield_io(SHIELD_PIN(i)) <= gpio_out(i) when gpio_dir(i) = '1' else 'Z';
    end generate;
end block;

end architecture behaviour;

@ -1,330 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library unisim;
use unisim.vcomponents.all;

library work;
use work.wishbone_types.all;

-- Board top level: SoC with UART, quad SPI flash, four LEDs and an
-- optional LiteDRAM controller, clocked from a differential input
-- (clk200_p/clk200_n, treated as 200 MHz by the clock generator).
entity toplevel is
generic (
-- Memory size; used as block RAM size or, when USE_LITEDRAM and NO_BRAM
-- are both set, as the DRAM payload size instead
MEMORY_SIZE : integer := 16384;
RAM_INIT_FILE : string := "firmware.hex";
-- Forwarded to the reset controller
RESET_LOW : boolean := true;
-- System clock frequency in Hz; must be 100000000 when USE_LITEDRAM is set
CLK_FREQUENCY : positive := 100000000;
USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
SPI_FLASH_OFFSET : integer := 10485760;
SPI_FLASH_DEF_CKDV : natural := 1;
SPI_FLASH_DEF_QUAD : boolean := true;
LOG_LENGTH : natural := 2048;
UART_IS_16550 : boolean := true
);
port(
-- Differential board clock and external reset
clk200_p : in std_ulogic;
clk200_n : in std_ulogic;
ext_rst : in std_ulogic;

-- UART0 signals:
uart_main_tx : out std_ulogic;
uart_main_rx : in std_ulogic;

-- LEDs
led0 : out std_logic;
led1 : out std_logic;
led2 : out std_logic;
led3 : out std_logic;

-- SPI (no clock pin: the flash clock goes through STARTUPE2)
spi_flash_cs_n : out std_ulogic;
spi_flash_mosi : inout std_ulogic;
spi_flash_miso : inout std_ulogic;
spi_flash_wp_n : inout std_ulogic;
spi_flash_hold_n : inout std_ulogic;

-- DRAM wires
ddram_a : out std_logic_vector(14 downto 0);
ddram_ba : out std_logic_vector(2 downto 0);
ddram_ras_n : out std_logic;
ddram_cas_n : out std_logic;
ddram_we_n : out std_logic;
ddram_cs_n : out std_ulogic;
ddram_dm : out std_logic_vector(3 downto 0);
ddram_dq : inout std_logic_vector(31 downto 0);
ddram_dqs_p : inout std_logic_vector(3 downto 0);
ddram_dqs_n : inout std_logic_vector(3 downto 0);
ddram_clk_p : out std_logic;
ddram_clk_n : out std_logic;
ddram_cke : out std_logic;
ddram_odt : out std_logic;
ddram_reset_n : out std_logic
);
end entity toplevel;

architecture behaviour of toplevel is

-- Internal clock (single-ended output of the clk200 input buffer)
signal ext_clk : std_ulogic;

-- Reset signals: soc_rst resets the SoC, pll_rst resets the clock
-- generator or the DRAM wrapper
signal soc_rst : std_ulogic;
signal pll_rst : std_ulogic;

-- Internal clock signals:
signal system_clk : std_ulogic;
signal system_clk_locked : std_ulogic;

-- DRAM main data wishbone connection
signal wb_dram_in : wishbone_master_out;
signal wb_dram_out : wishbone_slave_out;

-- DRAM control wishbone connection
signal wb_ext_io_in : wb_io_master_out;
signal wb_ext_io_out : wb_io_slave_out;
signal wb_ext_is_dram_csr : std_ulogic;
signal wb_ext_is_dram_init : std_ulogic;

-- Control/status
signal core_alt_reset : std_ulogic;

-- SPI flash: clock, chip select, and per-line output / output-enable /
-- input for the four data lines
signal spi_sck : std_ulogic;
signal spi_cs_n : std_ulogic;
signal spi_sdat_o : std_ulogic_vector(3 downto 0);
signal spi_sdat_oe : std_ulogic_vector(3 downto 0);
signal spi_sdat_i : std_ulogic_vector(3 downto 0);

-- ddram clock signals as vectors (one-bit vectors connected to the
-- DRAM wrapper's clock ports)
signal ddram_clk_p_vec : std_logic_vector(0 downto 0);
signal ddram_clk_n_vec : std_logic_vector(0 downto 0);

-- Fixup various memory sizes based on generics
-- Size of the block RAM given to the SoC.
-- Returns MEMORY_SIZE, except when both USE_LITEDRAM and NO_BRAM are
-- set, in which case no block RAM is requested (0).
function get_bram_size return natural is
begin
    if not (USE_LITEDRAM and NO_BRAM) then
        return MEMORY_SIZE;
    end if;
    return 0;
end function;

-- Size of the payload passed as DRAM_INIT_SIZE / PAYLOAD_SIZE.
-- Returns MEMORY_SIZE only when both USE_LITEDRAM and NO_BRAM are set;
-- otherwise there is no payload (0).
function get_payload_size return natural is
begin
    if not (USE_LITEDRAM and NO_BRAM) then
        return 0;
    end if;
    return MEMORY_SIZE;
end function;

constant BRAM_SIZE : natural := get_bram_size;
constant PAYLOAD_SIZE : natural := get_payload_size;
begin

-- Main SoC
--
-- Instantiates work.soc and wires it to the board-level signals:
-- clock/reset, UART0, SPI flash and the DRAM data/control wishbones.
soc0: entity work.soc
generic map(
-- BRAM_SIZE is 0 when USE_LITEDRAM and NO_BRAM are both set (see
-- get_bram_size); in that case PAYLOAD_SIZE carries MEMORY_SIZE instead.
MEMORY_SIZE => BRAM_SIZE,
RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_DRAM => USE_LITEDRAM,
-- 1 GiB
DRAM_SIZE => 1024 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE,
HAS_SPI_FLASH => true,
-- Four data lines, matching the 4-bit spi_sdat_* vectors
SPI_FLASH_DLINES => 4,
SPI_FLASH_OFFSET => SPI_FLASH_OFFSET,
SPI_FLASH_DEF_CKDV => SPI_FLASH_DEF_CKDV,
SPI_FLASH_DEF_QUAD => SPI_FLASH_DEF_QUAD,
LOG_LENGTH => LOG_LENGTH,
UART0_IS_16550 => UART_IS_16550
)
port map (
-- System signals
system_clk => system_clk,
rst => soc_rst,

-- UART signals
uart0_txd => uart_main_tx,
uart0_rxd => uart_main_rx,

-- SPI signals
spi_flash_sck => spi_sck,
spi_flash_cs_n => spi_cs_n,
spi_flash_sdat_o => spi_sdat_o,
spi_flash_sdat_oe => spi_sdat_oe,
spi_flash_sdat_i => spi_sdat_i,

-- DRAM wishbone (data port, then the control port with its selects)
wb_dram_in => wb_dram_in,
wb_dram_out => wb_dram_out,
wb_ext_io_in => wb_ext_io_in,
wb_ext_io_out => wb_ext_io_out,
wb_ext_is_dram_csr => wb_ext_is_dram_csr,
wb_ext_is_dram_init => wb_ext_is_dram_init,
alt_reset => core_alt_reset
);

-- SPI Flash. The SPI clk needs to be fed through the STARTUPE2
-- primitive of the FPGA as it's not a normal pin.
--
-- Each of the four data pins is driven from spi_sdat_o only while the
-- matching spi_sdat_oe bit is set and is tri-stated otherwise; the pin
-- values are always fed back to the SoC through spi_sdat_i.
--
spi_flash_cs_n <= spi_cs_n;
spi_flash_mosi <= spi_sdat_o(0) when spi_sdat_oe(0) = '1' else 'Z';
spi_flash_miso <= spi_sdat_o(1) when spi_sdat_oe(1) = '1' else 'Z';
spi_flash_wp_n <= spi_sdat_o(2) when spi_sdat_oe(2) = '1' else 'Z';
spi_flash_hold_n <= spi_sdat_o(3) when spi_sdat_oe(3) = '1' else 'Z';
spi_sdat_i(0) <= spi_flash_mosi;
spi_sdat_i(1) <= spi_flash_miso;
spi_sdat_i(2) <= spi_flash_wp_n;
spi_sdat_i(3) <= spi_flash_hold_n;

-- spi_sck reaches the flash through USRCCLKO; the remaining inputs are
-- tied to constants
STARTUPE2_INST: STARTUPE2
port map (
CLK => '0',
GSR => '0',
GTS => '0',
KEYCLEARB => '0',
PACK => '0',
USRCCLKO => spi_sck,
USRCCLKTS => '0',
USRDONEO => '1',
USRDONETS => '0'
);

-- Differential input buffer: converts the clk200_p/clk200_n pair into
-- the single-ended ext_clk used by the rest of the design.
clk200: IBUFDS
    port map (
        I  => clk200_p,
        IB => clk200_n,
        O  => ext_clk
    );

-- Build without LiteDRAM: the system clock comes from a local clock
-- generator and the reset controller drives the SoC reset directly.
nodram: if not USE_LITEDRAM generate
signal ddram_clk_dummy : std_ulogic;
begin
reset_controller: entity work.soc_reset
generic map(
RESET_LOW => RESET_LOW
)
port map(
ext_clk => ext_clk,
pll_clk => system_clk,
pll_locked_in => system_clk_locked,
ext_rst_in => ext_rst,
pll_rst_out => pll_rst,
rst_out => soc_rst
);

-- Derives system_clk (CLK_FREQUENCY) from the 200 MHz ext_clk input
clkgen: entity work.clock_generator
generic map(
CLK_INPUT_HZ => 200000000,
CLK_OUTPUT_HZ => CLK_FREQUENCY
)
port map(
ext_clk => ext_clk,
pll_rst_in => pll_rst,
pll_clk_out => system_clk,
pll_locked_out => system_clk_locked
);

-- Debug LEDs: SoC reset, PLL reset, clock not locked; led3 unused
led0 <= soc_rst;
led1 <= pll_rst;
led2 <= not system_clk_locked;
led3 <= '0';
-- No DRAM wrapper to request an alternate reset
core_alt_reset <= '0';

-- Vivado barfs on those differential signals if left
-- unconnected. So instantiate a diff. buffer and feed
-- it a constant '0'.
dummy_dram_clk: OBUFDS
port map (
O => ddram_clk_p,
OB => ddram_clk_n,
I => ddram_clk_dummy
);
ddram_clk_dummy <= '0';

end generate;

-- Build with LiteDRAM: the litedram wrapper takes ext_clk and pll_rst and
-- is connected to system_clk, soc_rst (system_reset), core_alt_reset and
-- system_clk_locked.
has_dram: if USE_LITEDRAM generate
signal dram_init_done : std_ulogic;
signal dram_init_error : std_ulogic;
signal dram_sys_rst : std_ulogic;
begin

-- Eventually dig out the frequency from the generator
-- but for now, assert it's 100 MHz
assert CLK_FREQUENCY = 100000000;

-- Only the PLL reset output of the reset controller is used here: the
-- lock input is tied high and rst_out is left open, since soc_rst is
-- connected to the DRAM wrapper's system_reset port below.
reset_controller: entity work.soc_reset
generic map(
RESET_LOW => RESET_LOW,
PLL_RESET_BITS => 18,
SOC_RESET_BITS => 1
)
port map(
ext_clk => ext_clk,
pll_clk => system_clk,
pll_locked_in => '1',
ext_rst_in => ext_rst,
pll_rst_out => pll_rst,
rst_out => open
);

-- NOTE(review): these assignments read the ddram_clk_p/ddram_clk_n out
-- ports into the one-bit vectors that are also associated with the
-- wrapper's ddram_clk_p/ddram_clk_n ports below, and nothing in this
-- block assigns ddram_clk_p/ddram_clk_n themselves.  Confirm the
-- intended direction against litedram_wrapper.
ddram_clk_p_vec <= (others => ddram_clk_p);
ddram_clk_n_vec <= (others => ddram_clk_n);

dram: entity work.litedram_wrapper
generic map(
DRAM_ABITS => 25,
DRAM_ALINES => 15,
DRAM_DLINES => 32,
DRAM_CKLINES => 1,
DRAM_PORT_WIDTH => 256,
PAYLOAD_FILE => RAM_INIT_FILE,
PAYLOAD_SIZE => PAYLOAD_SIZE
)
port map(
clk_in => ext_clk,
rst => pll_rst,
system_clk => system_clk,
system_reset => soc_rst,
core_alt_reset => core_alt_reset,
pll_locked => system_clk_locked,

-- Main data wishbone
wb_in => wb_dram_in,
wb_out => wb_dram_out,
-- Control wishbone; the DRAM wrapper is the only responder on the
-- external IO bus in this top level
wb_ctrl_in => wb_ext_io_in,
wb_ctrl_out => wb_ext_io_out,
wb_ctrl_is_csr => wb_ext_is_dram_csr,
wb_ctrl_is_init => wb_ext_is_dram_init,

init_done => dram_init_done,
init_error => dram_init_error,

ddram_a => ddram_a,
ddram_ba => ddram_ba,
ddram_ras_n => ddram_ras_n,
ddram_cas_n => ddram_cas_n,
ddram_we_n => ddram_we_n,
ddram_cs_n => ddram_cs_n,
ddram_dm => ddram_dm,
ddram_dq => ddram_dq,
ddram_dqs_p => ddram_dqs_p,
ddram_dqs_n => ddram_dqs_n,
ddram_clk_p => ddram_clk_p_vec,
ddram_clk_n => ddram_clk_n_vec,
ddram_cke => ddram_cke,
ddram_odt => ddram_odt,
ddram_reset_n => ddram_reset_n
);

-- Debug LEDs: SoC reset, PLL reset, DRAM init pending or failed,
-- and DRAM init without error
led0 <= soc_rst;
led1 <= pll_rst;
led2 <= not dram_init_done or dram_init_error;
led3 <= not dram_init_error; -- Make it blink ?
end generate;
end architecture behaviour;

@ -1,587 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library unisim;
use unisim.vcomponents.all;

library work;
use work.wishbone_types.all;

-- Board top level: SoC with UART, quad SPI flash, eight LEDs, optional
-- LiteDRAM, LiteEth and LiteSDCard, clocked from a single-ended ext_clk.
entity toplevel is
generic (
-- Memory size; used as block RAM size or, when USE_LITEDRAM and NO_BRAM
-- are both set, as the DRAM payload size instead
MEMORY_SIZE : integer := 16384;
RAM_INIT_FILE : string := "firmware.hex";
RESET_LOW : boolean := true;
-- System clock frequency in Hz
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
HAS_BTC : boolean := true;
HAS_SHORT_MULT: boolean := false;
USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
SPI_FLASH_OFFSET : integer := 10485760;
SPI_FLASH_DEF_CKDV : natural := 1;
SPI_FLASH_DEF_QUAD : boolean := true;
LOG_LENGTH : natural := 2048;
UART_IS_16550 : boolean := true;
USE_LITEETH : boolean := false;
USE_LITESDCARD : boolean := false
);
port(
-- Board clock and external reset (the _n suffix suggests active low)
ext_clk : in std_ulogic;
ext_rst_n : in std_ulogic;

-- UART0 signals:
uart_main_tx : out std_ulogic;
uart_main_rx : in std_ulogic;

-- LEDs
led0 : out std_ulogic;
led1 : out std_ulogic;
led2 : out std_ulogic;
led3 : out std_ulogic;
led4 : out std_ulogic;
led5 : out std_ulogic;
led6 : out std_ulogic;
led7 : out std_ulogic;

-- SPI (no clock pin: the flash clock goes through STARTUPE2)
spi_flash_cs_n : out std_ulogic;
spi_flash_mosi : inout std_ulogic;
spi_flash_miso : inout std_ulogic;
spi_flash_wp_n : inout std_ulogic;
spi_flash_hold_n : inout std_ulogic;

-- Ethernet
-- NOTE(review): the rx_ctl/tx_ctl naming suggests an RGMII PHY
-- interface; confirm against the board constraints.
eth_clocks_tx : out std_ulogic;
eth_clocks_rx : in std_ulogic;
eth_rst_n : out std_ulogic;
eth_int_n : in std_ulogic;
eth_mdio : inout std_ulogic;
eth_mdc : out std_ulogic;
eth_rx_ctl : in std_ulogic;
eth_rx_data : in std_ulogic_vector(3 downto 0);
eth_tx_ctl : out std_ulogic;
eth_tx_data : out std_ulogic_vector(3 downto 0);

-- SD card
sdcard_data : inout std_ulogic_vector(3 downto 0);
sdcard_cmd : inout std_ulogic;
sdcard_clk : out std_ulogic;
sdcard_cd : in std_ulogic;
sdcard_reset : out std_ulogic;

-- DRAM wires
ddram_a : out std_logic_vector(14 downto 0);
ddram_ba : out std_logic_vector(2 downto 0);
ddram_ras_n : out std_logic;
ddram_cas_n : out std_logic;
ddram_we_n : out std_logic;
ddram_dm : out std_logic_vector(1 downto 0);
ddram_dq : inout std_logic_vector(15 downto 0);
ddram_dqs_p : inout std_logic_vector(1 downto 0);
ddram_dqs_n : inout std_logic_vector(1 downto 0);
ddram_clk_p : out std_logic;
ddram_clk_n : out std_logic;
ddram_cke : out std_logic;
ddram_odt : out std_logic;
ddram_reset_n : out std_logic
);
end entity toplevel;

architecture behaviour of toplevel is

-- Reset signals:
signal soc_rst : std_ulogic;
signal pll_rst : std_ulogic;

-- Internal clock signals:
signal system_clk : std_ulogic;
signal system_clk_locked : std_ulogic;

-- External IOs from the SoC: shared IO wishbone plus one select per
-- external device
signal wb_ext_io_in : wb_io_master_out;
signal wb_ext_io_out : wb_io_slave_out;
signal wb_ext_is_dram_csr : std_ulogic;
signal wb_ext_is_dram_init : std_ulogic;
signal wb_ext_is_eth : std_ulogic;
signal wb_ext_is_sdcard : std_ulogic;

-- DRAM main data wishbone connection
signal wb_dram_in : wishbone_master_out;
signal wb_dram_out : wishbone_slave_out;

-- DRAM control wishbone connection
signal wb_dram_ctrl_out : wb_io_slave_out := wb_io_slave_out_init;

-- LiteEth connection
signal ext_irq_eth : std_ulogic;
signal wb_eth_out : wb_io_slave_out := wb_io_slave_out_init;

-- LiteSDCard connection
signal ext_irq_sdcard : std_ulogic := '0';
signal wb_sdcard_out : wb_io_slave_out := wb_io_slave_out_init;
-- DMA wishbone as seen by the SoC (connected to wishbone_dma_out/in)
signal wb_sddma_out : wb_io_master_out := wb_io_master_out_init;
signal wb_sddma_in : wb_io_slave_out;
-- NOTE(review): presumably the "non-registered" master outputs of the SD
-- card core and the registered bus response going back to it; their use
-- is outside the visible part of this file.
signal wb_sddma_nr : wb_io_master_out;
signal wb_sddma_ir : wb_io_slave_out;
-- for conversion from non-pipelined wishbone to pipelined
signal wb_sddma_stb_sent : std_ulogic;

-- Control/status
signal core_alt_reset : std_ulogic;

-- SPI flash: clock, chip select, and per-line output / output-enable /
-- input for the four data lines
signal spi_sck : std_ulogic;
signal spi_cs_n : std_ulogic;
signal spi_sdat_o : std_ulogic_vector(3 downto 0);
signal spi_sdat_oe : std_ulogic_vector(3 downto 0);
signal spi_sdat_i : std_ulogic_vector(3 downto 0);

-- ddram clock signals as vectors
signal ddram_clk_p_vec : std_logic_vector(0 downto 0);
signal ddram_clk_n_vec : std_logic_vector(0 downto 0);

-- Fixup various memory sizes based on generics
-- Size of the block RAM given to the SoC.
-- Returns MEMORY_SIZE, except when both USE_LITEDRAM and NO_BRAM are
-- set, in which case no block RAM is requested (0).
function get_bram_size return natural is
begin
    if not (USE_LITEDRAM and NO_BRAM) then
        return MEMORY_SIZE;
    end if;
    return 0;
end function;

-- Size of the DRAM init payload (passed on as DRAM_INIT_SIZE / PAYLOAD_SIZE).
-- Returns MEMORY_SIZE when LiteDRAM is used and NO_BRAM is set, otherwise 0.
function get_payload_size return natural is
begin
    -- No payload unless both USE_LITEDRAM and NO_BRAM are set
    if (not USE_LITEDRAM) or (not NO_BRAM) then
        return 0;
    end if;
    return MEMORY_SIZE;
end function;
constant BRAM_SIZE : natural := get_bram_size;
constant PAYLOAD_SIZE : natural := get_payload_size;
begin

-- Main SoC
-- Instantiates work.soc, forwarding the top-level generics and wiring the
-- UART, SPI flash, interrupt and wishbone signals declared above.
soc0: entity work.soc
generic map(
-- Block RAM size after the NO_BRAM fixup (see get_bram_size)
MEMORY_SIZE => BRAM_SIZE,
RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC,
HAS_SHORT_MULT=> HAS_SHORT_MULT,
HAS_DRAM => USE_LITEDRAM,
-- 512 * 1024 * 1024 = 512 MiB
DRAM_SIZE => 512 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE,
-- SPI flash is always present on this board top level, with 4 data lines
HAS_SPI_FLASH => true,
SPI_FLASH_DLINES => 4,
SPI_FLASH_OFFSET => SPI_FLASH_OFFSET,
SPI_FLASH_DEF_CKDV => SPI_FLASH_DEF_CKDV,
SPI_FLASH_DEF_QUAD => SPI_FLASH_DEF_QUAD,
LOG_LENGTH => LOG_LENGTH,
UART0_IS_16550 => UART_IS_16550,
HAS_LITEETH => USE_LITEETH,
HAS_SD_CARD => USE_LITESDCARD
)
port map (
-- System signals
system_clk => system_clk,
rst => soc_rst,

-- UART signals
uart0_txd => uart_main_tx,
uart0_rxd => uart_main_rx,

-- SPI signals
spi_flash_sck => spi_sck,
spi_flash_cs_n => spi_cs_n,
spi_flash_sdat_o => spi_sdat_o,
spi_flash_sdat_oe => spi_sdat_oe,
spi_flash_sdat_i => spi_sdat_i,

-- External interrupts
ext_irq_eth => ext_irq_eth,
ext_irq_sdcard => ext_irq_sdcard,

-- IO wishbone
-- (wb_dram_* is the main DRAM data connection; wb_ext_* is the IO bus
-- shared by the DRAM control, Ethernet and SD card cores)
wb_dram_in => wb_dram_in,
wb_dram_out => wb_dram_out,
wb_ext_io_in => wb_ext_io_in,
wb_ext_io_out => wb_ext_io_out,
wb_ext_is_dram_csr => wb_ext_is_dram_csr,
wb_ext_is_dram_init => wb_ext_is_dram_init,
wb_ext_is_eth => wb_ext_is_eth,
wb_ext_is_sdcard => wb_ext_is_sdcard,

-- DMA wishbone
wishbone_dma_in => wb_sddma_in,
wishbone_dma_out => wb_sddma_out,

alt_reset => core_alt_reset
);

-- SPI Flash. The SPI clk needs to be fed through the STARTUPE2
-- primitive of the FPGA as it's not a normal pin
--
STARTUPE2_INST: STARTUPE2
    port map (
        -- The SoC's SPI clock leaves the chip through the user CCLK output
        USRCCLKO  => spi_sck,
        USRCCLKTS => '0',
        USRDONEO  => '1',
        USRDONETS => '0',
        -- Remaining inputs are tied off
        CLK       => '0',
        GSR       => '0',
        GTS       => '0',
        KEYCLEARB => '0',
        PACK      => '0'
    );

-- Chip select is a plain output
spi_flash_cs_n <= spi_cs_n;

-- Each data pin is driven only while its output-enable bit is set and is
-- released to high impedance otherwise
spi_flash_hold_n <= spi_sdat_o(3) when spi_sdat_oe(3) = '1' else 'Z';
spi_flash_wp_n   <= spi_sdat_o(2) when spi_sdat_oe(2) = '1' else 'Z';
spi_flash_miso   <= spi_sdat_o(1) when spi_sdat_oe(1) = '1' else 'Z';
spi_flash_mosi   <= spi_sdat_o(0) when spi_sdat_oe(0) = '1' else 'Z';

-- Read all four pins back: bit 3 = hold_n, 2 = wp_n, 1 = miso, 0 = mosi
spi_sdat_i <= spi_flash_hold_n & spi_flash_wp_n & spi_flash_miso & spi_flash_mosi;

-- Build without LiteDRAM: the clock comes from clock_generator and the SoC
-- reset directly from the soc_reset controller.
nodram: if not USE_LITEDRAM generate
    signal ddram_clk_dummy : std_ulogic;
begin
    -- Vivado barfs on the differential DRAM clock pins if they are left
    -- unconnected, so instantiate a differential buffer and feed it a
    -- constant '0'.
    ddram_clk_dummy <= '0';

    dummy_dram_clk: OBUFDS
        port map (
            I  => ddram_clk_dummy,
            O  => ddram_clk_p,
            OB => ddram_clk_n
        );

    -- System clock derived from the external clock (input fixed at 100 MHz)
    clkgen: entity work.clock_generator
        generic map(
            CLK_INPUT_HZ  => 100000000,
            CLK_OUTPUT_HZ => CLK_FREQUENCY
        )
        port map(
            ext_clk        => ext_clk,
            pll_rst_in     => pll_rst,
            pll_clk_out    => system_clk,
            pll_locked_out => system_clk_locked
        );

    -- Reset sequencing for both the PLL and the SoC
    reset_controller: entity work.soc_reset
        generic map(
            RESET_LOW => RESET_LOW
        )
        port map(
            ext_clk       => ext_clk,
            pll_clk       => system_clk,
            pll_locked_in => system_clk_locked,
            ext_rst_in    => ext_rst_n,
            pll_rst_out   => pll_rst,
            rst_out       => soc_rst
        );

    -- No DRAM, so the alternate reset is never requested
    core_alt_reset <= '0';

    -- Status LEDs: led0 and led2 are constant, led1 reflects "out of reset"
    led0 <= '1';
    led1 <= not soc_rst;
    led2 <= '0';

end generate;

-- Build with LiteDRAM: the litedram_wrapper generates the system clock and
-- its own system reset, from which the SoC reset is derived.
has_dram: if USE_LITEDRAM generate
    signal dram_init_done  : std_ulogic;
    signal dram_init_error : std_ulogic;
    signal dram_sys_rst    : std_ulogic;
begin

    -- Eventually dig out the frequency from the generator
    -- but for now, assert it's 100 MHz
    assert CLK_FREQUENCY = 100000000
        report "CLK_FREQUENCY must be 100000000 when USE_LITEDRAM is true"
        severity error;

    -- Only the PLL reset output of the controller is used in this
    -- configuration; the SoC reset is generated by soc_rst_gen below.
    reset_controller: entity work.soc_reset
        generic map(
            RESET_LOW      => RESET_LOW,
            PLL_RESET_BITS => 18,
            SOC_RESET_BITS => 1
        )
        port map(
            ext_clk       => ext_clk,
            pll_clk       => system_clk,
            pll_locked_in => '1',
            ext_rst_in    => ext_rst_n,
            pll_rst_out   => pll_rst,
            rst_out       => open
        );

    -- Generate SoC reset
    -- ext_rst_n acts asynchronously, so it must be in the sensitivity list
    -- for simulation to match the synthesised flip-flop.
    -- NOTE(review): the polarity is hard-coded active-low here and does not
    -- follow the RESET_LOW generic -- confirm this is intended.
    soc_rst_gen: process(system_clk, ext_rst_n)
    begin
        if ext_rst_n = '0' then
            soc_rst <= '1';
        elsif rising_edge(system_clk) then
            soc_rst <= dram_sys_rst or not system_clk_locked;
        end if;
    end process;

    -- The wrapper drives the DRAM clock as one-element vectors; forward
    -- element 0 to the scalar top-level output pins.
    ddram_clk_p <= ddram_clk_p_vec(0);
    ddram_clk_n <= ddram_clk_n_vec(0);

    dram: entity work.litedram_wrapper
        generic map(
            DRAM_ABITS      => 25,
            DRAM_ALINES     => 15,
            DRAM_DLINES     => 16,
            DRAM_CKLINES    => 1,
            DRAM_PORT_WIDTH => 128,
            PAYLOAD_FILE    => RAM_INIT_FILE,
            PAYLOAD_SIZE    => PAYLOAD_SIZE
        )
        port map(
            clk_in         => ext_clk,
            rst            => pll_rst,
            system_clk     => system_clk,
            system_reset   => dram_sys_rst,
            core_alt_reset => core_alt_reset,
            pll_locked     => system_clk_locked,

            -- Main data port and control/CSR port on the IO bus
            wb_in           => wb_dram_in,
            wb_out          => wb_dram_out,
            wb_ctrl_in      => wb_ext_io_in,
            wb_ctrl_out     => wb_dram_ctrl_out,
            wb_ctrl_is_csr  => wb_ext_is_dram_csr,
            wb_ctrl_is_init => wb_ext_is_dram_init,

            init_done  => dram_init_done,
            init_error => dram_init_error,

            -- DRAM pins (chip select is not brought out on this board top)
            ddram_a       => ddram_a,
            ddram_ba      => ddram_ba,
            ddram_ras_n   => ddram_ras_n,
            ddram_cas_n   => ddram_cas_n,
            ddram_we_n    => ddram_we_n,
            ddram_cs_n    => open,
            ddram_dm      => ddram_dm,
            ddram_dq      => ddram_dq,
            ddram_dqs_p   => ddram_dqs_p,
            ddram_dqs_n   => ddram_dqs_n,
            ddram_clk_p   => ddram_clk_p_vec,
            ddram_clk_n   => ddram_clk_n_vec,
            ddram_cke     => ddram_cke,
            ddram_odt     => ddram_odt,
            ddram_reset_n => ddram_reset_n
        );

    -- Status LEDs: led0 = init still running, led1 = init error,
    -- led2 = init finished without error
    led0 <= not dram_init_done;
    led1 <= dram_init_error; -- Make it blink ?
    led2 <= dram_init_done and not dram_init_error;

end generate;

-- Ethernet: instantiate the liteeth_core component (declared here, defined
-- outside this file) and attach it to the SoC's external IO wishbone.
has_liteeth : if USE_LITEETH generate

-- Component declaration for the LiteEth core: RGMII pins, a non-pipelined
-- wishbone slave port and an interrupt output.
component liteeth_core port (
sys_clock : in std_ulogic;
sys_reset : in std_ulogic;
rgmii_eth_clocks_tx : out std_ulogic;
rgmii_eth_clocks_rx : in std_ulogic;
rgmii_eth_rst_n : out std_ulogic;
rgmii_eth_int_n : in std_ulogic;
rgmii_eth_mdio : inout std_ulogic;
rgmii_eth_mdc : out std_ulogic;
rgmii_eth_rx_ctl : in std_ulogic;
rgmii_eth_rx_data : in std_ulogic_vector(3 downto 0);
rgmii_eth_tx_ctl : out std_ulogic;
rgmii_eth_tx_data : out std_ulogic_vector(3 downto 0);
wishbone_adr : in std_ulogic_vector(29 downto 0);
wishbone_dat_w : in std_ulogic_vector(31 downto 0);
wishbone_dat_r : out std_ulogic_vector(31 downto 0);
wishbone_sel : in std_ulogic_vector(3 downto 0);
wishbone_cyc : in std_ulogic;
wishbone_stb : in std_ulogic;
wishbone_ack : out std_ulogic;
wishbone_we : in std_ulogic;
wishbone_cti : in std_ulogic_vector(2 downto 0);
wishbone_bte : in std_ulogic_vector(1 downto 0);
wishbone_err : out std_ulogic;
interrupt : out std_ulogic
);
end component;

-- Locally derived cycle and address signals for the core's wishbone port
signal wb_eth_cyc : std_ulogic;
signal wb_eth_adr : std_ulogic_vector(29 downto 0);

begin
liteeth : liteeth_core
port map(
sys_clock => system_clk,
sys_reset => soc_rst,
rgmii_eth_clocks_tx => eth_clocks_tx,
rgmii_eth_clocks_rx => eth_clocks_rx,
rgmii_eth_rst_n => eth_rst_n,
rgmii_eth_int_n => eth_int_n,
rgmii_eth_mdio => eth_mdio,
rgmii_eth_mdc => eth_mdc,
rgmii_eth_rx_ctl => eth_rx_ctl,
rgmii_eth_rx_data => eth_rx_data,
rgmii_eth_tx_ctl => eth_tx_ctl,
rgmii_eth_tx_data => eth_tx_data,
-- Data, select, strobe and write-enable come straight from the IO bus;
-- address and cyc are the locally derived versions below
wishbone_adr => wb_eth_adr,
wishbone_dat_w => wb_ext_io_in.dat,
wishbone_dat_r => wb_eth_out.dat,
wishbone_sel => wb_ext_io_in.sel,
wishbone_cyc => wb_eth_cyc,
wishbone_stb => wb_ext_io_in.stb,
wishbone_ack => wb_eth_out.ack,
wishbone_we => wb_ext_io_in.we,
-- Burst signalling is tied off and the error output is ignored
wishbone_cti => "000",
wishbone_bte => "00",
wishbone_err => open,
interrupt => ext_irq_eth
);

-- Gate cyc with "chip select" from soc
wb_eth_cyc <= wb_ext_io_in.cyc and wb_ext_is_eth;

-- Remove top address bits as liteeth decoder doesn't know about them
-- (only the low 15 address bits are passed; the upper 15 are zero)
wb_eth_adr <= x"000" & "000" & wb_ext_io_in.adr(14 downto 0);

-- LiteETH isn't pipelined
-- (so stall is asserted on every cycle that is not an ack)
wb_eth_out.stall <= not wb_eth_out.ack;

end generate;

-- Without LiteEth nothing else drives the Ethernet interrupt; hold it low
no_liteeth : if not USE_LITEETH generate
ext_irq_eth <= '0';
end generate;

-- SD card
-- Instantiates the litesdcard_core component (declared here, defined outside
-- this file), attaches its control port to the external IO wishbone and
-- bridges its DMA master port to the SoC's DMA wishbone.
has_sdcard : if USE_LITESDCARD generate
component litesdcard_core port (
clk : in std_ulogic;
rst : in std_ulogic;
-- wishbone for accessing control registers
wb_ctrl_adr : in std_ulogic_vector(29 downto 0);
wb_ctrl_dat_w : in std_ulogic_vector(31 downto 0);
wb_ctrl_dat_r : out std_ulogic_vector(31 downto 0);
wb_ctrl_sel : in std_ulogic_vector(3 downto 0);
wb_ctrl_cyc : in std_ulogic;
wb_ctrl_stb : in std_ulogic;
wb_ctrl_ack : out std_ulogic;
wb_ctrl_we : in std_ulogic;
wb_ctrl_cti : in std_ulogic_vector(2 downto 0);
wb_ctrl_bte : in std_ulogic_vector(1 downto 0);
wb_ctrl_err : out std_ulogic;
-- wishbone for SD card core to use for DMA
wb_dma_adr : out std_ulogic_vector(29 downto 0);
wb_dma_dat_w : out std_ulogic_vector(31 downto 0);
wb_dma_dat_r : in std_ulogic_vector(31 downto 0);
wb_dma_sel : out std_ulogic_vector(3 downto 0);
wb_dma_cyc : out std_ulogic;
wb_dma_stb : out std_ulogic;
wb_dma_ack : in std_ulogic;
wb_dma_we : out std_ulogic;
wb_dma_cti : out std_ulogic_vector(2 downto 0);
wb_dma_bte : out std_ulogic_vector(1 downto 0);
wb_dma_err : in std_ulogic;
-- connections to SD card
sdcard_data : inout std_ulogic_vector(3 downto 0);
sdcard_cmd : inout std_ulogic;
sdcard_clk : out std_ulogic;
sdcard_cd : in std_ulogic;
irq : out std_ulogic
);
end component;

-- Locally derived cycle and address signals for the control port
signal wb_sdcard_cyc : std_ulogic;
signal wb_sdcard_adr : std_ulogic_vector(29 downto 0);

begin
litesdcard : litesdcard_core
port map (
clk => system_clk,
rst => soc_rst,
-- Control port: driven from the IO bus, with gated cyc and trimmed address
wb_ctrl_adr => wb_sdcard_adr,
wb_ctrl_dat_w => wb_ext_io_in.dat,
wb_ctrl_dat_r => wb_sdcard_out.dat,
wb_ctrl_sel => wb_ext_io_in.sel,
wb_ctrl_cyc => wb_sdcard_cyc,
wb_ctrl_stb => wb_ext_io_in.stb,
wb_ctrl_ack => wb_sdcard_out.ack,
wb_ctrl_we => wb_ext_io_in.we,
wb_ctrl_cti => "000",
wb_ctrl_bte => "00",
wb_ctrl_err => open,
-- DMA port: the core talks to the registered wb_sddma_nr / wb_sddma_ir
-- pair, which the process below converts to and from the SoC DMA bus
wb_dma_adr => wb_sddma_nr.adr,
wb_dma_dat_w => wb_sddma_nr.dat,
wb_dma_dat_r => wb_sddma_ir.dat,
wb_dma_sel => wb_sddma_nr.sel,
wb_dma_cyc => wb_sddma_nr.cyc,
wb_dma_stb => wb_sddma_nr.stb,
wb_dma_ack => wb_sddma_ir.ack,
wb_dma_we => wb_sddma_nr.we,
wb_dma_cti => open,
wb_dma_bte => open,
wb_dma_err => '0',
sdcard_data => sdcard_data,
sdcard_cmd => sdcard_cmd,
sdcard_clk => sdcard_clk,
sdcard_cd => sdcard_cd,
irq => ext_irq_sdcard
);

-- Gate cyc with chip select from SoC
wb_sdcard_cyc <= wb_ext_io_in.cyc and wb_ext_is_sdcard;

-- Only the low 14 address bits are passed on; the upper 16 are zero
wb_sdcard_adr <= x"0000" & wb_ext_io_in.adr(13 downto 0);

-- Control port is treated as non-pipelined: stall whenever there is no ack
wb_sdcard_out.stall <= not wb_sdcard_out.ack;

-- Constant '0' when the SD card core is built
sdcard_reset <= '0';

-- Convert non-pipelined DMA wishbone to pipelined by suppressing
-- non-acknowledged strobes
process(system_clk)
begin
if rising_edge(system_clk) then
-- Register the core's request; the stb field may be overridden below
-- (the last signal assignment in a process wins)
wb_sddma_out <= wb_sddma_nr;
-- Drop stb if it was already accepted earlier in this request, or is
-- being accepted right now (stb presented and the slave not stalling)
if wb_sddma_stb_sent = '1' or
(wb_sddma_out.stb = '1' and wb_sddma_in.stall = '0') then
wb_sddma_out.stb <= '0';
end if;
-- Clear the "sent" flag when the core ends the cycle or the registered
-- ack arrives; otherwise capture the core's stb while not stalled
if wb_sddma_nr.cyc = '0' or wb_sddma_ir.ack = '1' then
wb_sddma_stb_sent <= '0';
elsif wb_sddma_in.stall = '0' then
wb_sddma_stb_sent <= wb_sddma_nr.stb;
end if;
-- Register the SoC's response on its way back to the core
wb_sddma_ir <= wb_sddma_in;
end if;
end process;

end generate;

-- Without the SD card core, drive sdcard_reset high (the has_sdcard branch
-- drives it low instead)
no_sdcard : if not USE_LITESDCARD generate
sdcard_reset <= '1';
end generate;

-- Mux WB response on the IO bus
-- Priority order: Ethernet select, then SD card select; anything else gets
-- the DRAM control response.
wb_ext_io_out <= wb_eth_out when wb_ext_is_eth = '1' else
wb_sdcard_out when wb_ext_is_sdcard = '1' else
wb_dram_ctrl_out;

-- Remaining status LEDs: clock lock and "out of reset"; led5/led7 are constant
led4 <= system_clk_locked;
led5 <= '1';
led6 <= not soc_rst;
led7 <= '0';

end architecture behaviour;

@ -1,512 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.wishbone_types.all;

-- Board top level: wraps the SoC and exposes UART, RGB LED, SPI flash,
-- SD card and DRAM pins.
entity toplevel is
generic (
-- Memory size; used as block RAM size or DRAM init payload size depending
-- on USE_LITEDRAM / NO_BRAM (see get_bram_size / get_payload_size)
MEMORY_SIZE : integer := 16384;
RAM_INIT_FILE : string := "firmware.hex";
RESET_LOW : boolean := true;
-- External clock frequency in Hz, passed to the clock generator
CLK_INPUT : positive := 100000000;
-- System clock frequency in Hz
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
HAS_BTC : boolean := false;
USE_LITEDRAM : boolean := true;
NO_BRAM : boolean := true;
-- NOTE(review): not referenced in the architecture below
SCLK_STARTUPE2 : boolean := false;
SPI_FLASH_OFFSET : integer := 4194304;
SPI_FLASH_DEF_CKDV : natural := 1;
SPI_FLASH_DEF_QUAD : boolean := true;
LOG_LENGTH : natural := 0;
UART_IS_16550 : boolean := true;
HAS_UART1 : boolean := false;
USE_LITESDCARD : boolean := true;
ICACHE_NUM_LINES : natural := 64;
-- Number of GPIO lines; sizes the internal gpio_in/out/dir vectors
NGPIO : natural := 0
);
port(
ext_clk : in std_ulogic;
ext_rst_n : in std_ulogic;

-- UART0 signals:
-- (pin_gpio_0 is connected to uart0_txd, pin_gpio_1 to uart0_rxd)
pin_gpio_0 : out std_ulogic;
pin_gpio_1 : in std_ulogic;

-- LEDs
-- (blue / green / red channels, driven by the leds_pwm process)
led0_b : out std_ulogic;
led0_g : out std_ulogic;
led0_r : out std_ulogic;

-- SPI
spi_flash_cs_n : out std_ulogic;
spi_flash_mosi : inout std_ulogic;
spi_flash_miso : inout std_ulogic;
spi_flash_wp_n : inout std_ulogic;
spi_flash_hold_n : inout std_ulogic;

-- SD card wires
sdcard_data : inout std_ulogic_vector(3 downto 0);
sdcard_cmd : inout std_ulogic;
sdcard_clk : out std_ulogic;
sdcard_cd : in std_ulogic;

-- DRAM wires
ddram_a : out std_ulogic_vector(13 downto 0);
ddram_ba : out std_ulogic_vector(2 downto 0);
ddram_ras_n : out std_ulogic;
ddram_cas_n : out std_ulogic;
ddram_we_n : out std_ulogic;
ddram_cs_n : out std_ulogic;
ddram_dm : out std_ulogic_vector(1 downto 0);
ddram_dq : inout std_ulogic_vector(15 downto 0);
ddram_dqs_p : inout std_ulogic_vector(1 downto 0);
ddram_clk_p : out std_ulogic_vector(0 downto 0);
-- only the positive differential pin is instantiated
--ddram_dqs_n : inout std_ulogic_vector(1 downto 0);
--ddram_clk_n : out std_ulogic_vector(0 downto 0);
ddram_cke : out std_ulogic;
ddram_odt : out std_ulogic;
ddram_reset_n : out std_ulogic;

-- Constant-level pins: driven to all '0' / all '1' when LiteDRAM is built
ddram_gnd : out std_ulogic_vector(1 downto 0);
ddram_vccio : out std_ulogic_vector(5 downto 0)
);
end entity toplevel;

architecture behaviour of toplevel is

-- Reset signals:
-- soc_rst resets the SoC and the SD card core; pll_rst is the pll_rst_out
-- output of the soc_reset controller.
signal soc_rst : std_ulogic;
signal pll_rst : std_ulogic;

-- Internal clock signals:
signal system_clk : std_ulogic;
signal system_clk_locked : std_ulogic;

-- External IOs from the SoC
-- wb_ext_io_in is the SoC's IO-bus request, wb_ext_io_out the muxed response,
-- and the wb_ext_is_* flags are the per-device selects provided by the SoC.
signal wb_ext_io_in : wb_io_master_out;
signal wb_ext_io_out : wb_io_slave_out;
signal wb_ext_is_dram_csr : std_ulogic;
signal wb_ext_is_dram_init : std_ulogic;
signal wb_ext_is_sdcard : std_ulogic;

-- DRAM main data wishbone connection
signal wb_dram_in : wishbone_master_out;
signal wb_dram_out : wishbone_slave_out;

-- DRAM control wishbone connection
-- (default value is what the IO response mux sees when no DRAM is built)
signal wb_dram_ctrl_out : wb_io_slave_out := wb_io_slave_out_init;

-- LiteSDCard connection
signal ext_irq_sdcard : std_ulogic := '0';
signal wb_sdcard_out : wb_io_slave_out := wb_io_slave_out_init;
-- wb_sddma_out/in face the SoC DMA port; wb_sddma_nr/ir face the SD card core
signal wb_sddma_out : wb_io_master_out := wb_io_master_out_init;
signal wb_sddma_in : wb_io_slave_out;
signal wb_sddma_nr : wb_io_master_out;
signal wb_sddma_ir : wb_io_slave_out;
-- for conversion from non-pipelined wishbone to pipelined
-- NOTE(review): no initial value or reset; relies on the power-up state
signal wb_sddma_stb_sent : std_ulogic;

-- Control/status
signal core_alt_reset : std_ulogic;

-- Status LED
-- (un-dimmed LED values; the leds_pwm process gates them onto the pins)
signal led0_b_pwm : std_ulogic;
signal led0_r_pwm : std_ulogic;
signal led0_g_pwm : std_ulogic;

-- Dumb PWM for the LEDs, those RGB LEDs are too bright otherwise
-- NOTE(review): no initial value, so the counter starts as 'U' in simulation
signal pwm_counter : std_ulogic_vector(8 downto 0);

-- SPI flash
-- _o / _oe / _i are the per-pin output, output-enable and input values
signal spi_sck : std_ulogic;
signal spi_cs_n : std_ulogic;
signal spi_sdat_o : std_ulogic_vector(3 downto 0);
signal spi_sdat_oe : std_ulogic_vector(3 downto 0);
signal spi_sdat_i : std_ulogic_vector(3 downto 0);

-- GPIO
-- (null-range vectors when NGPIO = 0)
signal gpio_in : std_ulogic_vector(NGPIO - 1 downto 0);
signal gpio_out : std_ulogic_vector(NGPIO - 1 downto 0);
signal gpio_dir : std_ulogic_vector(NGPIO - 1 downto 0);

-- Fixup various memory sizes based on generics
-- Size of the block RAM passed to the SoC as MEMORY_SIZE.
-- Returns 0 when LiteDRAM is used and NO_BRAM is set, otherwise the
-- MEMORY_SIZE generic unchanged.
function get_bram_size return natural is
begin
    -- Common case first: block RAM is kept at its configured size
    if not (USE_LITEDRAM and NO_BRAM) then
        return MEMORY_SIZE;
    end if;
    return 0;
end function;

-- Size of the DRAM init payload (passed on as DRAM_INIT_SIZE / PAYLOAD_SIZE).
-- Returns MEMORY_SIZE when LiteDRAM is used and NO_BRAM is set, otherwise 0.
function get_payload_size return natural is
begin
    -- No payload unless both USE_LITEDRAM and NO_BRAM are set
    if (not USE_LITEDRAM) or (not NO_BRAM) then
        return 0;
    end if;
    return MEMORY_SIZE;
end function;

constant BRAM_SIZE : natural := get_bram_size;
constant PAYLOAD_SIZE : natural := get_payload_size;

-- Component declaration for the USRMCLK primitive, which the architecture
-- instantiates to output the SPI flash clock.
COMPONENT USRMCLK
PORT(
USRMCLKI : IN STD_ULOGIC;
USRMCLKTS : IN STD_ULOGIC
);
END COMPONENT;
-- syn_noprune is set on the component; presumably this stops synthesis from
-- removing the output-less instance -- confirm against the tool documentation
attribute syn_noprune: boolean ;
attribute syn_noprune of USRMCLK: component is true;

begin

-- Main SoC
-- Instantiates work.soc, forwarding the top-level generics and wiring the
-- UART, SPI flash, GPIO, interrupt and wishbone signals declared above.
soc0: entity work.soc
generic map(
-- Block RAM size after the NO_BRAM fixup (see get_bram_size)
MEMORY_SIZE => BRAM_SIZE,
RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC,
HAS_DRAM => USE_LITEDRAM,
-- 256 * 1024 * 1024 = 256 MiB
DRAM_SIZE => 256 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,
-- SPI flash is always present on this board top level, with 4 data lines
HAS_SPI_FLASH => true,
SPI_FLASH_DLINES => 4,
SPI_FLASH_OFFSET => SPI_FLASH_OFFSET,
SPI_FLASH_DEF_CKDV => SPI_FLASH_DEF_CKDV,
SPI_FLASH_DEF_QUAD => SPI_FLASH_DEF_QUAD,
LOG_LENGTH => LOG_LENGTH,
UART0_IS_16550 => UART_IS_16550,
HAS_UART1 => HAS_UART1,
HAS_SD_CARD => USE_LITESDCARD,
ICACHE_NUM_LINES => ICACHE_NUM_LINES,
-- Hard-wired to true here rather than exposed as a top-level generic
HAS_SHORT_MULT => true,
NGPIO => NGPIO
)
port map (
-- System signals
system_clk => system_clk,
rst => soc_rst,

-- UART signals
uart0_txd => pin_gpio_0,
uart0_rxd => pin_gpio_1,

-- UART1 signals
--uart1_txd => uart_pmod_tx,
--uart1_rxd => uart_pmod_rx,

-- SPI signals
spi_flash_sck => spi_sck,
spi_flash_cs_n => spi_cs_n,
spi_flash_sdat_o => spi_sdat_o,
spi_flash_sdat_oe => spi_sdat_oe,
spi_flash_sdat_i => spi_sdat_i,

-- GPIO signals
-- NOTE(review): nothing in this architecture drives gpio_in or uses
-- gpio_out/gpio_dir; harmless while NGPIO = 0 -- confirm for NGPIO > 0
gpio_in => gpio_in,
gpio_out => gpio_out,
gpio_dir => gpio_dir,

-- External interrupts
ext_irq_sdcard => ext_irq_sdcard,

-- DRAM wishbone
wb_dram_in => wb_dram_in,
wb_dram_out => wb_dram_out,

-- IO wishbone
wb_ext_io_in => wb_ext_io_in,
wb_ext_io_out => wb_ext_io_out,
wb_ext_is_dram_csr => wb_ext_is_dram_csr,
wb_ext_is_dram_init => wb_ext_is_dram_init,
wb_ext_is_sdcard => wb_ext_is_sdcard,

-- DMA wishbone
wishbone_dma_in => wb_sddma_in,
wishbone_dma_out => wb_sddma_out,

alt_reset => core_alt_reset
);

-- SPI Flash
--
-- The SPI clock is output through the USRMCLK primitive rather than an
-- ordinary port; its tristate control input is tied to '0'.
uclk: USRMCLK
    port map (
        USRMCLKTS => '0',
        USRMCLKI  => spi_sck
    );

-- Chip select is a plain output
spi_flash_cs_n <= spi_cs_n;

-- Each data pin is driven only while its output-enable bit is set and is
-- released to high impedance otherwise
spi_flash_hold_n <= spi_sdat_o(3) when spi_sdat_oe(3) = '1' else 'Z';
spi_flash_wp_n   <= spi_sdat_o(2) when spi_sdat_oe(2) = '1' else 'Z';
spi_flash_miso   <= spi_sdat_o(1) when spi_sdat_oe(1) = '1' else 'Z';
spi_flash_mosi   <= spi_sdat_o(0) when spi_sdat_oe(0) = '1' else 'Z';

-- Read all four pins back: bit 3 = hold_n, 2 = wp_n, 1 = miso, 0 = mosi
spi_sdat_i <= spi_flash_hold_n & spi_flash_wp_n & spi_flash_miso & spi_flash_mosi;

-- Build without LiteDRAM: the clock comes from clock_generator and the SoC
-- reset directly from the soc_reset controller.
-- (The unused local signal ddram_clk_dummy has been removed; nothing in this
-- branch referenced it.)
-- NOTE(review): the DRAM output ports, including ddram_gnd and ddram_vccio,
-- are not driven in this configuration -- confirm that is acceptable.
nodram: if not USE_LITEDRAM generate
begin
    -- Reset sequencing for both the PLL and the SoC
    reset_controller: entity work.soc_reset
        generic map(
            RESET_LOW => RESET_LOW
        )
        port map(
            ext_clk       => ext_clk,
            pll_clk       => system_clk,
            pll_locked_in => system_clk_locked,
            ext_rst_in    => ext_rst_n,
            pll_rst_out   => pll_rst,
            rst_out       => soc_rst
        );

    -- System clock derived from the external clock
    clkgen: entity work.clock_generator
        generic map(
            CLK_INPUT_HZ  => CLK_INPUT,
            CLK_OUTPUT_HZ => CLK_FREQUENCY
        )
        port map(
            ext_clk        => ext_clk,
            pll_rst_in     => pll_rst,
            pll_clk_out    => system_clk,
            pll_locked_out => system_clk_locked
        );

    -- Fixed LED values (blue and red set, green clear) fed to the LED PWM
    led0_b_pwm <= '1';
    led0_r_pwm <= '1';
    led0_g_pwm <= '0';

    -- No DRAM, so the alternate reset is never requested
    core_alt_reset <= '0';

end generate;

-- Build with LiteDRAM: the litedram_wrapper generates the system clock and
-- its own system reset, from which the SoC reset is derived.
has_dram: if USE_LITEDRAM generate
    signal dram_init_done  : std_ulogic;
    signal dram_init_error : std_ulogic;
    signal dram_sys_rst    : std_ulogic;
    -- Receives the controller's SoC reset output; not used further here
    signal rst_gen_rst     : std_ulogic;
begin

    -- Eventually dig out the frequency from
    -- litedram generate.py sys_clk_freq
    -- but for now, assert it's 48 MHz for orangecrab
    assert CLK_FREQUENCY = 48000000
        report "CLK_FREQUENCY must be 48000000 when USE_LITEDRAM is true"
        severity error;

    reset_controller: entity work.soc_reset
        generic map(
            RESET_LOW      => RESET_LOW,
            PLL_RESET_BITS => 18,
            SOC_RESET_BITS => 1
        )
        port map(
            ext_clk       => ext_clk,
            pll_clk       => system_clk,
            pll_locked_in => system_clk_locked,
            ext_rst_in    => ext_rst_n,
            pll_rst_out   => pll_rst,
            rst_out       => rst_gen_rst
        );

    -- Generate SoC reset
    -- ext_rst_n acts asynchronously, so it must be in the sensitivity list
    -- for simulation to match the synthesised flip-flop.
    -- NOTE(review): the polarity is hard-coded active-low here and does not
    -- follow the RESET_LOW generic -- confirm this is intended.
    soc_rst_gen: process(system_clk, ext_rst_n)
    begin
        if ext_rst_n = '0' then
            soc_rst <= '1';
        elsif rising_edge(system_clk) then
            soc_rst <= dram_sys_rst or not system_clk_locked;
        end if;
    end process;

    dram: entity work.litedram_wrapper
        generic map(
            DRAM_ABITS      => 24,
            DRAM_ALINES     => 14,
            DRAM_DLINES     => 16,
            DRAM_CKLINES    => 1,
            DRAM_PORT_WIDTH => 128,
            NUM_LINES       => 8, -- reduce from default of 64 to make smaller/timing
            PAYLOAD_FILE    => RAM_INIT_FILE,
            PAYLOAD_SIZE    => PAYLOAD_SIZE
        )
        port map(
            clk_in         => ext_clk,
            rst            => pll_rst,
            system_clk     => system_clk,
            system_reset   => dram_sys_rst,
            core_alt_reset => core_alt_reset,
            pll_locked     => system_clk_locked,

            -- Main data port and control/CSR port on the IO bus
            wb_in           => wb_dram_in,
            wb_out          => wb_dram_out,
            wb_ctrl_in      => wb_ext_io_in,
            wb_ctrl_out     => wb_dram_ctrl_out,
            wb_ctrl_is_csr  => wb_ext_is_dram_csr,
            wb_ctrl_is_init => wb_ext_is_dram_init,

            init_done  => dram_init_done,
            init_error => dram_init_error,

            ddram_a     => ddram_a,
            ddram_ba    => ddram_ba,
            ddram_ras_n => ddram_ras_n,
            ddram_cas_n => ddram_cas_n,
            ddram_we_n  => ddram_we_n,
            ddram_cs_n  => ddram_cs_n,
            ddram_dm    => ddram_dm,
            ddram_dq    => ddram_dq,
            ddram_dqs_p => ddram_dqs_p,
            ddram_clk_p => ddram_clk_p,
            -- only the positive differential pin is instantiated
            --ddram_dqs_n => ddram_dqs_n,
            --ddram_clk_n => ddram_clk_n,
            ddram_cke   => ddram_cke,
            ddram_odt   => ddram_odt,

            ddram_reset_n => ddram_reset_n
        );

    ddram_gnd <= "00";
    -- for power consumption.
    -- https://github.com/orangecrab-fpga/orangecrab-hardware/issues/19#issuecomment-683479378
    ddram_vccio <= "111111";

    -- Status colours: blue = init still running, red = init error,
    -- green = init finished without error
    led0_b_pwm <= not dram_init_done;
    led0_r_pwm <= dram_init_error;
    led0_g_pwm <= dram_init_done and not dram_init_error;

end generate;


-- SD card pmod
-- Instantiates the litesdcard_core component (declared here, defined outside
-- this file), attaches its control port to the external IO wishbone and
-- bridges its DMA master port to the SoC's DMA wishbone.
has_sdcard : if USE_LITESDCARD generate
component litesdcard_core port (
clk : in std_ulogic;
rst : in std_ulogic;
-- wishbone for accessing control registers
wb_ctrl_adr : in std_ulogic_vector(29 downto 0);
wb_ctrl_dat_w : in std_ulogic_vector(31 downto 0);
wb_ctrl_dat_r : out std_ulogic_vector(31 downto 0);
wb_ctrl_sel : in std_ulogic_vector(3 downto 0);
wb_ctrl_cyc : in std_ulogic;
wb_ctrl_stb : in std_ulogic;
wb_ctrl_ack : out std_ulogic;
wb_ctrl_we : in std_ulogic;
wb_ctrl_cti : in std_ulogic_vector(2 downto 0);
wb_ctrl_bte : in std_ulogic_vector(1 downto 0);
wb_ctrl_err : out std_ulogic;
-- wishbone for SD card core to use for DMA
wb_dma_adr : out std_ulogic_vector(29 downto 0);
wb_dma_dat_w : out std_ulogic_vector(31 downto 0);
wb_dma_dat_r : in std_ulogic_vector(31 downto 0);
wb_dma_sel : out std_ulogic_vector(3 downto 0);
wb_dma_cyc : out std_ulogic;
wb_dma_stb : out std_ulogic;
wb_dma_ack : in std_ulogic;
wb_dma_we : out std_ulogic;
wb_dma_cti : out std_ulogic_vector(2 downto 0);
wb_dma_bte : out std_ulogic_vector(1 downto 0);
wb_dma_err : in std_ulogic;
-- connections to SD card
sdcard_data : inout std_ulogic_vector(3 downto 0);
sdcard_cmd : inout std_ulogic;
sdcard_clk : out std_ulogic;
sdcard_cd : in std_ulogic;
irq : out std_ulogic
);
end component;

-- Locally derived cycle and address signals for the control port
signal wb_sdcard_cyc : std_ulogic;
signal wb_sdcard_adr : std_ulogic_vector(29 downto 0);

begin
litesdcard : litesdcard_core
port map (
clk => system_clk,
rst => soc_rst,
-- Control port: driven from the IO bus, with gated cyc and trimmed address
wb_ctrl_adr => wb_sdcard_adr,
wb_ctrl_dat_w => wb_ext_io_in.dat,
wb_ctrl_dat_r => wb_sdcard_out.dat,
wb_ctrl_sel => wb_ext_io_in.sel,
wb_ctrl_cyc => wb_sdcard_cyc,
wb_ctrl_stb => wb_ext_io_in.stb,
wb_ctrl_ack => wb_sdcard_out.ack,
wb_ctrl_we => wb_ext_io_in.we,
wb_ctrl_cti => "000",
wb_ctrl_bte => "00",
wb_ctrl_err => open,
-- DMA port: the core talks to the registered wb_sddma_nr / wb_sddma_ir
-- pair, which the process below converts to and from the SoC DMA bus
wb_dma_adr => wb_sddma_nr.adr,
wb_dma_dat_w => wb_sddma_nr.dat,
wb_dma_dat_r => wb_sddma_ir.dat,
wb_dma_sel => wb_sddma_nr.sel,
wb_dma_cyc => wb_sddma_nr.cyc,
wb_dma_stb => wb_sddma_nr.stb,
wb_dma_ack => wb_sddma_ir.ack,
wb_dma_we => wb_sddma_nr.we,
wb_dma_cti => open,
wb_dma_bte => open,
wb_dma_err => '0',
sdcard_data => sdcard_data,
sdcard_cmd => sdcard_cmd,
sdcard_clk => sdcard_clk,
sdcard_cd => sdcard_cd,
irq => ext_irq_sdcard
);

-- Gate cyc with chip select from SoC
wb_sdcard_cyc <= wb_ext_io_in.cyc and wb_ext_is_sdcard;

-- Only the low 14 address bits are passed on; the upper 16 are zero
wb_sdcard_adr <= x"0000" & wb_ext_io_in.adr(13 downto 0);

-- Control port is treated as non-pipelined: stall whenever there is no ack
wb_sdcard_out.stall <= not wb_sdcard_out.ack;

-- Convert non-pipelined DMA wishbone to pipelined by suppressing
-- non-acknowledged strobes
process(system_clk)
begin
if rising_edge(system_clk) then
-- Register the core's request; the stb field may be overridden below
-- (the last signal assignment in a process wins)
wb_sddma_out <= wb_sddma_nr;
-- Drop stb if it was already accepted earlier in this request, or is
-- being accepted right now (stb presented and the slave not stalling)
if wb_sddma_stb_sent = '1' or
(wb_sddma_out.stb = '1' and wb_sddma_in.stall = '0') then
wb_sddma_out.stb <= '0';
end if;
-- Clear the "sent" flag when the core ends the cycle or the registered
-- ack arrives; otherwise capture the core's stb while not stalled
if wb_sddma_nr.cyc = '0' or wb_sddma_ir.ack = '1' then
wb_sddma_stb_sent <= '0';
elsif wb_sddma_in.stall = '0' then
wb_sddma_stb_sent <= wb_sddma_nr.stb;
end if;
-- Register the SoC's response on its way back to the core
wb_sddma_ir <= wb_sddma_in;
end if;
end process;

end generate;

-- Mux WB response on the IO bus
-- The SD card response is returned while its select is active; anything
-- else gets the DRAM control response.
wb_ext_io_out <= wb_sdcard_out when wb_ext_is_sdcard = '1' else
wb_dram_ctrl_out;

-- Dims the RGB status LED: the *_pwm values are only passed to the pins
-- while the upper five bits of a free-running 9-bit counter are zero,
-- i.e. for 16 out of every 512 system clock cycles.
leds_pwm : process(system_clk)
begin
    if rising_edge(system_clk) then
        -- Free-running counter, wraps naturally at 9 bits
        pwm_counter <= std_ulogic_vector(unsigned(pwm_counter) + 1);

        -- LEDs are off by default ...
        led0_b <= '0';
        led0_r <= '0';
        led0_g <= '0';

        -- ... and show the status values only inside the "on" window
        if pwm_counter(8 downto 4) = "00000" then
            led0_b <= led0_b_pwm;
            led0_r <= led0_r_pwm;
            led0_g <= led0_g_pwm;
        end if;
    end if;
end process;

end architecture behaviour;

@ -1,587 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library unisim;
use unisim.vcomponents.all;

library work;
use work.wishbone_types.all;

-- Board top level: wraps the SoC and exposes UART, LED, SPI flash, Ethernet,
-- SD card and DRAM pins.
entity toplevel is
generic (
-- Memory size; used as block RAM size or DRAM init payload size depending
-- on USE_LITEDRAM / NO_BRAM (see get_bram_size / get_payload_size)
MEMORY_SIZE : integer := 16384;
RAM_INIT_FILE : string := "firmware.hex";
RESET_LOW : boolean := true;
-- System clock frequency in Hz
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
HAS_BTC : boolean := true;
HAS_SHORT_MULT : boolean := false;
USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
SPI_FLASH_OFFSET : integer := 4194304;
SPI_FLASH_DEF_CKDV : natural := 1;
SPI_FLASH_DEF_QUAD : boolean := true;
LOG_LENGTH : natural := 512;
USE_LITEETH : boolean := false;
UART_IS_16550 : boolean := true;
HAS_UART1 : boolean := false;
USE_LITESDCARD : boolean := false;
HAS_GPIO : boolean := false;
NGPIO : natural := 32
);
port(
ext_clk : in std_ulogic;
ext_rst_n : in std_ulogic;

-- UART0 signals:
uart_main_tx : out std_ulogic;
uart_main_rx : in std_ulogic;

-- LEDs
-- (the _n suffix suggests active-low outputs -- confirm against the board)
led0_n : out std_ulogic;
led1_n : out std_ulogic;

-- SPI
spi_flash_cs_n : out std_ulogic;
spi_flash_mosi : inout std_ulogic;
spi_flash_miso : inout std_ulogic;
spi_flash_wp_n : inout std_ulogic;
spi_flash_hold_n : inout std_ulogic;

-- Ethernet
-- (8-bit data with dv/er/en and col/crs; looks like a GMII/MII-style PHY
-- interface -- confirm against the LiteEth configuration)
eth_clocks_tx : in std_ulogic;
eth_clocks_gtx : out std_ulogic;
eth_clocks_rx : in std_ulogic;
eth_rst_n : out std_ulogic;
eth_mdio : inout std_ulogic;
eth_mdc : out std_ulogic;
eth_rx_dv : in std_ulogic;
eth_rx_er : in std_ulogic;
eth_rx_data : in std_ulogic_vector(7 downto 0);
eth_tx_en : out std_ulogic;
eth_tx_er : out std_ulogic;
eth_tx_data : out std_ulogic_vector(7 downto 0);
eth_col : in std_ulogic;
eth_crs : in std_ulogic;

-- SD card
sdcard_data : inout std_ulogic_vector(3 downto 0);
sdcard_cmd : inout std_ulogic;
sdcard_clk : out std_ulogic;
sdcard_cd : in std_ulogic;

-- DRAM wires
ddram_a : out std_ulogic_vector(13 downto 0);
ddram_ba : out std_ulogic_vector(2 downto 0);
ddram_ras_n : out std_ulogic;
ddram_cas_n : out std_ulogic;
ddram_we_n : out std_ulogic;
ddram_dm : out std_ulogic_vector(1 downto 0);
ddram_dq : inout std_ulogic_vector(15 downto 0);
ddram_dqs_p : inout std_ulogic_vector(1 downto 0);
ddram_dqs_n : inout std_ulogic_vector(1 downto 0);
-- Differential DRAM clock, exposed as scalars at the top level
ddram_clk_p : out std_ulogic;
ddram_clk_n : out std_ulogic;
ddram_cke : out std_ulogic;
ddram_odt : out std_ulogic;
ddram_reset_n : out std_ulogic
);
end entity toplevel;

architecture behaviour of toplevel is

-- Reset signals:
signal soc_rst : std_ulogic;
signal pll_rst : std_ulogic;

-- Internal clock signals:
signal system_clk : std_ulogic;
signal system_clk_locked : std_ulogic;

-- External IOs from the SoC
signal wb_ext_io_in : wb_io_master_out;
signal wb_ext_io_out : wb_io_slave_out;
signal wb_ext_is_dram_csr : std_ulogic;
signal wb_ext_is_dram_init : std_ulogic;
signal wb_ext_is_eth : std_ulogic;
signal wb_ext_is_sdcard : std_ulogic;

-- DRAM main data wishbone connection
signal wb_dram_in : wishbone_master_out;
signal wb_dram_out : wishbone_slave_out;

-- DRAM control wishbone connection
signal wb_dram_ctrl_out : wb_io_slave_out := wb_io_slave_out_init;

-- LiteEth connection
signal ext_irq_eth : std_ulogic;
signal wb_eth_out : wb_io_slave_out := wb_io_slave_out_init;

-- LiteSDCard connection
signal ext_irq_sdcard : std_ulogic := '0';
signal wb_sdcard_out : wb_io_slave_out := wb_io_slave_out_init;
signal wb_sddma_out : wb_io_master_out := wb_io_master_out_init;
signal wb_sddma_in : wb_io_slave_out;
signal wb_sddma_nr : wb_io_master_out;
signal wb_sddma_ir : wb_io_slave_out;
-- for conversion from non-pipelined wishbone to pipelined
signal wb_sddma_stb_sent : std_ulogic;

-- Control/status
signal core_alt_reset : std_ulogic;

-- SPI flash
signal spi_sck : std_ulogic;
signal spi_cs_n : std_ulogic;
signal spi_sdat_o : std_ulogic_vector(3 downto 0);
signal spi_sdat_oe : std_ulogic_vector(3 downto 0);
signal spi_sdat_i : std_ulogic_vector(3 downto 0);

-- ddram clock signals as vectors
signal ddram_clk_p_vec : std_ulogic_vector(0 downto 0);
signal ddram_clk_n_vec : std_ulogic_vector(0 downto 0);

-- Fixup various memory sizes based on generics
-- Size of the block RAM passed to the SoC as MEMORY_SIZE.
-- Returns 0 when LiteDRAM is used and NO_BRAM is set, otherwise the
-- MEMORY_SIZE generic unchanged.
function get_bram_size return natural is
begin
    -- Common case first: block RAM is kept at its configured size
    if not (USE_LITEDRAM and NO_BRAM) then
        return MEMORY_SIZE;
    end if;
    return 0;
end function;

-- Size of the payload preloaded through the DRAM init path: MEMORY_SIZE
-- when the design uses LiteDRAM with NO_BRAM set, otherwise zero.
function get_payload_size return natural is
    variable size : natural := 0;
begin
    if USE_LITEDRAM and NO_BRAM then
        size := MEMORY_SIZE;
    end if;
    return size;
end function;
constant BRAM_SIZE : natural := get_bram_size;
constant PAYLOAD_SIZE : natural := get_payload_size;
begin

-- Main SoC
soc0: entity work.soc
generic map(
MEMORY_SIZE => BRAM_SIZE,
RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC,
HAS_SHORT_MULT => HAS_SHORT_MULT,
HAS_DRAM => USE_LITEDRAM,
DRAM_SIZE => 256 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE,
HAS_SPI_FLASH => true,
SPI_FLASH_DLINES => 4,
SPI_FLASH_OFFSET => SPI_FLASH_OFFSET,
SPI_FLASH_DEF_CKDV => SPI_FLASH_DEF_CKDV,
SPI_FLASH_DEF_QUAD => SPI_FLASH_DEF_QUAD,
LOG_LENGTH => LOG_LENGTH,
HAS_LITEETH => USE_LITEETH,
UART0_IS_16550 => UART_IS_16550,
HAS_UART1 => HAS_UART1,
HAS_SD_CARD => USE_LITESDCARD,
HAS_GPIO => HAS_GPIO,
NGPIO => NGPIO
)
port map (
-- System signals
system_clk => system_clk,
rst => soc_rst,

-- UART signals
uart0_txd => uart_main_tx,
uart0_rxd => uart_main_rx,

-- SPI signals
spi_flash_sck => spi_sck,
spi_flash_cs_n => spi_cs_n,
spi_flash_sdat_o => spi_sdat_o,
spi_flash_sdat_oe => spi_sdat_oe,
spi_flash_sdat_i => spi_sdat_i,

-- External interrupts
ext_irq_eth => ext_irq_eth,
ext_irq_sdcard => ext_irq_sdcard,

-- DRAM wishbone
wb_dram_in => wb_dram_in,
wb_dram_out => wb_dram_out,

-- IO wishbone
wb_ext_io_in => wb_ext_io_in,
wb_ext_io_out => wb_ext_io_out,
wb_ext_is_dram_csr => wb_ext_is_dram_csr,
wb_ext_is_dram_init => wb_ext_is_dram_init,
wb_ext_is_eth => wb_ext_is_eth,
wb_ext_is_sdcard => wb_ext_is_sdcard,

-- DMA wishbone
wishbone_dma_in => wb_sddma_in,
wishbone_dma_out => wb_sddma_out,

alt_reset => core_alt_reset
);

-- SPI Flash
--
-- Chip select is passed straight through. Each of the four data pins is
-- bidirectional: it is driven from spi_sdat_o(n) only while the matching
-- output-enable bit spi_sdat_oe(n) is '1', and is released to 'Z'
-- otherwise. The pin level is always fed back to the SoC on spi_sdat_i(n).
spi_flash_cs_n <= spi_cs_n;
spi_flash_mosi <= spi_sdat_o(0) when spi_sdat_oe(0) = '1' else 'Z';
spi_flash_miso <= spi_sdat_o(1) when spi_sdat_oe(1) = '1' else 'Z';
spi_flash_wp_n <= spi_sdat_o(2) when spi_sdat_oe(2) = '1' else 'Z';
spi_flash_hold_n <= spi_sdat_o(3) when spi_sdat_oe(3) = '1' else 'Z';
spi_sdat_i(0) <= spi_flash_mosi;
spi_sdat_i(1) <= spi_flash_miso;
spi_sdat_i(2) <= spi_flash_wp_n;
spi_sdat_i(3) <= spi_flash_hold_n;

-- The SPI clock has no top-level pin of its own; it leaves the device
-- through the STARTUPE2 primitive's USRCCLKO input instead.
-- NOTE(review): presumably the flash clock is wired to the dedicated
-- configuration clock pin on this board -- confirm against the schematic.
-- All other STARTUPE2 inputs are tied to constants.
STARTUPE2_INST: STARTUPE2
port map (
CLK => '0',
GSR => '0',
GTS => '0',
KEYCLEARB => '0',
PACK => '0',
USRCCLKO => spi_sck,
USRCCLKTS => '0',
USRDONEO => '1',
USRDONETS => '0'
);

-- Configuration without LiteDRAM: the system clock and the SoC reset come
-- from the local clock generator and reset controller, and the DDR clock
-- pair is parked at a constant level.
nodram: if not USE_LITEDRAM generate
signal ddram_clk_dummy : std_ulogic;
begin
-- Produces the PLL reset and the SoC reset from the external reset pin
-- and the PLL lock indication.
reset_controller: entity work.soc_reset
generic map(
RESET_LOW => RESET_LOW
)
port map(
ext_clk => ext_clk,
pll_clk => system_clk,
pll_locked_in => system_clk_locked,
ext_rst_in => ext_rst_n,
pll_rst_out => pll_rst,
rst_out => soc_rst
);

-- Derives system_clk from ext_clk. The input frequency is hard-coded
-- to 50 MHz here rather than taken from a generic.
clkgen: entity work.clock_generator
generic map(
CLK_INPUT_HZ => 50000000,
CLK_OUTPUT_HZ => CLK_FREQUENCY
)
port map(
ext_clk => ext_clk,
pll_rst_in => pll_rst,
pll_clk_out => system_clk,
pll_locked_out => system_clk_locked
);

-- No DRAM wrapper in this configuration, so the alternate reset request
-- it would otherwise drive is tied inactive.
core_alt_reset <= '0';

-- Vivado barfs on those differential signals if left
-- unconnected. So instantiate a diff. buffer and feed
-- it a constant '0'.
dummy_dram_clk: OBUFDS
port map (
O => ddram_clk_p,
OB => ddram_clk_n,
I => ddram_clk_dummy
);
ddram_clk_dummy <= '0';

end generate;

-- Configuration with LiteDRAM: the LiteDRAM wrapper generates the system
-- clock from ext_clk and provides the system reset; the local reset
-- controller only supplies the PLL reset.
has_dram: if USE_LITEDRAM generate
    signal dram_init_done  : std_ulogic;
    signal dram_init_error : std_ulogic;
    signal dram_sys_rst    : std_ulogic;
    signal rst_gen_rst     : std_ulogic;
begin

    -- Eventually dig out the frequency from the generator
    -- but for now, assert it's 100Mhz
    assert CLK_FREQUENCY = 100000000
        report "CLK_FREQUENCY must be 100000000 when USE_LITEDRAM is set";

    -- Only pll_rst_out is used from this instance; its rst_out lands on
    -- rst_gen_rst, which nothing else in this block reads.
    reset_controller: entity work.soc_reset
        generic map(
            RESET_LOW      => RESET_LOW,
            PLL_RESET_BITS => 18,
            SOC_RESET_BITS => 1
            )
        port map(
            ext_clk       => ext_clk,
            pll_clk       => system_clk,
            pll_locked_in => system_clk_locked,
            ext_rst_in    => ext_rst_n,
            pll_rst_out   => pll_rst,
            rst_out       => rst_gen_rst
            );

    -- Generate SoC reset
    --
    -- Asserted asynchronously while ext_rst_n is low, then follows the
    -- DRAM wrapper's system reset / PLL-unlocked state on system_clk.
    -- ext_rst_n must be in the sensitivity list: it is an asynchronous
    -- control, and leaving it out makes simulation react to it only on
    -- clock events while synthesis infers a true asynchronous set.
    -- NOTE(review): the polarity is fixed to active-low here and does not
    -- follow the RESET_LOW generic -- confirm that is intended.
    soc_rst_gen: process(system_clk, ext_rst_n)
    begin
        if ext_rst_n = '0' then
            soc_rst <= '1';
        elsif rising_edge(system_clk) then
            soc_rst <= dram_sys_rst or not system_clk_locked;
        end if;
    end process;

    -- The wrapper presents the DDR clock pair as one-element vectors while
    -- the top-level ports are scalars. The wrapper drives the vectors, so
    -- the scalar output ports are fed from element 0 (assigning the other
    -- way round would read an output port, add a second driver to the
    -- vectors and leave the pins undriven).
    ddram_clk_p <= ddram_clk_p_vec(0);
    ddram_clk_n <= ddram_clk_n_vec(0);

    dram: entity work.litedram_wrapper
        generic map(
            DRAM_ABITS      => 24,
            DRAM_ALINES     => 14,
            DRAM_DLINES     => 16,
            DRAM_CKLINES    => 1,
            DRAM_PORT_WIDTH => 128,
            PAYLOAD_FILE    => RAM_INIT_FILE,
            PAYLOAD_SIZE    => PAYLOAD_SIZE
            )
        port map(
            clk_in          => ext_clk,
            rst             => pll_rst,
            system_clk      => system_clk,
            system_reset    => dram_sys_rst,
            core_alt_reset  => core_alt_reset,
            pll_locked      => system_clk_locked,

            wb_in           => wb_dram_in,
            wb_out          => wb_dram_out,
            wb_ctrl_in      => wb_ext_io_in,
            wb_ctrl_out     => wb_dram_ctrl_out,
            wb_ctrl_is_csr  => wb_ext_is_dram_csr,
            wb_ctrl_is_init => wb_ext_is_dram_init,

            init_done       => dram_init_done,
            init_error      => dram_init_error,

            ddram_a         => ddram_a,
            ddram_ba        => ddram_ba,
            ddram_ras_n     => ddram_ras_n,
            ddram_cas_n     => ddram_cas_n,
            ddram_we_n      => ddram_we_n,
            ddram_cs_n      => open,
            ddram_dm        => ddram_dm,
            ddram_dq        => ddram_dq,
            ddram_dqs_p     => ddram_dqs_p,
            ddram_dqs_n     => ddram_dqs_n,
            ddram_clk_p     => ddram_clk_p_vec,
            ddram_clk_n     => ddram_clk_n_vec,
            ddram_cke       => ddram_cke,
            ddram_odt       => ddram_odt,
            ddram_reset_n   => ddram_reset_n
            );

end generate;

-- Optional LiteEth MAC: instantiates the liteeth_core component on the
-- external IO wishbone and routes its interrupt to ext_irq_eth.
has_liteeth : if USE_LITEETH generate

-- Component declaration for the LiteEth core; it is bound by name rather
-- than instantiated as a work-library entity.
component liteeth_core port (
sys_clock : in std_ulogic;
sys_reset : in std_ulogic;
gmii_eth_clocks_tx : in std_ulogic;
gmii_eth_clocks_gtx : out std_ulogic;
gmii_eth_clocks_rx : in std_ulogic;
gmii_eth_rst_n : out std_ulogic;
gmii_eth_mdio : inout std_ulogic;
gmii_eth_mdc : out std_ulogic;
gmii_eth_rx_dv : in std_ulogic;
gmii_eth_rx_er : in std_ulogic;
gmii_eth_rx_data : in std_ulogic_vector(7 downto 0);
gmii_eth_tx_en : out std_ulogic;
gmii_eth_tx_er : out std_ulogic;
gmii_eth_tx_data : out std_ulogic_vector(7 downto 0);
gmii_eth_col : in std_ulogic;
gmii_eth_crs : in std_ulogic;
wishbone_adr : in std_ulogic_vector(29 downto 0);
wishbone_dat_w : in std_ulogic_vector(31 downto 0);
wishbone_dat_r : out std_ulogic_vector(31 downto 0);
wishbone_sel : in std_ulogic_vector(3 downto 0);
wishbone_cyc : in std_ulogic;
wishbone_stb : in std_ulogic;
wishbone_ack : out std_ulogic;
wishbone_we : in std_ulogic;
wishbone_cti : in std_ulogic_vector(2 downto 0);
wishbone_bte : in std_ulogic_vector(1 downto 0);
wishbone_err : out std_ulogic;
interrupt : out std_ulogic
);
end component;

signal wb_eth_cyc : std_ulogic;
signal wb_eth_adr : std_ulogic_vector(29 downto 0);

-- Change this to use a PLL instead of a BUFR to generate the 25Mhz
-- reference clock to the PHY.
-- NOTE(review): USE_PLL is not referenced anywhere in this generate
-- block, and no BUFR or PLL is instantiated here -- confirm whether the
-- constant is still needed.
constant USE_PLL : boolean := false;
begin
-- Data, byte selects, strobe and write enable come straight from the
-- shared IO wishbone; cyc and the address are conditioned below. The
-- burst fields (cti/bte) are tied to zero and the error output is unused.
liteeth : liteeth_core
port map(
sys_clock => system_clk,
sys_reset => soc_rst,
gmii_eth_clocks_tx => eth_clocks_tx,
gmii_eth_clocks_gtx => eth_clocks_gtx,
gmii_eth_clocks_rx => eth_clocks_rx,
gmii_eth_rst_n => eth_rst_n,
gmii_eth_mdio => eth_mdio,
gmii_eth_mdc => eth_mdc,
gmii_eth_rx_dv => eth_rx_dv,
gmii_eth_rx_er => eth_rx_er,
gmii_eth_rx_data => eth_rx_data,
gmii_eth_tx_en => eth_tx_en,
gmii_eth_tx_er => eth_tx_er,
gmii_eth_tx_data => eth_tx_data,
gmii_eth_col => eth_col,
gmii_eth_crs => eth_crs,
wishbone_adr => wb_eth_adr,
wishbone_dat_w => wb_ext_io_in.dat,
wishbone_dat_r => wb_eth_out.dat,
wishbone_sel => wb_ext_io_in.sel,
wishbone_cyc => wb_eth_cyc,
wishbone_stb => wb_ext_io_in.stb,
wishbone_ack => wb_eth_out.ack,
wishbone_we => wb_ext_io_in.we,
wishbone_cti => "000",
wishbone_bte => "00",
wishbone_err => open,
interrupt => ext_irq_eth
);

-- Gate cyc with "chip select" from soc
wb_eth_cyc <= wb_ext_io_in.cyc and wb_ext_is_eth;

-- Remove top address bits as liteeth decoder doesn't know about them
-- (the low 15 address bits are kept, zero-extended to 30 bits)
wb_eth_adr <= x"000" & "000" & wb_ext_io_in.adr(14 downto 0);

-- LiteETH isn't pipelined
-- so stall the master until the core acknowledges.
wb_eth_out.stall <= not wb_eth_out.ack;

end generate;

-- Without LiteEth there is no ethernet interrupt source; tie the SoC's
-- ethernet interrupt input inactive.
no_liteeth : if not USE_LITEETH generate
ext_irq_eth <= '0';
end generate;

-- SD card pmod
-- Optional LiteSDCard controller: control registers sit on the external IO
-- wishbone, and the core's DMA master port is adapted and handed to the
-- SoC's DMA wishbone (wb_sddma_out / wb_sddma_in).
has_sdcard : if USE_LITESDCARD generate
component litesdcard_core port (
clk : in std_ulogic;
rst : in std_ulogic;
-- wishbone for accessing control registers
wb_ctrl_adr : in std_ulogic_vector(29 downto 0);
wb_ctrl_dat_w : in std_ulogic_vector(31 downto 0);
wb_ctrl_dat_r : out std_ulogic_vector(31 downto 0);
wb_ctrl_sel : in std_ulogic_vector(3 downto 0);
wb_ctrl_cyc : in std_ulogic;
wb_ctrl_stb : in std_ulogic;
wb_ctrl_ack : out std_ulogic;
wb_ctrl_we : in std_ulogic;
wb_ctrl_cti : in std_ulogic_vector(2 downto 0);
wb_ctrl_bte : in std_ulogic_vector(1 downto 0);
wb_ctrl_err : out std_ulogic;
-- wishbone for SD card core to use for DMA
wb_dma_adr : out std_ulogic_vector(29 downto 0);
wb_dma_dat_w : out std_ulogic_vector(31 downto 0);
wb_dma_dat_r : in std_ulogic_vector(31 downto 0);
wb_dma_sel : out std_ulogic_vector(3 downto 0);
wb_dma_cyc : out std_ulogic;
wb_dma_stb : out std_ulogic;
wb_dma_ack : in std_ulogic;
wb_dma_we : out std_ulogic;
wb_dma_cti : out std_ulogic_vector(2 downto 0);
wb_dma_bte : out std_ulogic_vector(1 downto 0);
wb_dma_err : in std_ulogic;
-- connections to SD card
sdcard_data : inout std_ulogic_vector(3 downto 0);
sdcard_cmd : inout std_ulogic;
sdcard_clk : out std_ulogic;
sdcard_cd : in std_ulogic;
irq : out std_ulogic
);
end component;

signal wb_sdcard_cyc : std_ulogic;
signal wb_sdcard_adr : std_ulogic_vector(29 downto 0);

begin
-- The control port takes data/sel/stb/we directly from the shared IO
-- wishbone, with cyc and address conditioned below. The DMA port talks to
-- the wb_sddma_nr / wb_sddma_ir signals, which the process further down
-- registers to and from the SoC's DMA wishbone. Burst fields and the DMA
-- error input are unused (tied off or left open).
litesdcard : litesdcard_core
port map (
clk => system_clk,
rst => soc_rst,
wb_ctrl_adr => wb_sdcard_adr,
wb_ctrl_dat_w => wb_ext_io_in.dat,
wb_ctrl_dat_r => wb_sdcard_out.dat,
wb_ctrl_sel => wb_ext_io_in.sel,
wb_ctrl_cyc => wb_sdcard_cyc,
wb_ctrl_stb => wb_ext_io_in.stb,
wb_ctrl_ack => wb_sdcard_out.ack,
wb_ctrl_we => wb_ext_io_in.we,
wb_ctrl_cti => "000",
wb_ctrl_bte => "00",
wb_ctrl_err => open,
wb_dma_adr => wb_sddma_nr.adr,
wb_dma_dat_w => wb_sddma_nr.dat,
wb_dma_dat_r => wb_sddma_ir.dat,
wb_dma_sel => wb_sddma_nr.sel,
wb_dma_cyc => wb_sddma_nr.cyc,
wb_dma_stb => wb_sddma_nr.stb,
wb_dma_ack => wb_sddma_ir.ack,
wb_dma_we => wb_sddma_nr.we,
wb_dma_cti => open,
wb_dma_bte => open,
wb_dma_err => '0',
sdcard_data => sdcard_data,
sdcard_cmd => sdcard_cmd,
sdcard_clk => sdcard_clk,
sdcard_cd => sdcard_cd,
irq => ext_irq_sdcard
);

-- Gate cyc with chip select from SoC
wb_sdcard_cyc <= wb_ext_io_in.cyc and wb_ext_is_sdcard;

-- Keep only the low 14 address bits, zero-extended to 30 bits.
wb_sdcard_adr <= x"0000" & wb_ext_io_in.adr(13 downto 0);

-- Stall the master until the core acknowledges, as the control port is
-- driven one request at a time.
wb_sdcard_out.stall <= not wb_sdcard_out.ack;

-- Convert non-pipelined DMA wishbone to pipelined by suppressing
-- non-acknowledged strobes
--
-- Statement order matters here: later signal assignments in the process
-- override earlier ones for the same clock edge.
process(system_clk)
begin
if rising_edge(system_clk) then
-- Default: forward the core's request (registered by one cycle).
wb_sddma_out <= wb_sddma_nr;
-- Drop the outgoing strobe once it has been presented: either it was
-- already recorded as sent, or it is on the bus now and not stalled.
if wb_sddma_stb_sent = '1' or
(wb_sddma_out.stb = '1' and wb_sddma_in.stall = '0') then
wb_sddma_out.stb <= '0';
end if;
-- Forget the "sent" state when the cycle ends or the (registered) ack
-- reaches the core; otherwise, while not stalled, track the core's stb.
if wb_sddma_nr.cyc = '0' or wb_sddma_ir.ack = '1' then
wb_sddma_stb_sent <= '0';
elsif wb_sddma_in.stall = '0' then
wb_sddma_stb_sent <= wb_sddma_nr.stb;
end if;
-- Register the response path back to the core as well.
wb_sddma_ir <= wb_sddma_in;
end if;
end process;

end generate;

-- Mux WB response on the IO bus
-- Priority order: ethernet select, then SD card select; anything else
-- gets the DRAM control port's response.
wb_ext_io_out <= wb_eth_out when wb_ext_is_eth = '1' else
wb_sdcard_out when wb_ext_is_sdcard = '1' else
wb_dram_ctrl_out;

-- Status LEDs: led0_n carries the PLL lock signal as-is, led1_n carries
-- the inverted SoC reset.
-- NOTE(review): the _n suffix suggests active-low LEDs -- confirm the
-- intended polarity of each against the board.
led0_n <= system_clk_locked;
led1_n <= not soc_rst;

end architecture behaviour;

@ -1,23 +1,11 @@
library ieee;
use ieee.std_logic_1164.all;

library work;
use work.wishbone_types.all;

entity toplevel is
generic (
MEMORY_SIZE : positive := (384*1024);
MEMORY_SIZE : positive := 524288;
RAM_INIT_FILE : string := "firmware.hex";
RESET_LOW : boolean := true;
CLK_INPUT : positive := 100000000;
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
HAS_BTC : boolean := false;
HAS_SHORT_MULT: boolean := false;
ICACHE_NUM_LINES : natural := 64;
LOG_LENGTH : natural := 512;
DISABLE_FLATTEN_CORE : boolean := false;
UART_IS_16550 : boolean := true
RESET_LOW : boolean := true
);
port(
ext_clk : in std_ulogic;
@ -25,7 +13,11 @@ entity toplevel is

-- UART0 signals:
uart0_txd : out std_ulogic;
uart0_rxd : in std_ulogic
uart0_rxd : in std_ulogic;

-- NIA out on GPIOs
ja : out std_ulogic_vector(7 downto 0);
jb : out std_ulogic_vector(7 downto 0)
);
end entity toplevel;

@ -33,13 +25,16 @@ architecture behaviour of toplevel is

-- Reset signals:
signal soc_rst : std_ulogic;
signal pll_rst : std_ulogic;
signal pll_rst_n : std_ulogic;

-- Internal clock signals:
signal system_clk : std_ulogic;
signal system_clk_locked : std_ulogic;

signal nia : std_ulogic_vector(61 downto 0);
begin
ja <= nia(7 downto 0);
jb <= nia(15 downto 8);

reset_controller: entity work.soc_reset
generic map(
@ -50,18 +45,14 @@ begin
pll_clk => system_clk,
pll_locked_in => system_clk_locked,
ext_rst_in => ext_rst,
pll_rst_out => pll_rst,
pll_rst_out => pll_rst_n,
rst_out => soc_rst
);

clkgen: entity work.clock_generator
generic map(
CLK_INPUT_HZ => CLK_INPUT,
CLK_OUTPUT_HZ => CLK_FREQUENCY
)
port map(
ext_clk => ext_clk,
pll_rst_in => pll_rst,
pll_rst_in => pll_rst_n,
pll_clk_out => system_clk,
pll_locked_out => system_clk_locked
);
@ -71,21 +62,15 @@ begin
generic map(
MEMORY_SIZE => MEMORY_SIZE,
RAM_INIT_FILE => RAM_INIT_FILE,
SIM => false,
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC,
HAS_SHORT_MULT => HAS_SHORT_MULT,
ICACHE_NUM_LINES => ICACHE_NUM_LINES,
LOG_LENGTH => LOG_LENGTH,
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE,
UART0_IS_16550 => UART_IS_16550
RESET_LOW => RESET_LOW,
SIM => false
)
port map (
system_clk => system_clk,
rst => soc_rst,
uart0_txd => uart0_txd,
uart0_rxd => uart0_rxd
uart0_rxd => uart0_rxd,
nia_out => nia
);

end architecture behaviour;

@ -1,487 +0,0 @@
################################################################################
# clkin, reset, uart pins...
################################################################################

set_property -dict { PACKAGE_PIN M21 IOSTANDARD LVCMOS33 } [get_ports { ext_clk }];

set_property -dict { PACKAGE_PIN H7 IOSTANDARD LVCMOS33 } [get_ports { ext_rst_n }];

set_property -dict { PACKAGE_PIN E3 IOSTANDARD LVCMOS33 } [get_ports { uart_main_tx }];
set_property -dict { PACKAGE_PIN F3 IOSTANDARD LVCMOS33 } [get_ports { uart_main_rx }];

################################################################################
# LEDs
################################################################################

set_property -dict { PACKAGE_PIN V16 IOSTANDARD LVCMOS33 } [get_ports { led0_n }];
set_property -dict { PACKAGE_PIN V17 IOSTANDARD LVCMOS33 } [get_ports { led1_n }];

################################################################################
# SPI Flash
################################################################################

set_property -dict { PACKAGE_PIN P18 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_cs_n }];
set_property -dict { PACKAGE_PIN R14 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_mosi }];
set_property -dict { PACKAGE_PIN R15 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_miso }];
set_property -dict { PACKAGE_PIN P14 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_wp_n }];
set_property -dict { PACKAGE_PIN N14 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_hold_n }];

################################################################################
# Micro SD
################################################################################

set_property -dict { PACKAGE_PIN M5 IOSTANDARD LVCMOS33 SLEW FAST } [get_ports { sdcard_data[0] }];
set_property -dict { PACKAGE_PIN M7 IOSTANDARD LVCMOS33 SLEW FAST } [get_ports { sdcard_data[1] }];
set_property -dict { PACKAGE_PIN H6 IOSTANDARD LVCMOS33 SLEW FAST } [get_ports { sdcard_data[2] }];
set_property -dict { PACKAGE_PIN J6 IOSTANDARD LVCMOS33 SLEW FAST } [get_ports { sdcard_data[3] }];
set_property -dict { PACKAGE_PIN J8 IOSTANDARD LVCMOS33 SLEW FAST } [get_ports { sdcard_cmd }];
set_property -dict { PACKAGE_PIN L4 IOSTANDARD LVCMOS33 SLEW FAST } [get_ports { sdcard_clk }];
set_property -dict { PACKAGE_PIN N6 IOSTANDARD LVCMOS33 } [get_ports { sdcard_cd }];

# Put registers into IOBs to improve timing
set_property IOB true [get_cells -hierarchical -filter {NAME =~*.litesdcard/sdcard_*}]

################################################################################
# PMOD header J10 (high-speed, no protection resistors)
################################################################################

#set_property -dict { PACKAGE_PIN D5 IOSTANDARD LVCMOS33 } [get_ports { pmod_j10_1 }];
#set_property -dict { PACKAGE_PIN G5 IOSTANDARD LVCMOS33 } [get_ports { pmod_j10_2 }];
#set_property -dict { PACKAGE_PIN G7 IOSTANDARD LVCMOS33 } [get_ports { pmod_j10_3 }];
#set_property -dict { PACKAGE_PIN G8 IOSTANDARD LVCMOS33 } [get_ports { pmod_j10_4 }];
#set_property -dict { PACKAGE_PIN E5 IOSTANDARD LVCMOS33 } [get_ports { pmod_j10_7 }];
#set_property -dict { PACKAGE_PIN E6 IOSTANDARD LVCMOS33 } [get_ports { pmod_j10_8 }];
#set_property -dict { PACKAGE_PIN D6 IOSTANDARD LVCMOS33 } [get_ports { pmod_j10_9 }];
#set_property -dict { PACKAGE_PIN G6 IOSTANDARD LVCMOS33 } [get_ports { pmod_j10_10 }];

################################################################################
# PMOD header J11 (high-speed, no protection resistors)
################################################################################

#set_property -dict { PACKAGE_PIN H4 IOSTANDARD LVCMOS33 } [get_ports { pmod_j11_1 }];
#set_property -dict { PACKAGE_PIN F4 IOSTANDARD LVCMOS33 } [get_ports { pmod_j11_2 }];
#set_property -dict { PACKAGE_PIN A4 IOSTANDARD LVCMOS33 } [get_ports { pmod_j11_3 }];
#set_property -dict { PACKAGE_PIN A5 IOSTANDARD LVCMOS33 } [get_ports { pmod_j11_4 }];
#set_property -dict { PACKAGE_PIN J4 IOSTANDARD LVCMOS33 } [get_ports { pmod_j11_7 }];
#set_property -dict { PACKAGE_PIN G4 IOSTANDARD LVCMOS33 } [get_ports { pmod_j11_8 }];
#set_property -dict { PACKAGE_PIN B4 IOSTANDARD LVCMOS33 } [get_ports { pmod_j11_9 }];
#set_property -dict { PACKAGE_PIN B5 IOSTANDARD LVCMOS33 } [get_ports { pmod_j11_10 }];

################################################################################
# HDR 20X2 connector
################################################################################

## TODO

################################################################################
# Ethernet (generated by LiteX)
################################################################################

# eth_clocks:0.tx
set_property LOC M2 [get_ports {eth_clocks_tx}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_clocks_tx}]

# eth_clocks:0.gtx
set_property LOC U1 [get_ports {eth_clocks_gtx}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_clocks_gtx}]

# eth_clocks:0.rx
set_property LOC P4 [get_ports {eth_clocks_rx}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_clocks_rx}]

# eth:0.rst_n
set_property LOC R1 [get_ports {eth_rst_n}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rst_n}]

# eth:0.mdio
set_property LOC H1 [get_ports {eth_mdio}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_mdio}]

# eth:0.mdc
set_property LOC H2 [get_ports {eth_mdc}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_mdc}]

# eth:0.rx_dv
set_property LOC L3 [get_ports {eth_rx_dv}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rx_dv}]

# eth:0.rx_er
set_property LOC U5 [get_ports {eth_rx_er}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rx_er}]

# eth:0.rx_data
set_property LOC M4 [get_ports {eth_rx_data[0]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rx_data[0]}]

# eth:0.rx_data
set_property LOC N3 [get_ports {eth_rx_data[1]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rx_data[1]}]

# eth:0.rx_data
set_property LOC N4 [get_ports {eth_rx_data[2]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rx_data[2]}]

# eth:0.rx_data
set_property LOC P3 [get_ports {eth_rx_data[3]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rx_data[3]}]

# eth:0.rx_data
set_property LOC R3 [get_ports {eth_rx_data[4]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rx_data[4]}]

# eth:0.rx_data
set_property LOC T3 [get_ports {eth_rx_data[5]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rx_data[5]}]

# eth:0.rx_data
set_property LOC T4 [get_ports {eth_rx_data[6]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rx_data[6]}]

# eth:0.rx_data
set_property LOC T5 [get_ports {eth_rx_data[7]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_rx_data[7]}]

# eth:0.tx_en
set_property LOC T2 [get_ports {eth_tx_en}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_tx_en}]

# eth:0.tx_er
set_property LOC J1 [get_ports {eth_tx_er}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_tx_er}]

# eth:0.tx_data
set_property LOC R2 [get_ports {eth_tx_data[0]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_tx_data[0]}]

# eth:0.tx_data
set_property LOC P1 [get_ports {eth_tx_data[1]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_tx_data[1]}]

# eth:0.tx_data
set_property LOC N2 [get_ports {eth_tx_data[2]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_tx_data[2]}]

# eth:0.tx_data
set_property LOC N1 [get_ports {eth_tx_data[3]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_tx_data[3]}]

# eth:0.tx_data
set_property LOC M1 [get_ports {eth_tx_data[4]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_tx_data[4]}]

# eth:0.tx_data
set_property LOC L2 [get_ports {eth_tx_data[5]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_tx_data[5]}]

# eth:0.tx_data
set_property LOC K2 [get_ports {eth_tx_data[6]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_tx_data[6]}]

# eth:0.tx_data
set_property LOC K1 [get_ports {eth_tx_data[7]}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_tx_data[7]}]

# eth:0.col
set_property LOC U4 [get_ports {eth_col}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_col}]

# eth:0.crs
set_property LOC U2 [get_ports {eth_crs}]
set_property IOSTANDARD LVCMOS33 [get_ports {eth_crs}]

################################################################################
# DRAM (generated by LiteX)
################################################################################

# ddram:0.a
set_property LOC E17 [get_ports {ddram_a[0]}]
set_property SLEW FAST [get_ports {ddram_a[0]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[0]}]

# ddram:0.a
set_property LOC G17 [get_ports {ddram_a[1]}]
set_property SLEW FAST [get_ports {ddram_a[1]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[1]}]

# ddram:0.a
set_property LOC F17 [get_ports {ddram_a[2]}]
set_property SLEW FAST [get_ports {ddram_a[2]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[2]}]

# ddram:0.a
set_property LOC C17 [get_ports {ddram_a[3]}]
set_property SLEW FAST [get_ports {ddram_a[3]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[3]}]

# ddram:0.a
set_property LOC G16 [get_ports {ddram_a[4]}]
set_property SLEW FAST [get_ports {ddram_a[4]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[4]}]

# ddram:0.a
set_property LOC D16 [get_ports {ddram_a[5]}]
set_property SLEW FAST [get_ports {ddram_a[5]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[5]}]

# ddram:0.a
set_property LOC H16 [get_ports {ddram_a[6]}]
set_property SLEW FAST [get_ports {ddram_a[6]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[6]}]

# ddram:0.a
set_property LOC E16 [get_ports {ddram_a[7]}]
set_property SLEW FAST [get_ports {ddram_a[7]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[7]}]

# ddram:0.a
set_property LOC H14 [get_ports {ddram_a[8]}]
set_property SLEW FAST [get_ports {ddram_a[8]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[8]}]

# ddram:0.a
set_property LOC F15 [get_ports {ddram_a[9]}]
set_property SLEW FAST [get_ports {ddram_a[9]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[9]}]

# ddram:0.a
set_property LOC F20 [get_ports {ddram_a[10]}]
set_property SLEW FAST [get_ports {ddram_a[10]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[10]}]

# ddram:0.a
set_property LOC H15 [get_ports {ddram_a[11]}]
set_property SLEW FAST [get_ports {ddram_a[11]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[11]}]

# ddram:0.a
set_property LOC C18 [get_ports {ddram_a[12]}]
set_property SLEW FAST [get_ports {ddram_a[12]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[12]}]

# ddram:0.a
set_property LOC G15 [get_ports {ddram_a[13]}]
set_property SLEW FAST [get_ports {ddram_a[13]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_a[13]}]

# ddram:0.ba
set_property LOC B17 [get_ports {ddram_ba[0]}]
set_property SLEW FAST [get_ports {ddram_ba[0]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_ba[0]}]

# ddram:0.ba
set_property LOC D18 [get_ports {ddram_ba[1]}]
set_property SLEW FAST [get_ports {ddram_ba[1]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_ba[1]}]

# ddram:0.ba
set_property LOC A17 [get_ports {ddram_ba[2]}]
set_property SLEW FAST [get_ports {ddram_ba[2]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_ba[2]}]

# ddram:0.ras_n
set_property LOC A19 [get_ports {ddram_ras_n}]
set_property SLEW FAST [get_ports {ddram_ras_n}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_ras_n}]

# ddram:0.cas_n
set_property LOC B19 [get_ports {ddram_cas_n}]
set_property SLEW FAST [get_ports {ddram_cas_n}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_cas_n}]

# ddram:0.we_n
set_property LOC A18 [get_ports {ddram_we_n}]
set_property SLEW FAST [get_ports {ddram_we_n}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_we_n}]

# ddram:0.dm
set_property LOC A22 [get_ports {ddram_dm[0]}]
set_property SLEW FAST [get_ports {ddram_dm[0]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dm[0]}]

# ddram:0.dm
set_property LOC C22 [get_ports {ddram_dm[1]}]
set_property SLEW FAST [get_ports {ddram_dm[1]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dm[1]}]

# ddram:0.dq
set_property LOC D21 [get_ports {ddram_dq[0]}]
set_property SLEW FAST [get_ports {ddram_dq[0]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[0]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[0]}]

# ddram:0.dq
set_property LOC C21 [get_ports {ddram_dq[1]}]
set_property SLEW FAST [get_ports {ddram_dq[1]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[1]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[1]}]

# ddram:0.dq
set_property LOC B22 [get_ports {ddram_dq[2]}]
set_property SLEW FAST [get_ports {ddram_dq[2]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[2]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[2]}]

# ddram:0.dq
set_property LOC B21 [get_ports {ddram_dq[3]}]
set_property SLEW FAST [get_ports {ddram_dq[3]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[3]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[3]}]

# ddram:0.dq
set_property LOC D19 [get_ports {ddram_dq[4]}]
set_property SLEW FAST [get_ports {ddram_dq[4]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[4]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[4]}]

# ddram:0.dq
set_property LOC E20 [get_ports {ddram_dq[5]}]
set_property SLEW FAST [get_ports {ddram_dq[5]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[5]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[5]}]

# ddram:0.dq
set_property LOC C19 [get_ports {ddram_dq[6]}]
set_property SLEW FAST [get_ports {ddram_dq[6]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[6]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[6]}]

# ddram:0.dq
set_property LOC D20 [get_ports {ddram_dq[7]}]
set_property SLEW FAST [get_ports {ddram_dq[7]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[7]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[7]}]

# ddram:0.dq
set_property LOC C23 [get_ports {ddram_dq[8]}]
set_property SLEW FAST [get_ports {ddram_dq[8]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[8]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[8]}]

# ddram:0.dq
set_property LOC D23 [get_ports {ddram_dq[9]}]
set_property SLEW FAST [get_ports {ddram_dq[9]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[9]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[9]}]

# ddram:0.dq
set_property LOC B24 [get_ports {ddram_dq[10]}]
set_property SLEW FAST [get_ports {ddram_dq[10]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[10]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[10]}]

# ddram:0.dq
set_property LOC B25 [get_ports {ddram_dq[11]}]
set_property SLEW FAST [get_ports {ddram_dq[11]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[11]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[11]}]

# ddram:0.dq
set_property LOC C24 [get_ports {ddram_dq[12]}]
set_property SLEW FAST [get_ports {ddram_dq[12]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[12]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[12]}]

# ddram:0.dq
set_property LOC C26 [get_ports {ddram_dq[13]}]
set_property SLEW FAST [get_ports {ddram_dq[13]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[13]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[13]}]

# ddram:0.dq
set_property LOC A25 [get_ports {ddram_dq[14]}]
set_property SLEW FAST [get_ports {ddram_dq[14]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[14]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[14]}]

# ddram:0.dq
set_property LOC B26 [get_ports {ddram_dq[15]}]
set_property SLEW FAST [get_ports {ddram_dq[15]}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_dq[15]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dq[15]}]

# ddram:0.dqs_p
set_property LOC B20 [get_ports {ddram_dqs_p[0]}]
set_property SLEW FAST [get_ports {ddram_dqs_p[0]}]
set_property IOSTANDARD DIFF_SSTL135 [get_ports {ddram_dqs_p[0]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dqs_p[0]}]

# ddram:0.dqs_p
set_property LOC A23 [get_ports {ddram_dqs_p[1]}]
set_property SLEW FAST [get_ports {ddram_dqs_p[1]}]
set_property IOSTANDARD DIFF_SSTL135 [get_ports {ddram_dqs_p[1]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dqs_p[1]}]

# ddram:0.dqs_n
set_property LOC A20 [get_ports {ddram_dqs_n[0]}]
set_property SLEW FAST [get_ports {ddram_dqs_n[0]}]
set_property IOSTANDARD DIFF_SSTL135 [get_ports {ddram_dqs_n[0]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dqs_n[0]}]

# ddram:0.dqs_n
set_property LOC A24 [get_ports {ddram_dqs_n[1]}]
set_property SLEW FAST [get_ports {ddram_dqs_n[1]}]
set_property IOSTANDARD DIFF_SSTL135 [get_ports {ddram_dqs_n[1]}]
set_property IN_TERM UNTUNED_SPLIT_40 [get_ports {ddram_dqs_n[1]}]

# ddram:0.clk_p
set_property LOC F18 [get_ports {ddram_clk_p}]
set_property SLEW FAST [get_ports {ddram_clk_p}]
set_property IOSTANDARD DIFF_SSTL135 [get_ports {ddram_clk_p}]

# ddram:0.clk_n
set_property LOC F19 [get_ports {ddram_clk_n}]
set_property SLEW FAST [get_ports {ddram_clk_n}]
set_property IOSTANDARD DIFF_SSTL135 [get_ports {ddram_clk_n}]

# ddram:0.cke
set_property LOC E18 [get_ports {ddram_cke}]
set_property SLEW FAST [get_ports {ddram_cke}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_cke}]

# ddram:0.odt
set_property LOC G19 [get_ports {ddram_odt}]
set_property SLEW FAST [get_ports {ddram_odt}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_odt}]

# ddram:0.reset_n
set_property LOC H17 [get_ports {ddram_reset_n}]
set_property SLEW FAST [get_ports {ddram_reset_n}]
set_property IOSTANDARD SSTL135 [get_ports {ddram_reset_n}]

################################################################################
# Design constraints and bitstream attributes
################################################################################

set_property INTERNAL_VREF 0.675 [get_iobanks 16]

set_property CONFIG_VOLTAGE 3.3 [current_design]
set_property CFGBVS VCCO [current_design]

set_property BITSTREAM.GENERAL.COMPRESS TRUE [current_design]
set_property BITSTREAM.CONFIG.CONFIGRATE 33 [current_design]
set_property CONFIG_MODE SPIx4 [current_design]

################################################################################
# Clock constraints
################################################################################

create_clock -name sys_clk_pin -period 20.00 [get_ports { ext_clk }];

create_clock -name eth_rx_clk -period 8.0 [get_nets has_liteeth.liteeth/eth_rx_clk]
create_clock -name eth_tx_clk -period 8.0 [get_nets has_liteeth.liteeth/eth_tx_clk]

set_clock_groups -group [get_clocks -include_generated_clocks -of [get_nets has_liteeth.liteeth/sys_clk]] -group [get_clocks -include_generated_clocks -of [get_nets has_liteeth.liteeth/eth_rx_clk]] -asynchronous

set_clock_groups -group [get_clocks -include_generated_clocks -of [get_nets has_liteeth.liteeth/sys_clk]] -group [get_clocks -include_generated_clocks -of [get_nets has_liteeth.liteeth/eth_tx_clk]] -asynchronous

set_clock_groups -group [get_clocks -include_generated_clocks -of [get_nets has_liteeth.liteeth/eth_rx_clk]] -group [get_clocks -include_generated_clocks -of [get_nets has_liteeth.liteeth/eth_tx_clk]] -asynchronous

################################################################################
# False path constraints (from LiteX as they relate to LiteDRAM and LiteEth)
################################################################################

set_false_path -quiet -through [get_nets -hierarchical -filter {mr_ff == TRUE}]

set_false_path -quiet -to [get_pins -filter {REF_PIN_NAME == PRE} -of_objects [get_cells -hierarchical -filter {ars_ff1 == TRUE || ars_ff2 == TRUE}]]

set_max_delay 2 -quiet -from [get_pins -filter {REF_PIN_NAME == C} -of_objects [get_cells -hierarchical -filter {ars_ff1 == TRUE}]] -to [get_pins -filter {REF_PIN_NAME == D} -of_objects [get_cells -hierarchical -filter {ars_ff2 == TRUE}]]

2573
fpu.vhdl

File diff suppressed because it is too large Load Diff

@ -6,33 +6,33 @@ library work;
use work.glibc_random_helpers.all;

package glibc_random is
function pseudorand(a: integer) return std_ulogic_vector;
function pseudorand1 return std_ulogic;
function pseudorand(a: integer) return std_ulogic_vector;
function pseudorand1 return std_ulogic;
end package glibc_random;

package body glibc_random is
function pseudorand(a: integer) return std_ulogic_vector is
variable tmp1, tmp2, tmp3, tmp4: std_ulogic_vector(31 downto 0);
variable ret: std_ulogic_vector(63 downto 0);
begin
tmp1 := std_ulogic_vector(to_unsigned(random, 32));
tmp2 := std_ulogic_vector(to_unsigned(random, 32));
if a <= 32 then
ret := tmp1 & tmp2;
else
tmp3 := std_ulogic_vector(to_unsigned(random, 32));
tmp4 := std_ulogic_vector(to_unsigned(random, 32));
function pseudorand(a: integer) return std_ulogic_vector is
variable tmp1, tmp2, tmp3, tmp4: std_ulogic_vector(31 downto 0);
variable ret: std_ulogic_vector(63 downto 0);
begin
tmp1 := std_ulogic_vector(to_unsigned(random, 32));
tmp2 := std_ulogic_vector(to_unsigned(random, 32));
if a <= 32 then
ret := tmp1 & tmp2;
else
tmp3 := std_ulogic_vector(to_unsigned(random, 32));
tmp4 := std_ulogic_vector(to_unsigned(random, 32));

ret := tmp1(15 downto 0) & tmp2(15 downto 0) & tmp3(15 downto 0) & tmp4(15 downto 0);
end if;
ret := tmp1(15 downto 0) & tmp2(15 downto 0) & tmp3(15 downto 0) & tmp4(15 downto 0);
end if;

return ret((a-1) downto 0);
end;
return ret((a-1) downto 0);
end;

function pseudorand1 return std_ulogic is
variable tmp: std_ulogic_vector(31 downto 0);
begin
tmp := std_ulogic_vector(to_unsigned(random, 32));
return tmp(0);
end;
function pseudorand1 return std_ulogic is
variable tmp: std_ulogic_vector(31 downto 0);
begin
tmp := std_ulogic_vector(to_unsigned(random, 32));
return tmp(0);
end;
end package body glibc_random;

@ -1,19 +1,19 @@
package glibc_random_helpers is
procedure srand (v : integer);
attribute foreign of srand : procedure is "VHPIDIRECT srand";
procedure srand (v : integer);
attribute foreign of srand : procedure is "VHPIDIRECT srand";

function random return integer;
attribute foreign of random : function is "VHPIDIRECT random";
function random return integer;
attribute foreign of random : function is "VHPIDIRECT random";
end glibc_random_helpers;

package body glibc_random_helpers is
procedure srand (v : integer) is
begin
assert false severity failure;
end srand;
procedure srand (v : integer) is
begin
assert false severity failure;
end srand;

function random return integer is
begin
assert false severity failure;
end random;
function random return integer is
begin
assert false severity failure;
end random;
end glibc_random_helpers;

@ -1,99 +0,0 @@
-- GPIO module for microwatt
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.wishbone_types.all;

-- Wishbone-attached GPIO block: exposes NGPIO general purpose I/O lines
-- through a small register file accessed over the wb_in / wb_out ports.
entity gpio is
generic (
-- Number of GPIO lines; sets the width of gpio_in, gpio_out and gpio_dir.
NGPIO : integer := 32
);
port (
-- Clock and reset for the register logic
clk : in std_ulogic;
rst : in std_ulogic;

-- Wishbone
-- wb_in carries the request from the master, wb_out the response to it
wb_in : in wb_io_master_out;
wb_out : out wb_io_slave_out;

-- GPIO lines
-- gpio_in: values read from the lines; gpio_out: values to be driven
gpio_in : in std_ulogic_vector(NGPIO - 1 downto 0);
gpio_out : out std_ulogic_vector(NGPIO - 1 downto 0);
-- 1 = output, 0 = input
gpio_dir : out std_ulogic_vector(NGPIO - 1 downto 0);

-- Interrupt
intr : out std_ulogic
);
end entity gpio;

-- Register file implementation of the GPIO block.
--
-- Readable registers: DATA_OUT, DATA_IN and DIR. DATA_SET and DATA_CLR only
-- have an effect on writes; reading them (or any other address) returns zero.
-- The response is computed combinationally into wb_rsp and registered into
-- wb_out, so ack and read data appear on the clock edge after the request.
architecture behaviour of gpio is
-- Number of low address bits used to select a register
constant GPIO_REG_BITS : positive := 5;

-- Register addresses, matching addr downto 2, so 4 bytes per reg
constant GPIO_REG_DATA_OUT : std_ulogic_vector(GPIO_REG_BITS-1 downto 0) := "00000";
constant GPIO_REG_DATA_IN : std_ulogic_vector(GPIO_REG_BITS-1 downto 0) := "00001";
constant GPIO_REG_DIR : std_ulogic_vector(GPIO_REG_BITS-1 downto 0) := "00010";
constant GPIO_REG_DATA_SET : std_ulogic_vector(GPIO_REG_BITS-1 downto 0) := "00100";
constant GPIO_REG_DATA_CLR : std_ulogic_vector(GPIO_REG_BITS-1 downto 0) := "00101";

-- Current output value and direction
signal reg_data : std_ulogic_vector(NGPIO - 1 downto 0);
signal reg_dirn : std_ulogic_vector(NGPIO - 1 downto 0);
-- Two successive registered copies of gpio_in; DATA_IN reads return reg_in2.
-- NOTE(review): presumably a two-stage synchroniser for asynchronous pins - confirm.
signal reg_in1 : std_ulogic_vector(NGPIO - 1 downto 0);
signal reg_in2 : std_ulogic_vector(NGPIO - 1 downto 0);

-- Combinational response (registered into wb_out) and selected read data
signal wb_rsp : wb_io_slave_out;
signal reg_out : std_ulogic_vector(NGPIO - 1 downto 0);

begin

-- No interrupt facility for now
intr <= '0';

-- Drive the pins straight from the data and direction registers
gpio_out <= reg_data;
gpio_dir <= reg_dirn;

-- Wishbone response
-- Every strobed cycle is acknowledged, whatever the address or direction
wb_rsp.ack <= wb_in.cyc and wb_in.stb;
-- Read multiplexer: unlisted addresses read as zero
with wb_in.adr(GPIO_REG_BITS - 1 downto 0) select reg_out <=
reg_data when GPIO_REG_DATA_OUT,
reg_in2 when GPIO_REG_DATA_IN,
reg_dirn when GPIO_REG_DIR,
(others => '0') when others;
-- Data bus bits above NGPIO are zero-filled
wb_rsp.dat(wb_rsp.dat'left downto NGPIO) <= (others => '0');
wb_rsp.dat(NGPIO - 1 downto 0) <= reg_out;
-- This slave never stalls the bus
wb_rsp.stall <= '0';

-- Clocked process: registers the response, samples the inputs and applies
-- register writes.
regs_rw: process(clk)
begin
if rising_edge(clk) then
wb_out <= wb_rsp;
-- Input sampling chain; these two registers are not affected by rst
reg_in2 <= reg_in1;
reg_in1 <= gpio_in;
if rst = '1' then
reg_data <= (others => '0');
reg_dirn <= (others => '0');
-- Overrides the ack field of the wb_out assignment above: the last
-- assignment to a signal within a process takes effect.
wb_out.ack <= '0';
else
if wb_in.cyc = '1' and wb_in.stb = '1' and wb_in.we = '1' then
case wb_in.adr(GPIO_REG_BITS - 1 downto 0) is
when GPIO_REG_DATA_OUT =>
reg_data <= wb_in.dat(NGPIO - 1 downto 0);
when GPIO_REG_DIR =>
reg_dirn <= wb_in.dat(NGPIO - 1 downto 0);
when GPIO_REG_DATA_SET =>
-- Set the output bits that are 1 in the written value
reg_data <= reg_data or wb_in.dat(NGPIO - 1 downto 0);
when GPIO_REG_DATA_CLR =>
-- Clear the output bits that are 1 in the written value
reg_data <= reg_data and not wb_in.dat(NGPIO - 1 downto 0);
when others =>
-- Writes to DATA_IN or unknown addresses are ignored
end case;
end if;
end if;
end if;
end process;

end architecture behaviour;

@ -1,34 +1,28 @@
ARCH = $(shell uname -m)
ifneq ("$(ARCH)", "ppc64")
ifneq ("$(ARCH)", "ppc64le")
CROSS_COMPILE ?= powerpc64le-linux-gnu-
endif
endif
CROSS_COMPILE = powerpc64le-linux-
endif
endif

CC = $(CROSS_COMPILE)gcc
LD = $(CROSS_COMPILE)ld
OBJCOPY = $(CROSS_COMPILE)objcopy

CFLAGS = -Os -g -Wall -std=c99 -msoft-float -mno-string -mno-multiple -mno-vsx -mno-altivec -mlittle-endian -fno-stack-protector -mstrict-align -ffreestanding -fdata-sections -ffunction-sections -I../include
CFLAGS = -Os -g -Wall -std=c99 -msoft-float -mno-string -mno-multiple -mno-vsx -mno-altivec -mlittle-endian -fno-stack-protector -mstrict-align -ffreestanding -fdata-sections -ffunction-sections
ASFLAGS = $(CFLAGS)
LDFLAGS = -T powerpc.lds

all: hello_world.hex

console.o: ../lib/console.c
$(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@

hello_world.elf: hello_world.o head.o console.o
$(LD) $(LDFLAGS) -o $@ $^
hello_world.elf: hello_world.o head.o
$(LD) $(LDFLAGS) -o hello_world.elf hello_world.o head.o

hello_world.bin: hello_world.elf
$(OBJCOPY) -O binary $^ $@
$(OBJCOPY) -O binary hello_world.elf hello_world.bin

hello_world.hex: hello_world.bin
../scripts/bin2hex.py $^ > $@
./bin2hex.py hello_world.bin > hello_world.hex

clean:
@rm -f *.o hello_world.elf hello_world.bin hello_world.hex
distclean: clean
rm -f *~


@ -14,7 +14,7 @@
* limitations under the License.
*/

#define STACK_TOP 0x2000
#define STACK_TOP 0x3000

#define FIXUP_ENDIAN \
tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
@ -40,45 +40,19 @@

.section ".head","ax"

/*
* Microwatt currently enters in LE mode at 0x0, so we don't need to
* do any endian fix-ups.
*/
. = 0
.global _start
_start:
b boot_entry

/* QEMU enters at 0x10 */
. = 0x10
FIXUP_ENDIAN
b boot_entry

. = 0x100
FIXUP_ENDIAN
b boot_entry

.global boot_entry
boot_entry:
LOAD_IMM64(%r10,__bss_start)
LOAD_IMM64(%r11,__bss_end)
subf %r11,%r10,%r11
addi %r11,%r11,63
srdi. %r11,%r11,6
beq 2f
mtctr %r11
1: dcbz 0,%r10
addi %r10,%r10,64
bdnz 1b

/* setup stack */
2: LOAD_IMM64(%r1,__stack_top)
li %r0,0
stdu %r0,-32(%r1)
LOAD_IMM64(%r1, STACK_TOP - 0x100)
LOAD_IMM64(%r12, main)
mtctr %r12
mtctr %r12,
bctrl
attn // terminate on exit
b .

#define EXCEPTION(nr) \

Binary file not shown.

@ -1,30 +1,146 @@
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdbool.h>

#include "console.h"
/*
* Core UART functions to implement for a port
*/

static char mw_logo[] =
static uint64_t potato_uart_base;

"\n"
" .oOOo. \n"
" .\" \". \n"
" ; .mw. ; Microwatt, it works.\n"
" . ' ' . \n"
" \\ || / \n"
" ;..; \n"
" ;..; \n"
" `ww' \n";
#define PROC_FREQ 50000000
#define UART_FREQ 115200
#define UART_BASE 0xc0002000

#define POTATO_CONSOLE_TX 0x00
#define POTATO_CONSOLE_RX 0x08
#define POTATO_CONSOLE_STATUS 0x10
#define POTATO_CONSOLE_STATUS_RX_EMPTY 0x01
#define POTATO_CONSOLE_STATUS_TX_EMPTY 0x02
#define POTATO_CONSOLE_STATUS_RX_FULL 0x04
#define POTATO_CONSOLE_STATUS_TX_FULL 0x08
#define POTATO_CONSOLE_CLOCK_DIV 0x18
#define POTATO_CONSOLE_IRQ_EN 0x20

/*
 * Read one 64-bit UART register.
 *
 * offset: byte offset of the register from potato_uart_base.
 * Returns the value loaded through a volatile access.
 */
static uint64_t potato_uart_reg_read(int offset)
{
    volatile uint64_t *reg = (volatile uint64_t *)(potato_uart_base + offset);

    return *reg;
}

/*
 * Write one 64-bit UART register.
 *
 * offset: byte offset of the register from potato_uart_base.
 * val:    value stored through a volatile access.
 */
static void potato_uart_reg_write(int offset, uint64_t val)
{
    volatile uint64_t *reg = (volatile uint64_t *)(potato_uart_base + offset);

    *reg = val;
}

/* Return 1 when the status register reports RX_EMPTY, otherwise 0. */
static int potato_uart_rx_empty(void)
{
    uint64_t status = potato_uart_reg_read(POTATO_CONSOLE_STATUS);

    return (status & POTATO_CONSOLE_STATUS_RX_EMPTY) ? 1 : 0;
}

/* Return 1 when the status register reports TX_FULL, otherwise 0. */
static int potato_uart_tx_full(void)
{
    uint64_t status = potato_uart_reg_read(POTATO_CONSOLE_STATUS);

    return (status & POTATO_CONSOLE_STATUS_TX_FULL) ? 1 : 0;
}

/* Read the RX register and return its low 8 bits as a character. */
static char potato_uart_read(void)
{
    return (char)(potato_uart_reg_read(POTATO_CONSOLE_RX) & 0x000000ff);
}

/* Write character c, widened to 64 bits, to the TX register. */
static void potato_uart_write(char c)
{
    potato_uart_reg_write(POTATO_CONSOLE_TX, (uint64_t)c);
}

/*
 * Compute the clock divisor value for the UART.
 *
 * proc_freq: processor clock frequency.
 * uart_freq: desired UART rate; it is multiplied by 16 before dividing.
 * Returns proc_freq / (16 * uart_freq) - 1 using integer division.
 */
static unsigned long potato_uart_divisor(unsigned long proc_freq, unsigned long uart_freq)
{
    unsigned long oversampled = uart_freq * 16;
    unsigned long ratio = proc_freq / oversampled;

    return ratio - 1;
}

/*
 * Point the driver at UART_BASE and program the clock divider register
 * from the PROC_FREQ and UART_FREQ constants.
 */
void potato_uart_init(void)
{
    unsigned long divisor;

    potato_uart_base = UART_BASE;

    divisor = potato_uart_divisor(PROC_FREQ, UART_FREQ);
    potato_uart_reg_write(POTATO_CONSOLE_CLOCK_DIV, divisor);
}

/* Busy-wait until the receiver is no longer empty, then return the character read. */
int getchar(void)
{
    for (;;) {
        if (!potato_uart_rx_empty())
            return potato_uart_read();
    }
}

/* Busy-wait until the transmitter is no longer full, then send c. */
void putchar(unsigned char c)
{
    for (;;) {
        if (!potato_uart_tx_full())
            break;
    }

    potato_uart_write(c);
}

/* Send the first len characters of str, one putchar() call per character. */
void putstr(const char *str, unsigned long len)
{
    const char *end = str + len;

    while (str != end)
        putchar(*str++);
}

/* Return the number of characters in s before its terminating NUL. */
size_t strlen(const char *s)
{
    const char *cursor = s;

    while (*cursor != '\0')
        cursor++;

    return (size_t)(cursor - s);
}

#define HELLO_WORLD "Hello World\r\n"

int main(void)
{
console_init();
potato_uart_init();

puts(mw_logo);
putstr(HELLO_WORLD, strlen(HELLO_WORLD));

while (1) {
unsigned char c = getchar();
putchar(c);
if (c == 13) // if CR send LF
putchar(10);
}
}

Binary file not shown.

@ -1,785 +0,0 @@
000000004800012c
0000000000000000
4800002408000048
01006b69a600607d
a602487d05009f42
a64b5a7d14004a39
2402004ca64b7b7d
00000000480000f4
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
4800002408000048
01006b69a600607d
a602487d05009f42
a64b5a7d14004a39
2402004ca64b7b7d
3d40000048000004
794a07c6614a0000
614a1900654a0000
616b00003d600000
656b0000796b07c6
7d6a5850616b1980
796bd183396b003f
7d6903a641820014
394a00407c0057ec
3c2000004200fff8
782107c660210000
6021398064210000
f801ffe138000000
3d8000007c1243a6
798c07c6618c0000
618c1000658c0000
4e8004217d8903a6
4800000000000200
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000048000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
384298003c400001
fbe1fff87c0802a6
f821ffd1f8010010
60000000480001ed
3862800060000000
6000000048000155
6000000048000049
5463063e7c7f1b78
480000b957ff063e
2c1f000d60000000
3860000a4082ffe0
60000000480000a5
000000004bffffd0
0000018001000000
384298003c400001
8922810860000000
3942810060000000
418200302c090000
39290014e92a0000
7d204eaa7c0004ac
4182ffec71290001
7c0004ace86a0000
5463063e7c601eaa
e92a00004e800020
7c0004ac39290010
712900017d204eea
e86a00004082ffec
7c0004ac38630008
4bffffd07c601eea
0000000000000000
3c40000100000000
6000000038429800
6000000089228108
2c09000039428100
e92a00004182002c
7c0004ac39290014
712900207d204eaa
e92a00004182ffec
7c604faa7c0004ac
e92a00004e800020
7c0004ac39290010
712900087d204eea
5469063e4082ffec
7c0004ace94a0000
4e8000207d2057ea
0000000000000000
3c40000100000000
7c0802a638429800
fbc1fff0fbe1fff8
f80100103be3ffff
8fdf0001f821ffd1
408200102c3e0000
3860000038210030
281e000a480001e8
3860000d4082000c
7fc3f3784bffff45
4bffffd04bffff3d
0100000000000000
7c691b7800000280
7d4918ae38600000
4d8200202c0a0000
4bfffff038630001
0000000000000000
3c40000100000000
3d40c00038429800
794a0020614a0020
7d4056ea7c0004ac
794a06003d20c000
7929002061290008
7d204eea7c0004ac
4182001871290020
612900403d20c000
7c0004ac79290020
7929f8047d204eea
79290fc33d00c000
7908002061082000
f902810060000000
610820003d00001c
418200847d4a4392
3920000160000000
3d00c00099228108
3920ff806108200c
7c0004ac79080020
e92281007d2047aa
7d404faa7c0004ac
794ac202e9228100
7c0004ac39290004
e92281007d404faa
3929000c39400003
7d404faa7c0004ac
39290010e9228100
7d404faa7c0004ac
39400007e9228100
7c0004ac39290008
4e8000207d404faa
394affff60000000
3d20c00099228108
7929002061292018
7d404fea7c0004ac
000000004e800020
0000000000000000
384298003c400001
8922810860000000
600000002c090000
41820024e9228100
78840e282c230000
6084000141820008
7c0004ac39290004
4e8000207c804faa
418200082c240000
3929002060630002
7c604fea7c0004ac
000000004e800020
0000000000000000
e8010010ebc1fff0
7c0803a6ebe1fff8
000000104e800020
00527a0100000000
00010c1b01417804
0000001800000018
00000070fffffc40
9f7e4111300e4600
0000001000000001
00527a0100000000
00010c1b01417804
0000001800000010
00000084fffffc80
0000001000000000
fffffcf00000002c
0000000000000080
0000004000000028
00000060fffffd5c
9e019f0041094500
447e4111300e4302
4106dedf42000e0a
000000100000000b
fffffd900000006c
0000000000000028
0000008000000010
0000012cfffffda4
0000001000000000
fffffebc00000094
0000000000000068
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
4f4f6f2e2020200a
0a20202020202e6f
2020202020222e20
203b200a202e2220
3b20202e776d2e20
6f7263694d202020
7469202c74746177
0a2e736b726f7720
27202027202e2020
200a202020202e20
2f207c7c205c2020
2020200a20202020
2020203b2e2e3b20
202020200a202020
202020203b2e2e3b
60202020200a2020
000a202020277777

@ -1,27 +1,13 @@
SECTIONS
{
. = 0;
_start = .;
. = 0;
.head : {
KEEP(*(.head))
}
}
. = 0x1000;
.text : { *(.text) *(.text.*) *(.rodata) *(.rodata.*) }
. = 0x1800;
.data : { *(.data) *(.data.*) *(.got) *(.toc) }
. = ALIGN(0x80);
__bss_start = .;
.bss : {
*(.dynsbss)
*(.sbss)
*(.scommon)
*(.dynbss)
*(.bss)
*(.common)
*(.bss.*)
}
. = ALIGN(0x80);
__bss_end = .;
. = . + 0x2000;
__stack_top = .;
.text : { *(.text) }
. = 0x2000;
.data : { *(.data) }
.bss : { *(.bss) }
}

@ -5,294 +5,205 @@ use ieee.numeric_std.all;
library work;

package helpers is
function fls_32 (val: std_ulogic_vector(31 downto 0)) return integer;
function ffs_32 (val: std_ulogic_vector(31 downto 0)) return integer;
function fls_32 (val: std_ulogic_vector(31 downto 0)) return integer;
function ffs_32 (val: std_ulogic_vector(31 downto 0)) return integer;

function fls_64 (val: std_ulogic_vector(63 downto 0)) return integer;
function ffs_64 (val: std_ulogic_vector(63 downto 0)) return integer;
function fls_64 (val: std_ulogic_vector(63 downto 0)) return integer;
function ffs_64 (val: std_ulogic_vector(63 downto 0)) return integer;

function popcnt8(val: std_ulogic_vector(7 downto 0)) return std_ulogic_vector;
function popcnt32(val: std_ulogic_vector(31 downto 0)) return std_ulogic_vector;
function popcnt64(val: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;
function popcnt8(val: std_ulogic_vector(7 downto 0)) return std_ulogic_vector;
function popcnt32(val: std_ulogic_vector(31 downto 0)) return std_ulogic_vector;
function popcnt64(val: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;

function cmp_one_byte(a, b: std_ulogic_vector(7 downto 0)) return std_ulogic_vector;
function cmp_one_byte(a, b: std_ulogic_vector(7 downto 0)) return std_ulogic_vector;

function ppc_signed_compare(a, b: signed(63 downto 0); so: std_ulogic) return std_ulogic_vector;
function ppc_unsigned_compare(a, b: unsigned(63 downto 0); so: std_ulogic) return std_ulogic_vector;
function ppc_signed_compare(a, b: signed(63 downto 0)) return std_ulogic_vector;
function ppc_unsigned_compare(a, b: unsigned(63 downto 0)) return std_ulogic_vector;

function ra_or_zero(ra: std_ulogic_vector(63 downto 0); reg: std_ulogic_vector(4 downto 0)) return std_ulogic_vector;
function ra_or_zero(ra: std_ulogic_vector(63 downto 0); reg: std_ulogic_vector(4 downto 0)) return std_ulogic_vector;

function byte_reverse(val: std_ulogic_vector(63 downto 0); size: integer) return std_ulogic_vector;
function byte_reverse(val: std_ulogic_vector(63 downto 0); size: integer) return std_ulogic_vector;

function sign_extend(val: std_ulogic_vector(63 downto 0); size: natural) return std_ulogic_vector;

function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector;
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;
function edgelocation(v: std_ulogic_vector; nbits: natural) return std_ulogic_vector;
function count_left_zeroes(val: std_ulogic_vector) return std_ulogic_vector;
function count_right_zeroes(val: std_ulogic_vector) return std_ulogic_vector;
function sign_extend(val: std_ulogic_vector(63 downto 0); size: natural) return std_ulogic_vector;
end package helpers;

package body helpers is
-- Scan val from bit 31 downwards and return 31 - i for the first bit i
-- that is '1' (i.e. the number of bits above it). Returns 32 when no bit
-- is '1'.
function fls_32 (val: std_ulogic_vector(31 downto 0)) return integer is
begin
    for i in val'range loop
        if val(i) = '1' then
            return 31 - i;
        end if;
    end loop;

    return 32;
end;

-- Scan val from bit 0 upwards and return the index of the first bit that
-- is '1'. Returns 32 when no bit is '1'.
function ffs_32 (val: std_ulogic_vector(31 downto 0)) return integer is
begin
    for i in val'reverse_range loop
        if val(i) = '1' then
            return i;
        end if;
    end loop;

    return 32;
end;

-- Scan val from bit 63 downwards and return 63 - i for the first bit i
-- that is '1'. Returns 64 when no bit is '1'.
function fls_64 (val: std_ulogic_vector(63 downto 0)) return integer is
begin
    for i in val'range loop
        if val(i) = '1' then
            return 63 - i;
        end if;
    end loop;

    return 64;
end;

-- Scan val from bit 0 upwards and return the index of the first bit that
-- is '1'. Returns 64 when no bit is '1'.
function ffs_64 (val: std_ulogic_vector(63 downto 0)) return integer is
begin
    for i in val'reverse_range loop
        if val(i) = '1' then
            return i;
        end if;
    end loop;

    return 64;
end;

-- Population count of a byte: adds every bit of val into a 4-bit counter
-- and returns the total zero-extended to the width of val (8 bits).
function popcnt8(val: std_ulogic_vector(7 downto 0)) return std_ulogic_vector is
    variable total: unsigned(3 downto 0) := (others => '0');
begin
    for idx in val'reverse_range loop
        total := ("000" & val(idx)) + total;
    end loop;

    return std_ulogic_vector(resize(total, val'length));
end;

-- Population count of a word: adds every bit of val into a 6-bit counter
-- and returns the total zero-extended to the width of val (32 bits).
function popcnt32(val: std_ulogic_vector(31 downto 0)) return std_ulogic_vector is
    variable total: unsigned(5 downto 0) := (others => '0');
begin
    for idx in val'reverse_range loop
        total := ("00000" & val(idx)) + total;
    end loop;

    return std_ulogic_vector(resize(total, val'length));
end;

-- Population count of a doubleword: adds every bit of val into a 7-bit
-- counter and returns the total zero-extended to the width of val (64 bits).
function popcnt64(val: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
    variable total: unsigned(6 downto 0) := (others => '0');
begin
    for idx in val'reverse_range loop
        total := ("000000" & val(idx)) + total;
    end loop;

    return std_ulogic_vector(resize(total, val'length));
end;

-- Byte equality mask: x"ff" when a equals b, x"00" otherwise.
function cmp_one_byte(a, b: std_ulogic_vector(7 downto 0)) return std_ulogic_vector is
    -- Starts as the "not equal" answer and is overwritten on a match
    variable mask: std_ulogic_vector(7 downto 0) := x"00";
begin
    if a = b then
        mask := x"ff";
    end if;

    return mask;
end;

-- Signed comparison producing a 4-bit result: "100" when a < b, "010"
-- when a > b, "001" otherwise, with so appended as the last bit.
function ppc_signed_compare(a, b: signed(63 downto 0); so: std_ulogic) return std_ulogic_vector is
    -- Starts as the "neither less nor greater" answer
    variable flags: std_ulogic_vector(2 downto 0) := "001";
begin
    if a < b then
        flags := "100";
    elsif a > b then
        flags := "010";
    end if;

    return flags & so;
end;

-- Unsigned comparison producing a 4-bit result: "100" when a < b, "010"
-- when a > b, "001" otherwise, with so appended as the last bit.
function ppc_unsigned_compare(a, b: unsigned(63 downto 0); so: std_ulogic) return std_ulogic_vector is
    -- Starts as the "neither less nor greater" answer
    variable flags: std_ulogic_vector(2 downto 0) := "001";
begin
    if a < b then
        flags := "100";
    elsif a > b then
        flags := "010";
    end if;

    return flags & so;
end;

-- Return ra unless the register number reg is 0, in which case return
-- 64 zero bits.
function ra_or_zero(ra: std_ulogic_vector(63 downto 0); reg: std_ulogic_vector(4 downto 0)) return std_ulogic_vector is
begin
    if to_integer(unsigned(reg)) /= 0 then
        return ra;
    end if;

    return x"0000000000000000";
end;

-- Reverse the byte order of the low `size` bytes of val (size = 2, 4 or 8).
-- Result bits above the reversed bytes are zero. Any other size raises a
-- failure report.
function byte_reverse(val: std_ulogic_vector(63 downto 0); size: integer) return std_ulogic_vector is
    variable swapped : std_ulogic_vector(63 downto 0) := (others => '0');
begin
    -- Every slice below has constant bounds (the original notes that Vivado
    -- does not support non-constant vector slices), so each size is spelled
    -- out explicitly.
    case size is
        when 2 =>
            swapped(15 downto 0) := val(7 downto 0) & val(15 downto 8);
        when 4 =>
            swapped(31 downto 0) := val(7 downto 0) & val(15 downto 8) &
                                    val(23 downto 16) & val(31 downto 24);
        when 8 =>
            swapped := val(7 downto 0) & val(15 downto 8) &
                       val(23 downto 16) & val(31 downto 24) &
                       val(39 downto 32) & val(47 downto 40) &
                       val(55 downto 48) & val(63 downto 56);
        when others =>
            report "bad byte reverse length " & integer'image(size) severity failure;
    end case;

    return swapped;
end;

-- Sign-extend the low `size` bytes of val (size = 2, 4 or 8) to 64 bits.
-- Any other size raises a failure report.
function sign_extend(val: std_ulogic_vector(63 downto 0); size: natural) return std_ulogic_vector is
    variable ret : signed(63 downto 0) := (others => '0');
begin
    case size is
        when 2 =>
            ret := resize(signed(val(15 downto 0)), 64);
        when 4 =>
            ret := resize(signed(val(31 downto 0)), 64);
        when 8 =>
            ret := resize(signed(val(63 downto 0)), 64);
        when others =>
            -- The message names this function; the original text was
            -- copied from byte_reverse and misidentified the failure.
            report "bad sign extend length " & integer'image(size) severity failure;
    end case;

    return std_ulogic_vector(ret);
end;

-- Mirror the bits of a: the result has the same (a'left downto a'right)
-- range, with each result bit taken from the opposite end of a.
function bit_reverse(a: std_ulogic_vector) return std_ulogic_vector is
    variable mirrored: std_ulogic_vector(a'left downto a'right);
begin
    for dst in mirrored'range loop
        mirrored(dst) := a(a'left + a'right - dst);
    end loop;
    return mirrored;
end;

-- For a doubleword with exactly one bit set, return that bit's number
-- (counting from the right) as a 6-bit value. Result bit i is the OR of
-- every input bit whose own index has bit i set, e.g.
--   bit 0 = a[1] or a[3] or a[5] or ...
--   bit 5 = a[32..63] ORed together
function bit_number(a: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
    variable ret: std_ulogic_vector(5 downto 0);
    variable acc: std_ulogic;
begin
    for i in 0 to 5 loop
        acc := '0';
        for j in 0 to 63 loop
            -- Include a(j) only when bit i of the index j is 1
            if (j / 2**i) mod 2 = 1 then
                acc := acc or a(j);
            end if;
        end loop;
        ret(i) := acc;
    end loop;
    return ret;
end;

-- Locate the 0 -> 1 boundary in v. Given v of the form 1...10...0, the
-- nbits-wide result is the bit number of the rightmost 1 bit; a zero v
-- gives zero. The code indexes v from 0 up to 2**nbits - 1.
function edgelocation(v: std_ulogic_vector; nbits: natural) return std_ulogic_vector is
    variable p: std_ulogic_vector(nbits - 1 downto 0);
    variable half: natural;
    variable base: natural;
    variable hit: std_ulogic;
begin
    half := 1;
    for i in 0 to nbits - 1 loop
        hit := '0';
        -- Walk v in groups of 2*half bits; result bit i is set when the
        -- top bit of a group is 1 while the top bit of its lower half is 0
        for j in 0 to (2**nbits / (2 * half)) - 1 loop
            base := j * 2 * half;
            hit := hit or (v(base + 2 * half - 1) and not v(base + half - 1));
        end loop;
        p(i) := hit;
        half := half * 2;
    end loop;
    return p;
end function;

-- Count leading zeroes operations
--
-- count_right_zeroes returns, as a 6-bit vector, the bit number of the
-- lowest set bit of val (equivalently, the number of zero bits below it).
-- The value passed in is assumed to be non-zero; zero is returned if it is.
-- The upper four result bits are taken from edgelocation() applied to a
-- 1...10...0 mask, the lower two from bit_number() applied to the
-- isolated lowest set bit.
function count_right_zeroes(val: std_ulogic_vector) return std_ulogic_vector is
    variable negated: std_ulogic_vector(val'left downto val'right);
    variable lowest_one: std_ulogic_vector(val'left downto val'right);
    variable ones_from_edge: std_ulogic_vector(val'left downto val'right);
    variable hi_bits, lo_bits: std_ulogic_vector(5 downto 0);
    variable bn: std_ulogic_vector(5 downto 0);
begin
    negated := std_ulogic_vector(- signed(val));
    -- val and -val keeps only the lowest set bit
    lowest_one := negated and val;
    -- val or -val sets that bit and everything above it
    ones_from_edge := negated or val;
    -- Sign-extend the mask (keeps the 1...1 prefix), zero-extend the one-hot value
    hi_bits := edgelocation(std_ulogic_vector(resize(signed(ones_from_edge), 64)), 6);
    lo_bits := bit_number(std_ulogic_vector(resize(unsigned(lowest_one), 64)));
    bn := hi_bits(5 downto 2) & lo_bits(1 downto 0);
    return bn;
end;

-- Count zero bits from the left end of val by bit-reversing it and
-- counting from the right.  Same zero-input behaviour as count_right_zeroes.
function count_left_zeroes(val: std_ulogic_vector) return std_ulogic_vector is
begin
    return count_right_zeroes(bit_reverse(val));
end;

-- Scan a 32-bit word from bit 31 downwards and return 31 minus the index
-- of the first '1' found (the number of zero bits above it).
-- Returns 32 when no bit is '1'.
function fls_32 (val: std_ulogic_vector(31 downto 0)) return integer is
begin
    for pos in 31 downto 0 loop
        if val(pos) = '1' then
            return 31 - pos;
        end if;
    end loop;

    return 32;
end;

-- Scan a 32-bit word from bit 0 upwards and return the index of the
-- first '1' found.  Returns 32 when no bit is '1'.
function ffs_32 (val: std_ulogic_vector(31 downto 0)) return integer is
begin
    for pos in 0 to 31 loop
        if val(pos) = '1' then
            return pos;
        end if;
    end loop;

    return 32;
end;

-- Scan a 64-bit word from bit 63 downwards and return 63 minus the index
-- of the first '1' found (the number of zero bits above it).
-- Returns 64 when no bit is '1'.
function fls_64 (val: std_ulogic_vector(63 downto 0)) return integer is
begin
    for pos in 63 downto 0 loop
        if val(pos) = '1' then
            return 63 - pos;
        end if;
    end loop;

    return 64;
end;

-- Scan a 64-bit word from bit 0 upwards and return the index of the
-- first '1' found.  Returns 64 when no bit is '1'.
function ffs_64 (val: std_ulogic_vector(63 downto 0)) return integer is
begin
    for pos in 0 to 63 loop
        if val(pos) = '1' then
            return pos;
        end if;
    end loop;

    return 64;
end;

-- Population count of a byte.  The count is accumulated in 4 bits
-- (enough for 0..8) and returned zero-extended to the input width (8 bits).
function popcnt8(val: std_ulogic_vector(7 downto 0)) return std_ulogic_vector is
    variable count: unsigned(3 downto 0) := (others => '0');
begin
    for i in val'range loop
        -- Add the single bit; numeric_std zero-extends the 1-bit operand
        count := count + unsigned(val(i downto i));
    end loop;

    return std_ulogic_vector(resize(count, val'length));
end;

-- Population count of a 32-bit word.  The count is accumulated in 6 bits
-- (enough for 0..32) and returned zero-extended to the input width (32 bits).
function popcnt32(val: std_ulogic_vector(31 downto 0)) return std_ulogic_vector is
    variable count: unsigned(5 downto 0) := (others => '0');
begin
    for i in val'range loop
        -- Add the single bit; numeric_std zero-extends the 1-bit operand
        count := count + unsigned(val(i downto i));
    end loop;

    return std_ulogic_vector(resize(count, val'length));
end;

-- Population count of a doubleword.  The count is accumulated in 7 bits
-- (enough for 0..64) and returned zero-extended to the input width (64 bits).
function popcnt64(val: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
    variable count: unsigned(6 downto 0) := (others => '0');
begin
    for i in val'range loop
        -- Add the single bit; numeric_std zero-extends the 1-bit operand
        count := count + unsigned(val(i downto i));
    end loop;

    return std_ulogic_vector(resize(count, val'length));
end;

-- Byte equality as a mask: x"ff" when a equals b, x"00" otherwise.
function cmp_one_byte(a, b: std_ulogic_vector(7 downto 0)) return std_ulogic_vector is
    variable mask: std_ulogic_vector(7 downto 0) := x"00";
begin
    if a = b then
        mask := x"ff";
    end if;

    return mask;
end;

-- Signed 64-bit comparison encoded in 4 bits:
--   "1000" when a < b, "0100" when a > b, "0010" otherwise.
-- The rightmost bit is always '0' here.
function ppc_signed_compare(a, b: signed(63 downto 0)) return std_ulogic_vector is
    variable flags: std_ulogic_vector(3 downto 0) := "0010";
begin
    if a < b then
        flags := "1000";
    elsif a > b then
        flags := "0100";
    end if;

    return flags;
end;

-- Unsigned 64-bit comparison encoded in 4 bits:
--   "1000" when a < b, "0100" when a > b, "0010" otherwise.
-- The rightmost bit is always '0' here.
function ppc_unsigned_compare(a, b: unsigned(63 downto 0)) return std_ulogic_vector is
    variable flags: std_ulogic_vector(3 downto 0) := "0010";
begin
    if a < b then
        flags := "1000";
    elsif a > b then
        flags := "0100";
    end if;

    return flags;
end;

-- Select the RA operand: a 64-bit zero when the register number 'reg'
-- is 0, otherwise the supplied register value 'ra'.
function ra_or_zero(ra: std_ulogic_vector(63 downto 0); reg: std_ulogic_vector(4 downto 0)) return std_ulogic_vector is
begin
    if to_integer(unsigned(reg)) /= 0 then
        return ra;
    end if;

    return x"0000000000000000";
end;

-- Reverse the byte order of the low 'size' bytes of val.
--   val  : 64-bit source value
--   size : number of low-order bytes to swap; must be 2, 4 or 8
-- Returns a 64-bit value whose low 'size' bytes are those of val in
-- reversed order; all bytes above them are zero (from the initial value).
-- Any other size raises a failure-severity report.
function byte_reverse(val: std_ulogic_vector(63 downto 0); size: integer) return std_ulogic_vector is
variable ret : std_ulogic_vector(63 downto 0) := (others => '0');
begin
-- Vivado doesn't support non constant vector slices, so we have to code
-- each of these.
case_0: case size is
when 2 =>
-- halfword: byte k of the result comes from byte (1-k) of val
for_2 : for k in 0 to 1 loop
ret(((8*k)+7) downto (8*k)) := val((8*(1-k)+7) downto (8*(1-k)));
end loop;
when 4 =>
-- word: byte k of the result comes from byte (3-k) of val
for_4 : for k in 0 to 3 loop
ret(((8*k)+7) downto (8*k)) := val((8*(3-k)+7) downto (8*(3-k)));
end loop;
when 8 =>
-- doubleword: byte k of the result comes from byte (7-k) of val
for_8 : for k in 0 to 7 loop
ret(((8*k)+7) downto (8*k)) := val((8*(7-k)+7) downto (8*(7-k)));
end loop;
when others =>
report "bad byte reverse length " & integer'image(size) severity failure;
end case;

return ret;
end;

-- Sign-extend the low 'size' bytes of val to 64 bits.
--   val  : 64-bit source; only its low 2, 4 or 8 bytes are used
--   size : operand width in bytes; must be 2, 4 or 8
-- Returns the sign-extended 64-bit value.  Any other size raises a
-- failure-severity report; if execution continues the initial all-zero
-- value is returned.
function sign_extend(val: std_ulogic_vector(63 downto 0); size: natural) return std_ulogic_vector is
    variable ret : signed(63 downto 0) := (others => '0');
begin
    case_0: case size is
        when 2 =>
            ret := resize(signed(val(15 downto 0)), 64);
        when 4 =>
            ret := resize(signed(val(31 downto 0)), 64);
        when 8 =>
            ret := resize(signed(val(63 downto 0)), 64);
        when others =>
            -- Message names this function (it used to be a copy of the
            -- byte_reverse message, which made failures misleading).
            report "bad sign extend length " & integer'image(size) severity failure;
    end case;

    return std_ulogic_vector(ret);
end;
end package body helpers;

@ -1,27 +1,8 @@
--
-- Set associative icache
--
-- TODO (in no specific order):
--
-- * Add debug interface to inspect cache content
-- * Add snoop/invalidate path
-- * Add multi-hit error detection
-- * Pipelined bus interface (wb or axi)
-- * Maybe add parity ? There's a few bits free in each BRAM row on Xilinx
-- * Add optimization: service hits on partially loaded lines
-- * Add optimization: (maybe) interrupt reload on flush/redirect
-- * Check if playing with the geometry of the cache tags allow for more
-- efficient use of distributed RAM and less logic/muxes. Currently we
-- write TAG_BITS width which may not match full ram blocks and might
-- cause muxes to be inferred for "partial writes".
-- * Check if making the read size of PLRU a ROM helps utilization
--
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.utils.all;
use work.common.all;
use work.wishbone_types.all;

@ -29,802 +10,164 @@ use work.wishbone_types.all;

entity icache is
generic (
SIM : boolean := false;
-- Line size in bytes
LINE_SIZE : positive := 64;
-- BRAM organisation: We never access more than wishbone_data_bits at
-- a time so to save resources we make the array only that wide, and
-- use consecutive indices to make a cache "line"
--
-- ROW_SIZE is the width in bytes of the BRAM (based on WB, so 64-bits)
ROW_SIZE : positive := wishbone_data_bits / 8;
-- Number of lines in a set
NUM_LINES : positive := 32;
-- Number of ways
NUM_WAYS : positive := 4;
-- L1 ITLB number of entries (direct mapped)
TLB_SIZE : positive := 64;
-- L1 ITLB log_2(page_size)
TLB_LG_PGSZ : positive := 12;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
-- Line size in 64bit doublewords
LINE_SIZE_DW : natural := 8;
-- Number of lines
NUM_LINES : natural := 32
);
port (
clk : in std_ulogic;
rst : in std_ulogic;

i_in : in Fetch1ToIcacheType;
i_out : out IcacheToDecode1Type;

m_in : in MmuToIcacheType;

stall_in : in std_ulogic;
stall_out : out std_ulogic;
flush_in : in std_ulogic;
inval_in : in std_ulogic;
i_in : in Fetch2ToIcacheType;
i_out : out IcacheToFetch2Type;

wishbone_out : out wishbone_master_out;
wishbone_in : in wishbone_slave_out;

wb_snoop_in : in wishbone_master_out := wishbone_master_out_init;

events : out IcacheEventType;
log_out : out std_ulogic_vector(53 downto 0)
wishbone_in : in wishbone_slave_out
);
end entity icache;

architecture rtl of icache is
constant ROW_SIZE_BITS : natural := ROW_SIZE*8;
-- ROW_PER_LINE is the number of row (wishbone transactions) in a line
constant ROW_PER_LINE : natural := LINE_SIZE / ROW_SIZE;
-- BRAM_ROWS is the number of rows in BRAM needed to represent the full
-- icache
constant BRAM_ROWS : natural := NUM_LINES * ROW_PER_LINE;
-- INSN_PER_ROW is the number of 32bit instructions per BRAM row
constant INSN_PER_ROW : natural := ROW_SIZE_BITS / 32;
-- Bit fields counts in the address

-- INSN_BITS is the number of bits to select an instruction in a row
constant INSN_BITS : natural := log2(INSN_PER_ROW);
-- ROW_BITS is the number of bits to select a row
constant ROW_BITS : natural := log2(BRAM_ROWS);
-- ROW_LINEBITS is the number of bits to select a row within a line
constant ROW_LINEBITS : natural := log2(ROW_PER_LINE);
-- LINE_OFF_BITS is the number of bits for the offset in a cache line
constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
-- ROW_OFF_BITS is the number of bits for the offset in a row
constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
-- INDEX_BITS is the number of bits to select a cache line
constant INDEX_BITS : natural := log2(NUM_LINES);
-- SET_SIZE_BITS is the log base 2 of the set size
constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
-- TAG_BITS is the number of bits of the tag part of the address
-- the +1 is to allow the endianness to be stored in the tag
constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS + 1;
-- WAY_BITS is the number of bits to select a way
constant WAY_BITS : natural := log2(NUM_WAYS);

-- Example of layout for 32 lines of 64 bytes:
--
-- .. tag |index| line |
-- .. | row | |
-- .. | | | |00| zero (2)
-- .. | | |-| | INSN_BITS (1)
-- .. | |---| | ROW_LINEBITS (3)
-- .. | |--- - --| LINE_OFF_BITS (6)
-- .. | |- --| ROW_OFF_BITS (3)
-- .. |----- ---| | ROW_BITS (8)
-- .. |-----| | INDEX_BITS (5)
-- .. --------| | TAG_BITS (53)
-- Integer base-2 logarithm, rounded down: the number of times i can be
-- halved before it reaches 1.  Returns 0 for i = 0 and i = 1.
function log2(i : natural) return integer is
    variable remaining : integer := i;
    variable bits : integer := 0;
begin
    loop
        exit when remaining <= 1;
        remaining := remaining / 2;
        bits := bits + 1;
    end loop;
    return bits;
end function;

subtype row_t is integer range 0 to BRAM_ROWS-1;
subtype index_t is integer range 0 to NUM_LINES-1;
subtype way_t is integer range 0 to NUM_WAYS-1;
subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
-- Return true when i is a power of two (exactly one bit set).
--   i : value to test
-- Zero and negative values are reported as "not a power of two" instead
-- of tripping a range error inside to_unsigned (i - 1 is negative for
-- i = 0, and i itself is out of range when negative).
function ispow2(i : integer) return boolean is
begin
    if i <= 0 then
        return false;
    end if;
    -- A power of two shares no set bits with its predecessor.
    return (to_unsigned(i, 32) and to_unsigned(i - 1, 32)) = 0;
end function;

-- The cache data BRAM organized as described above for each way
subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
constant LINE_SIZE : natural := LINE_SIZE_DW*8;
constant OFFSET_BITS : natural := log2(LINE_SIZE);
constant INDEX_BITS : natural := log2(NUM_LINES);
constant TAG_BITS : natural := 64 - OFFSET_BITS - INDEX_BITS;

-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
-- not handle a clean (commented) definition of the cache tags as a 3d
-- memory. For now, work around it by putting all the tags
subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
-- type cache_tags_set_t is array(way_t) of cache_tag_t;
-- type cache_tags_array_t is array(index_t) of cache_tags_set_t;
constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
type cache_tags_array_t is array(index_t) of cache_tags_set_t;
subtype cacheline_type is std_logic_vector((LINE_SIZE*8)-1 downto 0);
type cacheline_array is array(0 to NUM_LINES-1) of cacheline_type;

-- The cache valid bits
subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
type cache_valids_t is array(index_t) of cache_way_valids_t;
type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;
subtype cacheline_tag_type is std_logic_vector(TAG_BITS-1 downto 0);
type cacheline_tag_array is array(0 to NUM_LINES-1) of cacheline_tag_type;

-- Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
signal cache_tags : cache_tags_array_t;
signal cache_valids : cache_valids_t;
signal cachelines : cacheline_array := (others => (others => '0'));
signal tags : cacheline_tag_array := (others => (others => '0'));
signal tags_valid : std_ulogic_vector(NUM_LINES-1 downto 0) := (others => '0');

attribute ram_style : string;
attribute ram_style of cache_tags : signal is "distributed";

-- L1 ITLB.
constant TLB_BITS : natural := log2(TLB_SIZE);
constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
constant TLB_PTE_BITS : natural := 64;

subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
type tlb_valids_t is array(tlb_index_t) of std_ulogic;
subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;

signal itlb_valids : tlb_valids_t;
signal itlb_tags : tlb_tags_t;
signal itlb_ptes : tlb_ptes_t;
attribute ram_style of itlb_tags : signal is "distributed";
attribute ram_style of itlb_ptes : signal is "distributed";

-- Privilege bit from PTE EAA field
signal eaa_priv : std_ulogic;

-- Cache reload state machine
type state_t is (IDLE, STOP_RELOAD, CLR_TAG, WAIT_ACK);
attribute ram_style of cachelines : signal is "block";

type reg_internal_t is record
-- Cache hit state (Latches for 1 cycle BRAM access)
hit_way : way_t;
hit_nia : std_ulogic_vector(63 downto 0);
hit_smark : std_ulogic;
hit_valid : std_ulogic;
big_endian: std_ulogic;
attribute ram_decomp : string;
attribute ram_decomp of cachelines : signal is "power";

-- Cache miss state (reload state machine)
state : state_t;
wb : wishbone_master_out;
store_way : way_t;
store_index : index_t;
store_row : row_t;
store_tag : cache_tag_t;
store_valid : std_ulogic;
end_row_ix : row_in_line_t;
rows_valid : row_per_line_valid_t;
type state_type is (IDLE, WAIT_ACK);

-- TLB miss state
fetch_failed : std_ulogic;
type reg_internal_type is record
state : state_type;
w : wishbone_master_out;
store_index : integer range 0 to (NUM_LINES-1);
store_word : integer range 0 to (LINE_SIZE-1);
end record;

signal r : reg_internal_t;

signal ev : IcacheEventType;

-- Async signals on incoming request
signal req_index : index_t;
signal req_row : row_t;
signal req_hit_way : way_t;
signal req_tag : cache_tag_t;
signal req_is_hit : std_ulogic;
signal req_is_miss : std_ulogic;
signal req_raddr : real_addr_t;

signal tlb_req_index : tlb_index_t;
signal real_addr : real_addr_t;
signal ra_valid : std_ulogic;
signal priv_fault : std_ulogic;
signal access_ok : std_ulogic;

-- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t;
signal cache_out : cache_ram_out_t;

-- PLRU output interface
type plru_out_t is array(index_t) of std_ulogic_vector(WAY_BITS-1 downto 0);
signal plru_victim : plru_out_t;
signal replace_way : way_t;

-- Memory write snoop signals
signal snoop_valid : std_ulogic;
signal snoop_index : index_t;
signal snoop_hits : cache_way_valids_t;

-- Extract the cache line index (also the tag index) from an address:
-- address bits SET_SIZE_BITS-1 downto LINE_OFF_BITS, as an integer.
function get_index(addr: std_ulogic_vector) return index_t is
    variable index_field : unsigned(SET_SIZE_BITS - LINE_OFF_BITS - 1 downto 0);
begin
    index_field := unsigned(addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS));
    return to_integer(index_field);
end;

-- Extract the cache row index (data memory row) from an address:
-- address bits SET_SIZE_BITS-1 downto ROW_OFF_BITS, as an integer.
function get_row(addr: std_ulogic_vector) return row_t is
    variable row_field : unsigned(SET_SIZE_BITS - ROW_OFF_BITS - 1 downto 0);
begin
    row_field := unsigned(addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS));
    return to_integer(row_field);
end;

-- Position of a row inside its cache line: the low ROW_LINEBITS bits of
-- the row number.
function get_row_of_line(row: row_t) return row_in_line_t is
begin
    -- resize on unsigned truncates from the left, keeping the low bits
    return resize(to_unsigned(row, ROW_BITS), ROW_LINEBITS);
end;

-- True when the row-within-line field of a wishbone address (its low
-- LINE_OFF_BITS - ROW_OFF_BITS bits) equals 'last'.
function is_last_row_wb_addr(wb_addr: wishbone_addr_type; last: row_in_line_t) return boolean is
    variable row_sel : unsigned(LINE_OFF_BITS - ROW_OFF_BITS - 1 downto 0);
begin
    row_sel := unsigned(wb_addr(LINE_OFF_BITS - ROW_OFF_BITS - 1 downto 0));
    return row_sel = last;
end;

-- True when the position of 'row' within its cache line equals 'last'.
function is_last_row(row: row_t; last: row_in_line_t) return boolean is
    variable row_in_line : row_in_line_t;
begin
    row_in_line := get_row_of_line(row);
    return row_in_line = last;
end;
signal r : reg_internal_type;

-- Advance a wishbone address to the next row of the same cache line.
-- Only the low ROW_LINEBITS bits are incremented (wrapping within the
-- line); all higher address bits are returned unchanged.
function next_row_wb_addr(wb_addr: wishbone_addr_type)
    return std_ulogic_vector is
    variable bumped : wishbone_addr_type;
begin
    bumped := wb_addr;
    -- Narrow adder: just the row-in-line field
    bumped(ROW_LINEBITS - 1 downto 0) :=
        std_ulogic_vector(unsigned(wb_addr(ROW_LINEBITS - 1 downto 0)) + 1);
    return bumped;
end;

-- Return the next row in the current cache line. We use a dedicated
-- function in order to limit the size of the generated adder to be
-- only the bits within a cache line (3 bits with default settings).
-- The row-in-line field wraps; the line-select bits of the row number
-- are left unchanged.
--
function next_row(row: row_t) return row_t is
    variable row_v   : std_ulogic_vector(ROW_BITS-1 downto 0);
    variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
begin
    row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
    row_idx := row_v(ROW_LINEBITS-1 downto 0);
    -- Increment only the low ROW_LINEBITS bits
    row_v(ROW_LINEBITS-1 downto 0) := std_ulogic_vector(unsigned(row_idx) + 1);
    return to_integer(unsigned(row_v));
end;

-- Read the instruction word for the given address in the current cache row
--   addr : 64-bit address; bits INSN_BITS+1 downto 2 select the word
--   data : one cache row
-- Returns the selected 32-bit slice of data.
function read_insn_word(addr: std_ulogic_vector(63 downto 0);
data: cache_row_t) return std_ulogic_vector is
variable word: integer range 0 to INSN_PER_ROW-1;
begin
-- Drop the two low address bits and use the next INSN_BITS bits as the word index
word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
return data(31+word*32 downto word*32);
end;
signal read_index : integer range 0 to NUM_LINES-1;
signal read_tag : std_ulogic_vector(63-OFFSET_BITS-INDEX_BITS downto 0);
signal read_miss : boolean;

-- Get the tag value from the address
function get_tag(addr: real_addr_t; endian: std_ulogic) return cache_tag_t is
function get_index(addr: std_ulogic_vector(63 downto 0)) return integer is
begin
return endian & addr(addr'left downto SET_SIZE_BITS);
return to_integer(unsigned(addr((OFFSET_BITS+INDEX_BITS-1) downto OFFSET_BITS)));
end;

-- Read a tag from a tag memory row
function read_tag(way: way_t; tagset: cache_tags_set_t) return cache_tag_t is
function get_word(addr: std_ulogic_vector(63 downto 0); data: cacheline_type) return std_ulogic_vector is
variable word : integer;
begin
return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
word := to_integer(unsigned(addr(OFFSET_BITS-1 downto 2)));
return data((word+1)*32-1 downto word*32);
end;

-- Write a tag to tag memory row
procedure write_tag(way: in way_t; tagset: inout cache_tags_set_t;
tag: cache_tag_t) is
function get_tag(addr: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
begin
tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
end;

-- Simple hash for direct-mapped TLB index
function hash_ea(addr: std_ulogic_vector(63 downto 0)) return tlb_index_t is
variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
begin
hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
xor addr(TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto TLB_LG_PGSZ + TLB_BITS)
xor addr(TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto TLB_LG_PGSZ + 2 * TLB_BITS);
return to_integer(unsigned(hash));
return addr(63 downto OFFSET_BITS+INDEX_BITS);
end;
begin
assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2" severity FAILURE;
assert ispow2(NUM_LINES) report "NUM_LINES not power of 2" severity FAILURE;

assert LINE_SIZE mod ROW_SIZE = 0;
assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2" severity FAILURE;
assert ispow2(NUM_LINES) report "NUM_LINES not power of 2" severity FAILURE;
assert ispow2(ROW_PER_LINE) report "ROW_PER_LINE not power of 2" severity FAILURE;
assert ispow2(INSN_PER_ROW) report "INSN_PER_ROW not power of 2" severity FAILURE;
assert (ROW_BITS = INDEX_BITS + ROW_LINEBITS)
report "geometry bits don't add up" severity FAILURE;
assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
report "geometry bits don't add up" severity FAILURE;
assert (REAL_ADDR_BITS + 1 = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
report "geometry bits don't add up" severity FAILURE;
assert (REAL_ADDR_BITS + 1 = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
report "geometry bits don't add up" severity FAILURE;

sim_debug: if SIM generate
debug: process
begin
report "ROW_SIZE = " & natural'image(ROW_SIZE);
report "ROW_PER_LINE = " & natural'image(ROW_PER_LINE);
report "BRAM_ROWS = " & natural'image(BRAM_ROWS);
report "INSN_PER_ROW = " & natural'image(INSN_PER_ROW);
report "INSN_BITS = " & natural'image(INSN_BITS);
report "ROW_BITS = " & natural'image(ROW_BITS);
report "ROW_LINEBITS = " & natural'image(ROW_LINEBITS);
report "LINE_OFF_BITS = " & natural'image(LINE_OFF_BITS);
report "ROW_OFF_BITS = " & natural'image(ROW_OFF_BITS);
report "INDEX_BITS = " & natural'image(INDEX_BITS);
report "TAG_BITS = " & natural'image(TAG_BITS);
report "WAY_BITS = " & natural'image(WAY_BITS);
wait;
end process;
end generate;

-- Generate a cache RAM for each way
rams: for i in 0 to NUM_WAYS-1 generate
signal do_read : std_ulogic;
signal do_write : std_ulogic;
signal rd_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
signal dout : cache_row_t;
signal wr_sel : std_ulogic_vector(ROW_SIZE-1 downto 0);
signal wr_dat : std_ulogic_vector(wishbone_in.dat'left downto 0);
icache_read : process(all)
begin
way: entity work.cache_ram
generic map (
ROW_BITS => ROW_BITS,
WIDTH => ROW_SIZE_BITS
)
port map (
clk => clk,
rd_en => do_read,
rd_addr => rd_addr,
rd_data => dout,
wr_sel => wr_sel,
wr_addr => wr_addr,
wr_data => wr_dat
);
process(all)
variable j: integer;
begin
-- byte-swap read data if big endian
if r.store_tag(TAG_BITS - 1) = '0' then
wr_dat <= wishbone_in.dat;
else
for ii in 0 to (wishbone_in.dat'length / 8) - 1 loop
j := ((ii / 4) * 4) + (3 - (ii mod 4));
wr_dat(ii * 8 + 7 downto ii * 8) <= wishbone_in.dat(j * 8 + 7 downto j * 8);
end loop;
end if;
do_read <= not stall_in;
do_write <= '0';
if wishbone_in.ack = '1' and replace_way = i then
do_write <= '1';
end if;
cache_out(i) <= dout;
rd_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
wr_addr <= std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
for ii in 0 to ROW_SIZE-1 loop
wr_sel(ii) <= do_write;
end loop;
end process;
end generate;
-- Generate PLRUs
maybe_plrus: if NUM_WAYS > 1 generate
begin
plrus: for i in 0 to NUM_LINES-1 generate
-- PLRU interface
signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
signal plru_acc_en : std_ulogic;
signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
begin
plru : entity work.plru
generic map (
BITS => WAY_BITS
)
port map (
clk => clk,
rst => rst,
acc => plru_acc,
acc_en => plru_acc_en,
lru => plru_out
);
read_index <= get_index(i_in.addr);
read_tag <= get_tag(i_in.addr);
read_miss <= false;

process(all)
begin
-- PLRU interface
if get_index(r.hit_nia) = i then
plru_acc_en <= r.hit_valid;
else
plru_acc_en <= '0';
end if;
plru_acc <= std_ulogic_vector(to_unsigned(r.hit_way, WAY_BITS));
plru_victim(i) <= plru_out;
end process;
end generate;
end generate;
i_out.ack <= '0';
i_out.insn <= get_word(i_in.addr, cachelines(read_index));

-- TLB hit detection and real address generation
itlb_lookup : process(all)
variable pte : tlb_pte_t;
variable ttag : tlb_tag_t;
begin
tlb_req_index <= hash_ea(i_in.nia);
pte := itlb_ptes(tlb_req_index);
ttag := itlb_tags(tlb_req_index);
if i_in.virt_mode = '1' then
real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
i_in.nia(TLB_LG_PGSZ - 1 downto 0);
if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
ra_valid <= itlb_valids(tlb_req_index);
if i_in.req = '1' then
if (tags_valid(read_index) = '1') and (tags(read_index) = read_tag) then
-- report hit asynchronously
i_out.ack <= '1';
else
ra_valid <= '0';
end if;
eaa_priv <= pte(3);
else
real_addr <= addr_to_real(i_in.nia);
ra_valid <= '1';
eaa_priv <= '1';
end if;

-- no IAMR, so no KUEP support for now
priv_fault <= eaa_priv and not i_in.priv_mode;
access_ok <= ra_valid and not priv_fault;
end process;

-- iTLB update
itlb_update: process(clk)
variable wr_index : tlb_index_t;
begin
if rising_edge(clk) then
wr_index := hash_ea(m_in.addr);
if rst = '1' or (m_in.tlbie = '1' and m_in.doall = '1') then
-- clear all valid bits
for i in tlb_index_t loop
itlb_valids(i) <= '0';
end loop;
elsif m_in.tlbie = '1' then
-- clear entry regardless of hit or miss
itlb_valids(wr_index) <= '0';
elsif m_in.tlbld = '1' then
itlb_tags(wr_index) <= m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
itlb_ptes(wr_index) <= m_in.pte;
itlb_valids(wr_index) <= '1';
read_miss <= true;
end if;
ev.itlb_miss_resolved <= m_in.tlbld and not rst;
end if;
end process;

-- Cache hit detection, output to fetch2 and other misc logic
icache_comb : process(all)
variable is_hit : std_ulogic;
variable hit_way : way_t;
begin
-- Extract line, row and tag from request
req_index <= get_index(i_in.nia);
req_row <= get_row(i_in.nia);
req_tag <= get_tag(real_addr, i_in.big_endian);

-- Calculate address of beginning of cache row, will be
-- used for cache miss processing if needed
--
req_raddr <= real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
(ROW_OFF_BITS-1 downto 0 => '0');

-- Test if pending request is a hit on any way
hit_way := 0;
is_hit := '0';
for i in way_t loop
if i_in.req = '1' and
(cache_valids(req_index)(i) = '1' or
(r.state = WAIT_ACK and
req_index = r.store_index and
i = r.store_way and
r.rows_valid(req_row mod ROW_PER_LINE) = '1')) then
if read_tag(i, cache_tags(req_index)) = req_tag then
hit_way := i;
is_hit := '1';
end if;
end if;
end loop;

-- Generate the "hit" and "miss" signals for the synchronous blocks
if i_in.req = '1' and access_ok = '1' and flush_in = '0' and rst = '0' then
req_is_hit <= is_hit;
req_is_miss <= not is_hit;
else
req_is_hit <= '0';
req_is_miss <= '0';
end if;
req_hit_way <= hit_way;

-- The way to replace on a miss
if r.state = CLR_TAG then
replace_way <= to_integer(unsigned(plru_victim(r.store_index)));
else
replace_way <= r.store_way;
end if;

-- Output instruction from current cache row
--
-- Note: This is a mild violation of our design principle of having pipeline
-- stages output from a clean latch. In this case we output the result
-- of a mux. The alternative would be output an entire row which
-- I prefer not to do just yet as it would force fetch2 to know about
-- some of the cache geometry information.
--
if r.hit_valid = '1' then
i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way));
else
i_out.insn <= (others => '0');
end if;
i_out.valid <= r.hit_valid;
i_out.nia <= r.hit_nia;
i_out.stop_mark <= r.hit_smark;
i_out.fetch_failed <= r.fetch_failed;
i_out.big_endian <= r.big_endian;
i_out.next_predicted <= i_in.predicted;
i_out.next_pred_ntaken <= i_in.pred_ntaken;
wishbone_out <= r.w;

-- Stall fetch1 if we have a miss on cache or TLB or a protection fault
stall_out <= not (is_hit and access_ok);

-- Wishbone requests output (from the cache miss reload machine)
wishbone_out <= r.wb;
end process;

-- Cache hit synchronous machine
icache_hit : process(clk)
begin
if rising_edge(clk) then
-- keep outputs to fetch2 unchanged on a stall
-- except that flush or reset sets valid to 0
if stall_in = '1' then
if rst = '1' or flush_in = '1' then
r.hit_valid <= '0';
end if;
else
-- On a hit, latch the request for the next cycle, when the BRAM data
-- will be available on the cache_out output of the corresponding way
--
r.hit_valid <= req_is_hit;
if req_is_hit = '1' then
r.hit_way <= req_hit_way;

report "cache hit nia:" & to_hstring(i_in.nia) &
" IR:" & std_ulogic'image(i_in.virt_mode) &
" SM:" & std_ulogic'image(i_in.stop_mark) &
" idx:" & integer'image(req_index) &
" tag:" & to_hstring(req_tag) &
" way:" & integer'image(req_hit_way) &
" RA:" & to_hstring(real_addr);
end if;
end if;
if stall_in = '0' then
-- Send stop marks and NIA down regardless of validity
r.hit_smark <= i_in.stop_mark;
r.hit_nia <= i_in.nia;
r.big_endian <= i_in.big_endian;
end if;
end if;
end process;

-- Cache miss/reload synchronous machine
icache_miss : process(clk)
variable tagset : cache_tags_set_t;
variable tag : cache_tag_t;
variable snoop_addr : real_addr_t;
variable snoop_tag : cache_tag_t;
variable snoop_cache_tags : cache_tags_set_t;
icache_write : process(clk)
begin
if rising_edge(clk) then
ev.icache_miss <= '0';
-- On reset, clear all valid bits to force misses
if rst = '1' then
for i in index_t loop
cache_valids(i) <= (others => '0');
end loop;
tags_valid <= (others => '0');
r.state <= IDLE;
r.wb.cyc <= '0';
r.wb.stb <= '0';

-- We only ever do reads on wishbone
r.wb.dat <= (others => '0');
r.wb.sel <= "11111111";
r.wb.we <= '0';

-- Not useful normally but helps avoiding tons of sim warnings
r.wb.adr <= (others => '0');

snoop_valid <= '0';
snoop_index <= 0;
snoop_hits <= (others => '0');
else
-- Detect snooped writes and decode address into index and tag
-- Since we never write, any write should be snooped
snoop_valid <= wb_snoop_in.cyc and wb_snoop_in.stb and wb_snoop_in.we;
snoop_addr := addr_to_real(wb_to_addr(wb_snoop_in.adr));
snoop_index <= get_index(snoop_addr);
snoop_cache_tags := cache_tags(get_index(snoop_addr));
snoop_tag := get_tag(snoop_addr, '0');
snoop_hits <= (others => '0');
for i in way_t loop
tag := read_tag(i, snoop_cache_tags);
-- Ignore endian bit in comparison
tag(TAG_BITS - 1) := '0';
if tag = snoop_tag then
snoop_hits(i) <= '1';
end if;
end loop;

-- Process cache invalidations
if inval_in = '1' then
for i in index_t loop
cache_valids(i) <= (others => '0');
end loop;
r.store_valid <= '0';
else
-- Do invalidations from snooped stores to memory, one
-- cycle after the address appears on wb_snoop_in.
for i in way_t loop
if snoop_valid = '1' and snoop_hits(i) = '1' then
cache_valids(snoop_index)(i) <= '0';
end if;
end loop;
end if;

-- Main state machine
case r.state is
when IDLE =>
-- Reset per-row valid flags, only used in WAIT_ACK
for i in 0 to ROW_PER_LINE - 1 loop
r.rows_valid(i) <= '0';
end loop;

-- We need to read a cache line
if req_is_miss = '1' then
report "cache miss nia:" & to_hstring(i_in.nia) &
" IR:" & std_ulogic'image(i_in.virt_mode) &
" SM:" & std_ulogic'image(i_in.stop_mark) &
" idx:" & integer'image(req_index) &
" way:" & integer'image(replace_way) &
" tag:" & to_hstring(req_tag) &
" RA:" & to_hstring(real_addr);
ev.icache_miss <= '1';

-- Keep track of our index and way for subsequent stores
r.store_index <= req_index;
r.store_row <= get_row(req_raddr);
r.store_tag <= req_tag;
r.store_valid <= '1';
r.end_row_ix <= get_row_of_line(get_row(req_raddr)) - 1;

-- Prep for first wishbone read. We calculate the address of
-- the start of the cache line and start the WB cycle.
--
r.wb.adr <= addr_to_wb(req_raddr);
r.wb.cyc <= '1';
r.wb.stb <= '1';

-- Track that we had one request sent
r.state <= CLR_TAG;
end if;

when CLR_TAG | WAIT_ACK =>
if r.state = CLR_TAG then
-- Get victim way from plru
r.store_way <= replace_way;

-- Force misses on that way while reloading that line
cache_valids(req_index)(replace_way) <= '0';
r.w.cyc <= '0';
r.w.stb <= '0';
end if;

-- Store new tag in selected way
for i in 0 to NUM_WAYS-1 loop
if i = replace_way then
tagset := cache_tags(r.store_index);
write_tag(i, tagset, r.store_tag);
cache_tags(r.store_index) <= tagset;
end if;
end loop;
r.w.dat <= (others => '0');
r.w.sel <= "11111111";
r.w.we <= '0';

case r.state is
when IDLE =>
if read_miss = true then
r.state <= WAIT_ACK;
end if;
r.store_word <= 0;
r.store_index <= read_index;

-- If we are still sending requests, was one accepted ?
if wishbone_in.stall = '0' and r.wb.stb = '1' then
-- That was the last word ? We are done sending. Clear stb.
--
if is_last_row_wb_addr(r.wb.adr, r.end_row_ix) then
r.wb.stb <= '0';
end if;
tags(read_index) <= read_tag;
tags_valid(read_index) <= '0';

-- Calculate the next row address
r.wb.adr <= next_row_wb_addr(r.wb.adr);
end if;

-- Abort reload if we get an invalidation
if inval_in = '1' then
r.wb.stb <= '0';
r.state <= STOP_RELOAD;
end if;

-- Incoming acks processing
if wishbone_in.ack = '1' then
r.rows_valid(r.store_row mod ROW_PER_LINE) <= not inval_in;
-- Check for completion
if is_last_row(r.store_row, r.end_row_ix) then
-- Complete wishbone cycle
r.wb.cyc <= '0';

-- Cache line is now valid
cache_valids(r.store_index)(replace_way) <= r.store_valid and not inval_in;

-- We are done
r.state <= IDLE;
end if;

-- Increment store row counter
r.store_row <= next_row(r.store_row);
end if;

when STOP_RELOAD =>
-- Wait for all outstanding requests to be satisfied, then
-- go to IDLE state.
if get_row_of_line(r.store_row) = get_row_of_line(get_row(wb_to_addr(r.wb.adr))) then
r.wb.cyc <= '0';
r.state <= IDLE;
r.w.adr <= i_in.addr(63 downto OFFSET_BITS) & (OFFSET_BITS-1 downto 0 => '0');
r.w.cyc <= '1';
r.w.stb <= '1';
end if;
when WAIT_ACK =>
if wishbone_in.ack = '1' then
-- Increment store row counter
r.store_row <= next_row(r.store_row);
end if;
end case;
end if;

-- TLB miss and protection fault processing
if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
r.fetch_failed <= '0';
elsif i_in.req = '1' and access_ok = '0' and stall_in = '0' then
r.fetch_failed <= '1';
end if;
end if;
cachelines(r.store_index)((r.store_word+1)*64-1 downto ((r.store_word)*64)) <= wishbone_in.dat;
r.store_word <= r.store_word + 1;

if r.store_word = (LINE_SIZE_DW-1) then
r.state <= IDLE;
tags_valid(r.store_index) <= '1';
r.w.cyc <= '0';
r.w.stb <= '0';
else
r.w.adr(OFFSET_BITS-1 downto 3) <= std_ulogic_vector(to_unsigned(r.store_word+1, OFFSET_BITS-3));
end if;
end if;
end case;
end if;
end process;

icache_log: if LOG_LENGTH > 0 generate
-- Output data to logger
signal log_data : std_ulogic_vector(53 downto 0);
begin
data_log: process(clk)
variable lway: way_t;
variable wstate: std_ulogic;
begin
if rising_edge(clk) then
lway := req_hit_way;
wstate := '0';
if r.state /= IDLE then
wstate := '1';
end if;
log_data <= i_out.valid &
i_out.insn &
wishbone_in.ack &
r.wb.adr(2 downto 0) &
r.wb.stb & r.wb.cyc &
wishbone_in.stall &
stall_out &
r.fetch_failed &
r.hit_nia(5 downto 2) &
wstate &
std_ulogic_vector(to_unsigned(lway, 3)) &
req_is_hit & req_is_miss &
access_ok &
ra_valid;
end if;
end process;
log_out <= log_data;
end generate;

events <= ev;

end;

@ -1,157 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;

library work;
use work.common.all;
use work.wishbone_types.all;

entity icache_tb is
end icache_tb;

architecture behave of icache_tb is
signal clk : std_ulogic;
signal rst : std_ulogic;

signal i_out : Fetch1ToIcacheType;
signal i_in : IcacheToDecode1Type;

signal m_out : MmuToIcacheType;

signal wb_bram_in : wishbone_master_out;
signal wb_bram_out : wishbone_slave_out;

constant clk_period : time := 10 ns;
begin
-- Device under test: a small icache (LINE_SIZE 64, NUM_LINES 4) so that
-- misses, hits and line aliasing can all be provoked with a few addresses.
-- Port names are from the cache's point of view, hence the testbench
-- signal i_out feeds port i_in and port i_out drives testbench signal i_in.
icache0: entity work.icache
generic map(
LINE_SIZE => 64,
NUM_LINES => 4
)
port map(
clk => clk,
rst => rst,
i_in => i_out,
i_out => i_in,
m_in => m_out,
-- stall, flush and invalidate are tied inactive for this test
stall_in => '0',
flush_in => '0',
inval_in => '0',
wishbone_out => wb_bram_in,
wishbone_in => wb_bram_out
);

-- BRAM Memory slave: answers the icache's wishbone requests.
-- Contents are loaded from icache_test.bin; the stimulus process expects
-- specific instruction words at specific addresses from this image.
bram0: entity work.wishbone_bram_wrapper
generic map(
MEMORY_SIZE => 1024,
RAM_INIT_FILE => "icache_test.bin"
)
port map(
clk => clk,
rst => rst,
-- cross-connected: the cache's master output is the slave's input
wishbone_in => wb_bram_in,
wishbone_out => wb_bram_out
);

-- Free-running clock generator: low for half of clk_period, then high for
-- the other half. The process has no terminating wait, so it repeats until
-- the simulation is stopped.
clk_process: process
begin
clk <= '0';
wait for clk_period/2;
clk <= '1';
wait for clk_period/2;
end process;

-- One-shot reset: hold rst high for two clock periods from time zero,
-- release it, then suspend forever (the bare "wait" never resumes).
rst_process: process
begin
rst <= '1';
wait for 2*clk_period;
rst <= '0';
wait;
end process;

-- Stimulus and checking process. It issues four fetches (miss, hit in the
-- same line, miss in another line, miss on an aliasing address) and checks
-- the returned instruction word each time. Any failed assertion aborts the
-- run (severity failure); reaching std.env.finish means the test passed.
stim: process
begin
-- Idle request with privileged, real-mode, little-endian fetch settings
i_out.req <= '0';
i_out.nia <= (others => '0');
i_out.stop_mark <= '0';
i_out.priv_mode <= '1';
i_out.virt_mode <= '0';
i_out.big_endian <= '0';

-- No MMU/TLB activity during this test
m_out.tlbld <= '0';
m_out.tlbie <= '0';
m_out.addr <= (others => '0');
m_out.pte <= (others => '0');

-- Wait four clock edges before the first request (the reset pulse
-- driven by rst_process lasts two clock periods)
wait until rising_edge(clk);
wait until rising_edge(clk);
wait until rising_edge(clk);
wait until rising_edge(clk);

-- First fetch: the cache starts empty, so this is a miss
i_out.req <= '1';
i_out.nia <= x"0000000000000004";

-- Fixed 30-cycle allowance for the line reload, then resynchronise to
-- the clock edge before sampling the result
wait for 30*clk_period;
wait until rising_edge(clk);

assert i_in.valid = '1' severity failure;
assert i_in.insn = x"00000001"
report "insn @" & to_hstring(i_out.nia) &
"=" & to_hstring(i_in.insn) &
" expected 00000001"
severity failure;

i_out.req <= '0';

wait until rising_edge(clk);

-- hit: a different address that is expected to already be cached, so the
-- result is checked after only two clock edges
i_out.req <= '1';
i_out.nia <= x"0000000000000008";
wait until rising_edge(clk);
wait until rising_edge(clk);
assert i_in.valid = '1' severity failure;
assert i_in.insn = x"00000002"
report "insn @" & to_hstring(i_out.nia) &
"=" & to_hstring(i_in.insn) &
" expected 00000002"
severity failure;
wait until rising_edge(clk);

-- another miss: an address not fetched before, again given the 30-cycle
-- reload allowance
i_out.req <= '1';
i_out.nia <= x"0000000000000040";

wait for 30*clk_period;
wait until rising_edge(clk);

assert i_in.valid = '1' severity failure;
assert i_in.insn = x"00000010"
report "insn @" & to_hstring(i_out.nia) &
"=" & to_hstring(i_in.insn) &
" expected 00000010"
severity failure;

-- test something that aliases: this address is expected to map onto an
-- already-occupied cache entry (presumably that of address 0 given the
-- generics used -- confirm), so the first check requires valid = '0'
-- (a miss, not a stale hit)
i_out.req <= '1';
i_out.nia <= x"0000000000000100";
wait until rising_edge(clk);
wait until rising_edge(clk);
assert i_in.valid = '0' severity failure;
wait until rising_edge(clk);

-- ...and after the reload allowance the correct word must be delivered
wait for 30*clk_period;
wait until rising_edge(clk);

assert i_in.valid = '1' severity failure;
assert i_in.insn = x"00000040"
report "insn @" & to_hstring(i_out.nia) &
"=" & to_hstring(i_in.insn) &
" expected 00000040"
severity failure;

i_out.req <= '0';

-- All checks passed: end the simulation
std.env.finish;
end process;
end;

Binary file not shown.

@ -1,11 +0,0 @@
#include <stddef.h>

void console_init(void);
void console_set_irq_en(bool rx_irq, bool tx_irq);
int getchar(void);
int putchar(int c);
int puts(const char *str);

#ifndef __USE_LIBC
size_t strlen(const char *s);
#endif

@ -1,53 +0,0 @@
#ifndef __IO_H
#define __IO_H

/*
 * MMIO accessors built on the Power ISA cache-inhibited indexed load/store
 * instructions (l*cix / st*cix). Every access is preceded by a `sync` and
 * carries a "memory" clobber so the compiler does not reorder or cache
 * memory accesses around it.
 *
 * <stdint.h> is included here so the header does not rely on the includer
 * having already defined the fixed-width integer types.
 */
#include <stdint.h>

/* Read an 8-bit value from the given address. */
static inline uint8_t readb(unsigned long addr)
{
    uint8_t val;
    __asm__ volatile("sync; lbzcix %0,0,%1" : "=r" (val) : "r" (addr) : "memory");
    return val;
}

/* Read a 16-bit value from the given address. */
static inline uint16_t readw(unsigned long addr)
{
    uint16_t val;
    __asm__ volatile("sync; lhzcix %0,0,%1" : "=r" (val) : "r" (addr) : "memory");
    return val;
}

/* Read a 32-bit value from the given address. */
static inline uint32_t readl(unsigned long addr)
{
    uint32_t val;
    __asm__ volatile("sync; lwzcix %0,0,%1" : "=r" (val) : "r" (addr) : "memory");
    return val;
}

/* Read a 64-bit value from the given address. */
static inline uint64_t readq(unsigned long addr)
{
    uint64_t val;
    __asm__ volatile("sync; ldcix %0,0,%1" : "=r" (val) : "r" (addr) : "memory");
    return val;
}

/* Write an 8-bit value to the given address. */
static inline void writeb(uint8_t val, unsigned long addr)
{
    __asm__ volatile("sync; stbcix %0,0,%1" : : "r" (val), "r" (addr) : "memory");
}

/* Write a 16-bit value to the given address. */
static inline void writew(uint16_t val, unsigned long addr)
{
    __asm__ volatile("sync; sthcix %0,0,%1" : : "r" (val), "r" (addr) : "memory");
}

/* Write a 32-bit value to the given address. */
static inline void writel(uint32_t val, unsigned long addr)
{
    __asm__ volatile("sync; stwcix %0,0,%1" : : "r" (val), "r" (addr) : "memory");
}

/* Write a 64-bit value to the given address. */
static inline void writeq(uint64_t val, unsigned long addr)
{
    __asm__ volatile("sync; stdcix %0,0,%1" : : "r" (val), "r" (addr) : "memory");
}

#endif /* __IO_H */

@ -1,157 +0,0 @@
#ifndef __MICROWATT_SOC_H
#define __MICROWATT_SOC_H

/*
* Microwatt SoC memory map
*/

#define MEMORY_BASE 0x00000000 /* "Main" memory alias, either BRAM or DRAM */
#define DRAM_BASE 0x40000000 /* DRAM if present */
#define BRAM_BASE 0x80000000 /* Internal BRAM */

#define SYSCON_BASE 0xc0000000 /* System control regs */
#define UART_BASE 0xc0002000 /* UART */
#define XICS_ICP_BASE 0xc0004000 /* Interrupt controller */
#define XICS_ICS_BASE 0xc0005000 /* Interrupt controller */
#define SPI_FCTRL_BASE 0xc0006000 /* SPI flash controller registers */
#define DRAM_CTRL_BASE 0xc8000000 /* LiteDRAM control registers */
#define LETH_CSR_BASE 0xc8020000 /* LiteEth CSR registers */
#define LETH_SRAM_BASE 0xc8030000 /* LiteEth MMIO space */
#define LSDC_CSR_BASE 0xc8040000 /* LiteSDCard MMIO space */
#define SPI_FLASH_BASE 0xf0000000 /* SPI Flash memory map */
#define DRAM_INIT_BASE 0xff000000 /* Internal DRAM init firmware */

/*
* Interrupt numbers
*/
#define IRQ_UART0 0
#define IRQ_ETHERNET 1

/*
* Register definitions for the syscon registers
*/

#define SYS_REG_SIGNATURE 0x00
#define SYS_REG_INFO 0x08
#define SYS_REG_INFO_HAS_UART (1ull << 0)
#define SYS_REG_INFO_HAS_DRAM (1ull << 1)
#define SYS_REG_INFO_HAS_BRAM (1ull << 2)
#define SYS_REG_INFO_HAS_SPI_FLASH (1ull << 3)
#define SYS_REG_INFO_HAS_LITEETH (1ull << 4)
#define SYS_REG_INFO_HAS_LARGE_SYSCON (1ull << 5)
#define SYS_REG_INFO_HAS_UART1 (1ull << 6)
#define SYS_REG_INFO_HAS_ARTB (1ull << 7)
#define SYS_REG_INFO_HAS_LITESDCARD (1ull << 8)
#define SYS_REG_BRAMINFO 0x10
#define SYS_REG_BRAMINFO_SIZE_MASK 0xfffffffffffffull
#define SYS_REG_DRAMINFO 0x18
#define SYS_REG_DRAMINFO_SIZE_MASK 0xfffffffffffffull
#define SYS_REG_CLKINFO 0x20
#define SYS_REG_CLKINFO_FREQ_MASK 0xffffffffffull
#define SYS_REG_CTRL 0x28
#define SYS_REG_CTRL_DRAM_AT_0 (1ull << 0)
#define SYS_REG_CTRL_CORE_RESET (1ull << 1)
#define SYS_REG_CTRL_SOC_RESET (1ull << 2)
#define SYS_REG_DRAMINITINFO 0x30
#define SYS_REG_SPI_INFO 0x38
#define SYS_REG_SPI_INFO_FLASH_OFF_MASK 0xffffffff
#define SYS_REG_UART0_INFO 0x40
#define SYS_REG_UART1_INFO 0x48
#define SYS_REG_UART_IS_16550 (1ull << 32)


/*
* Register definitions for the potato UART
*/
#define POTATO_CONSOLE_TX 0x00
#define POTATO_CONSOLE_RX 0x08
#define POTATO_CONSOLE_STATUS 0x10
#define POTATO_CONSOLE_STATUS_RX_EMPTY 0x01
#define POTATO_CONSOLE_STATUS_TX_EMPTY 0x02
#define POTATO_CONSOLE_STATUS_RX_FULL 0x04
#define POTATO_CONSOLE_STATUS_TX_FULL 0x08
#define POTATO_CONSOLE_CLOCK_DIV 0x18
#define POTATO_CONSOLE_IRQ_EN 0x20
#define POTATO_CONSOLE_IRQ_RX 0x01
#define POTATO_CONSOLE_IRQ_TX 0x02

/*
* Register definitions for our standard (16550 style) UART
*/
#define UART_REG_RX 0x00
#define UART_REG_TX 0x00
#define UART_REG_DLL 0x00
#define UART_REG_IER 0x04
#define UART_REG_IER_RDI 0x01
#define UART_REG_IER_THRI 0x02
#define UART_REG_IER_RLSI 0x04
#define UART_REG_IER_MSI 0x08
#define UART_REG_DLM 0x04
#define UART_REG_IIR 0x08
#define UART_REG_FCR 0x08
#define UART_REG_FCR_EN_FIFO 0x01
#define UART_REG_FCR_CLR_RCVR 0x02
#define UART_REG_FCR_CLR_XMIT 0x04
#define UART_REG_FCR_TRIG1 0x00
#define UART_REG_FCR_TRIG4 0x40
#define UART_REG_FCR_TRIG8 0x80
#define UART_REG_FCR_TRIG14 0xc0
#define UART_REG_LCR 0x0c
#define UART_REG_LCR_5BIT 0x00
#define UART_REG_LCR_6BIT 0x01
#define UART_REG_LCR_7BIT 0x02
#define UART_REG_LCR_8BIT 0x03
#define UART_REG_LCR_STOP 0x04
#define UART_REG_LCR_PAR 0x08
#define UART_REG_LCR_EVEN_PAR 0x10
#define UART_REG_LCR_STIC_PAR 0x20
#define UART_REG_LCR_BREAK 0x40
#define UART_REG_LCR_DLAB 0x80
#define UART_REG_MCR 0x10
#define UART_REG_MCR_DTR 0x01
#define UART_REG_MCR_RTS 0x02
#define UART_REG_MCR_OUT1 0x04
#define UART_REG_MCR_OUT2 0x08
#define UART_REG_MCR_LOOP 0x10
#define UART_REG_LSR 0x14
#define UART_REG_LSR_DR 0x01
#define UART_REG_LSR_OE 0x02
#define UART_REG_LSR_PE 0x04
#define UART_REG_LSR_FE 0x08
#define UART_REG_LSR_BI 0x10
#define UART_REG_LSR_THRE 0x20
#define UART_REG_LSR_TEMT 0x40
#define UART_REG_LSR_FIFOE 0x80
#define UART_REG_MSR 0x18
#define UART_REG_SCR 0x1c


/*
* Register definitions for the SPI controller
*/
#define SPI_REG_DATA 0x00 /* Byte access: single wire transfer */
#define SPI_REG_DATA_DUAL 0x01 /* Byte access: dual wire transfer */
#define SPI_REG_DATA_QUAD 0x02 /* Byte access: quad wire transfer */
#define SPI_REG_CTRL 0x04 /* Reset and manual mode control */
#define SPI_REG_CTRL_RESET 0x01 /* reset all registers */
#define SPI_REG_CTRL_MANUAL_CS 0x02 /* assert CS, enable manual mode */
#define SPI_REG_CTRL_CKDIV_SHIFT 8 /* clock div */
#define SPI_REG_CTRL_CKDIV_MASK (0xff << SPI_REG_CTRL_CKDIV_SHIFT)
#define SPI_REG_AUTO_CFG 0x08 /* Automatic map configuration */
#define SPI_REG_AUTO_CFG_CMD_SHIFT 0 /* Command to use for reads */
#define SPI_REG_AUTO_CFG_CMD_MASK (0xff << SPI_REG_AUTO_CFG_CMD_SHIFT)
#define SPI_REG_AUTO_CFG_DUMMIES_SHIFT 8 /* # dummy cycles */
#define SPI_REG_AUTO_CFG_DUMMIES_MASK (0x7 << SPI_REG_AUTO_CFG_DUMMIES_SHIFT)
#define SPI_REG_AUTO_CFG_MODE_SHIFT 11 /* SPI wire mode */
#define SPI_REG_AUTO_CFG_MODE_MASK (0x3 << SPI_REG_AUTO_CFG_MODE_SHIFT)
#define SPI_REG_AUT_CFG_MODE_SINGLE (0 << 11)
#define SPI_REG_AUT_CFG_MODE_DUAL (2 << 11)
#define SPI_REG_AUT_CFG_MODE_QUAD (3 << 11)
#define SPI_REG_AUTO_CFG_ADDR4 (1u << 13) /* 3 or 4 addr bytes */
#define SPI_REG_AUTO_CFG_CKDIV_SHIFT 16 /* clock div */
#define SPI_REG_AUTO_CFG_CKDIV_MASK (0xff << SPI_REG_AUTO_CFG_CKDIV_SHIFT)
#define SPI_REG_AUTO_CFG_CSTOUT_SHIFT 24 /* CS timeout */
#define SPI_REG_AUTO_CFG_CSTOUT_MASK (0x3f << SPI_REG_AUTO_CFG_CSTOUT_SHIFT)


#endif /* __MICROWATT_SOC_H */

@ -2,252 +2,162 @@ library ieee;
use ieee.std_logic_1164.all;

package insn_helpers is
function insn_rs (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_rt (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_ra (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_rb (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_rcreg (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_si (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_ui (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_l (insn_in : std_ulogic_vector) return std_ulogic;
function insn_sh32 (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_mb32 (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_me32 (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_li (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_lk (insn_in : std_ulogic_vector) return std_ulogic;
function insn_aa (insn_in : std_ulogic_vector) return std_ulogic;
function insn_rc (insn_in : std_ulogic_vector) return std_ulogic;
function insn_oe (insn_in : std_ulogic_vector) return std_ulogic;
function insn_bd (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_bf (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_bfa (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_cr (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_bt (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_ba (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_bb (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_fxm (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_bo (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_bi (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_bh (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_d (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_ds (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_dq (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_dx (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_to (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_bc (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_sh (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_me (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_mb (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_frt (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_fra (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_frb (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_frc (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_u (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_rs (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_rt (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_ra (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_rb (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_si (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_ui (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_l (insn_in : std_ulogic_vector) return std_ulogic;
function insn_sh32 (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_mb32 (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_me32 (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_li (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_lk (insn_in : std_ulogic_vector) return std_ulogic;
function insn_rc (insn_in : std_ulogic_vector) return std_ulogic;
function insn_bd (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_bf (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_fxm (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_bo (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_bi (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_bh (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_d (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_ds (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_to (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_bc (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_sh (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_me (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_mb (insn_in : std_ulogic_vector) return std_ulogic_vector;
end package insn_helpers;

package body insn_helpers is
function insn_rs (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 21);
end;

function insn_rt (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 21);
end;

function insn_ra (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(20 downto 16);
end;

function insn_rb (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 11);
end;

function insn_rcreg (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(10 downto 6);
end;

function insn_si (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 0);
end;

function insn_ui (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 0);
end;

function insn_l (insn_in : std_ulogic_vector) return std_ulogic is
begin
return insn_in(21);
end;

function insn_sh32 (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 11);
end;

function insn_mb32 (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(10 downto 6);
end;

function insn_me32 (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(5 downto 1);
end;

function insn_li (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 2);
end;

function insn_lk (insn_in : std_ulogic_vector) return std_ulogic is
begin
return insn_in(0);
end;

function insn_aa (insn_in : std_ulogic_vector) return std_ulogic is
begin
return insn_in(1);
end;

function insn_rc (insn_in : std_ulogic_vector) return std_ulogic is
begin
return insn_in(0);
end;

function insn_oe (insn_in : std_ulogic_vector) return std_ulogic is
begin
return insn_in(10);
end;

function insn_bd (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 2);
end;

function insn_bf (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 23);
end;

function insn_bfa (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(20 downto 18);
end;

function insn_cr (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(10 downto 1);
end;
function insn_bb (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 11);
end;

function insn_ba (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(20 downto 16);
end;

function insn_bt (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 21);
end;

function insn_fxm (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(19 downto 12);
end;

function insn_bo (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 21);
end;

function insn_bi (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(20 downto 16);
end;

function insn_bh (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(12 downto 11);
end;

function insn_d (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 0);
end;

function insn_ds (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 2);
end;

function insn_dq (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 4);
end;

-- Assemble the 16-bit split immediate: insn_in(15 downto 6) forms the top
-- 10 bits, insn_in(20 downto 16) the next 5, and insn_in(0) the lowest bit.
-- NOTE(review): this matches the d0 || d1 || d2 layout of a DX-form
-- instruction such as addpcis -- confirm against the ISA before relying on it.
function insn_dx (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 6) & insn_in(20 downto 16) & insn_in(0);
end;

function insn_to (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 21);
end;

function insn_bc (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(10 downto 6);
end;

-- 6-bit shift amount: insn_in(1) supplies the most significant bit and
-- insn_in(15 downto 11) the low five bits (the same five bits that
-- insn_sh32 returns on their own).
function insn_sh (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(1) & insn_in(15 downto 11);
end;

-- 6-bit mask-end field: insn_in(5) is the most significant bit, followed
-- by insn_in(10 downto 6). This is deliberately the same bit selection as
-- insn_mb; presumably the two fields share one encoding slot and the opcode
-- decides which meaning applies -- confirm against the ISA.
function insn_me (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(5) & insn_in(10 downto 6);
end;

-- 6-bit mask-begin field: insn_in(5) is the most significant bit, followed
-- by insn_in(10 downto 6) (the five bits insn_mb32 returns on their own).
function insn_mb (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(5) & insn_in(10 downto 6);
end;

function insn_frt(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 21);
end;

function insn_fra(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(20 downto 16);
end;

function insn_frb(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 11);
end;

function insn_frc(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(10 downto 6);
end;

function insn_u(insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 12);
end;
function insn_rs (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 21);
end;

function insn_rt (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 21);
end;

function insn_ra (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(20 downto 16);
end;

function insn_rb (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 11);
end;

function insn_si (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 0);
end;

function insn_ui (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 0);
end;

function insn_l (insn_in : std_ulogic_vector) return std_ulogic is
begin
return insn_in(21);
end;

function insn_sh32 (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 11);
end;

function insn_mb32 (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(10 downto 6);
end;

function insn_me32 (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(5 downto 1);
end;

function insn_li (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 2);
end;

function insn_lk (insn_in : std_ulogic_vector) return std_ulogic is
begin
return insn_in(0);
end;

function insn_rc (insn_in : std_ulogic_vector) return std_ulogic is
begin
return insn_in(0);
end;

function insn_bd (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 2);
end;

function insn_bf (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 23);
end;

function insn_fxm (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(19 downto 12);
end;

function insn_bo (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 21);
end;

function insn_bi (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(20 downto 16);
end;

function insn_bh (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(12 downto 11);
end;

function insn_d (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 0);
end;

function insn_ds (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 2);
end;

function insn_to (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(25 downto 21);
end;

function insn_bc (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(10 downto 6);
end;

-- 6-bit shift amount: insn_in(1) supplies the most significant bit and
-- insn_in(15 downto 11) the low five bits (the same five bits that
-- insn_sh32 returns on their own).
function insn_sh (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(1) & insn_in(15 downto 11);
end;

-- 6-bit mask-end field: insn_in(5) is the most significant bit, followed
-- by insn_in(10 downto 6). This is deliberately the same bit selection as
-- insn_mb; presumably the two fields share one encoding slot and the opcode
-- decides which meaning applies -- confirm against the ISA.
function insn_me (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(5) & insn_in(10 downto 6);
end;

-- 6-bit mask-begin field: insn_in(5) is the most significant bit, followed
-- by insn_in(10 downto 6) (the five bits insn_mb32 returns on their own).
function insn_mb (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(5) & insn_in(10 downto 6);
end;
end package body insn_helpers;

@ -1,223 +0,0 @@
#include <stdint.h>
#include <stdbool.h>

#include "console.h"
#include "microwatt_soc.h"
#include "io.h"

#define UART_BAUDS 115200

/*
* Core UART functions to implement for a port
*/

bool uart_is_std;

static uint64_t uart_base;

/*
 * Compute the integer clock divisor for a UART clocked at uart_freq Hz
 * running at `bauds` baud: uart_freq / (16 * bauds), rounded down.
 */
static unsigned long uart_divisor(unsigned long uart_freq, unsigned long bauds)
{
    const unsigned long ticks_per_second = bauds * 16;

    return uart_freq / ticks_per_second;
}

/* Read the 64-bit potato UART register located `offset` bytes from uart_base. */
static uint64_t potato_uart_reg_read(int offset)
{
    const uint64_t reg_addr = uart_base + offset;

    return readq(reg_addr);
}

/* Write `val` to the 64-bit potato UART register `offset` bytes from uart_base. */
static void potato_uart_reg_write(int offset, uint64_t val)
{
    const uint64_t reg_addr = uart_base + offset;

    writeq(val, reg_addr);
}

/* Return 1 when the potato UART status register reports RX_EMPTY, else 0. */
static int potato_uart_rx_empty(void)
{
    const uint64_t status = potato_uart_reg_read(POTATO_CONSOLE_STATUS);

    return (status & POTATO_CONSOLE_STATUS_RX_EMPTY) ? 1 : 0;
}

/* Return 1 when the potato UART status register reports TX_FULL, else 0. */
static int potato_uart_tx_full(void)
{
    const uint64_t status = potato_uart_reg_read(POTATO_CONSOLE_STATUS);

    return (status & POTATO_CONSOLE_STATUS_TX_FULL) ? 1 : 0;
}

/* Fetch the RX register and return its low byte as a character. */
static char potato_uart_read(void)
{
    const uint64_t rx = potato_uart_reg_read(POTATO_CONSOLE_RX);

    return (char)(rx & 0x000000ff);
}

/* Queue character `c` for transmission by writing it to the TX register. */
static void potato_uart_write(char c)
{
    /* Widen to the register width first, exactly as an assignment would. */
    const uint64_t tx = c;

    potato_uart_reg_write(POTATO_CONSOLE_TX, tx);
}

/*
 * Program the potato UART clock divider for UART_BAUDS. The register is
 * written with the computed divisor minus one.
 */
static void potato_uart_init(uint64_t uart_freq)
{
    unsigned long clock_div = uart_divisor(uart_freq, UART_BAUDS);

    clock_div -= 1;
    potato_uart_reg_write(POTATO_CONSOLE_CLOCK_DIV, clock_div);
}

/*
 * Write the potato UART interrupt-enable register: the RX and TX bits are
 * set according to rx_irq / tx_irq, every other bit is cleared.
 */
static void potato_uart_set_irq_en(bool rx_irq, bool tx_irq)
{
    const uint64_t rx_bit = rx_irq ? POTATO_CONSOLE_IRQ_RX : 0;
    const uint64_t tx_bit = tx_irq ? POTATO_CONSOLE_IRQ_TX : 0;

    potato_uart_reg_write(POTATO_CONSOLE_IRQ_EN, rx_bit | tx_bit);
}

/* True when the 16550 line status register has no data-ready (DR) flag. */
static bool std_uart_rx_empty(void)
{
    const uint8_t lsr = readb(uart_base + UART_REG_LSR);

    return (lsr & UART_REG_LSR_DR) == 0;
}

/* Read one byte from the 16550 receive register. */
static uint8_t std_uart_read(void)
{
    const uint8_t rx = readb(uart_base + UART_REG_RX);

    return rx;
}

/* True while the 16550 transmit holding register is not empty (THRE clear). */
static bool std_uart_tx_full(void)
{
    const uint8_t lsr = readb(uart_base + UART_REG_LSR);

    return (lsr & UART_REG_LSR_THRE) == 0;
}

/* Write one byte to the 16550 transmit register. */
static void std_uart_write(uint8_t c)
{
    const uint64_t tx_reg = uart_base + UART_REG_TX;

    writeb(c, tx_reg);
}

/*
 * Write the 16550 interrupt-enable register: RDI is set when rx_irq is
 * true, THRI when tx_irq is true, and every other bit is cleared.
 */
static void std_uart_set_irq_en(bool rx_irq, bool tx_irq)
{
    const uint8_t rx_bit = rx_irq ? UART_REG_IER_RDI : 0;
    const uint8_t tx_bit = tx_irq ? UART_REG_IER_THRI : 0;
    const uint8_t ier = rx_bit | tx_bit;

    writeb(ier, uart_base + UART_REG_IER);
}

/*
 * Initialise the 16550-style UART for UART_BAUDS with 8-bit characters.
 *
 * uart_freq: UART input clock in Hz, used to compute the baud divisor.
 *
 * The order of the register writes matters and must not be changed.
 */
static void std_uart_init(uint64_t uart_freq)
{
unsigned long div = uart_divisor(uart_freq, UART_BAUDS);
/* Set DLAB so the next two writes reach the divisor latch (DLL/DLM) */
writeb(UART_REG_LCR_DLAB, uart_base + UART_REG_LCR);
/* Low byte, then high byte of the divisor (writeb keeps only 8 bits) */
writeb(div & 0xff, uart_base + UART_REG_DLL);
writeb(div >> 8, uart_base + UART_REG_DLM);
/* Select 8-bit words; this value has DLAB clear, ending divisor access */
writeb(UART_REG_LCR_8BIT, uart_base + UART_REG_LCR);
/* Assert the DTR and RTS modem-control outputs */
writeb(UART_REG_MCR_DTR |
UART_REG_MCR_RTS, uart_base + UART_REG_MCR);
/* Enable the FIFOs and clear both receive and transmit FIFOs */
writeb(UART_REG_FCR_EN_FIFO |
UART_REG_FCR_CLR_RCVR |
UART_REG_FCR_CLR_XMIT, uart_base + UART_REG_FCR);
}

/*
 * Block (busy-wait) until the active UART has received a character, then
 * return it. The UART flavour is selected by uart_is_std.
 */
int getchar(void)
{
    if (!uart_is_std) {
        while (potato_uart_rx_empty())
            ; /* Do nothing */
        return potato_uart_read();
    }

    while (std_uart_rx_empty())
        ; /* Do nothing */
    return std_uart_read();
}

/*
 * Busy-wait until the active UART can accept a character, transmit `c`,
 * and return `c`. The UART flavour is selected by uart_is_std.
 */
int putchar(int c)
{
    if (!uart_is_std) {
        while (potato_uart_tx_full())
            ; /* Do Nothing */
        potato_uart_write(c);
        return c;
    }

    while (std_uart_tx_full())
        ; /* Do Nothing */
    std_uart_write(c);
    return c;
}

/*
 * Write the NUL-terminated string `str` to the console, emitting a carriage
 * return (13) before every line feed (10).
 *
 * Unlike the C library puts(), no trailing newline is appended.
 * Always returns 0.
 */
int puts(const char *str)
{
    /* The former loop counter was never read, so iterate on the pointer. */
    for (; *str; str++) {
        char c = *str;

        if (c == 10)
            putchar(13);
        putchar(c);
    }
    return 0;
}

#ifndef __USE_LIBC
/* Return the number of characters in `s` before the terminating NUL. */
size_t strlen(const char *s)
{
    const char *end = s;

    while (*end)
        end++;

    return (size_t)(end - s);
}
#endif

/*
 * Probe the system controller and initialise the console UART.
 *
 * The UART clock is taken from the low 32 bits of SYS_REG_UART0_INFO when
 * the large syscon is present and that field is non-zero; otherwise the
 * processor clock from SYS_REG_CLKINFO is used. SYS_REG_UART_IS_16550 in
 * the same register selects the 16550-style driver over the potato UART.
 */
void console_init(void)
{
    uint64_t sys_info;
    uint64_t proc_freq;
    uint64_t uart_info = 0;
    uint64_t uart_freq = 0;

    proc_freq = readq(SYSCON_BASE + SYS_REG_CLKINFO) & SYS_REG_CLKINFO_FREQ_MASK;
    sys_info = readq(SYSCON_BASE + SYS_REG_INFO);

    /* The UART info register only exists on the large syscon layout. */
    if (sys_info & SYS_REG_INFO_HAS_LARGE_SYSCON) {
        uart_info = readq(SYSCON_BASE + SYS_REG_UART0_INFO);
        uart_freq = uart_info & 0xffffffff;
    }
    if (uart_freq == 0)
        uart_freq = proc_freq;

    uart_base = UART_BASE;
    /*
     * Derive the divisor from the UART's own clock. Previously proc_freq
     * was passed here, leaving the uart_freq computed above unused and
     * giving a wrong baud rate whenever the two clocks differ.
     */
    if (uart_info & SYS_REG_UART_IS_16550) {
        uart_is_std = true;
        std_uart_init(uart_freq);
    } else {
        uart_is_std = false;
        potato_uart_init(uart_freq);
    }
}

/*
 * Enable or disable the console receive / transmit interrupts on whichever
 * UART flavour is active (see uart_is_std).
 */
void console_set_irq_en(bool rx_irq, bool tx_irq)
{
    if (!uart_is_std) {
        potato_uart_set_irq_en(rx_irq, tx_irq);
        return;
    }

    std_uart_set_irq_en(rx_irq, tx_irq);
}

@ -1,36 +0,0 @@
#!/usr/bin/python3
from fusesoc.capi2.generator import Generator
import os
import sys
import pathlib

class LiteDRAMGenerator(Generator):
    """FuseSoC generator that registers the LiteDRAM source files for a board."""

    def run(self):
        """Add the board's generated LiteDRAM files and the shared wrapper.

        Reads 'board' (and 'payload', which is currently unused) from the
        generator config, resolves paths relative to the directory of the
        running script, and passes the file list to add_files().
        """
        board = self.config.get('board')
        payload = self.config.get('payload')

        # Directory layout, relative to the parent of this script's directory
        here = os.path.dirname(sys.argv[0])
        root = os.path.join(here, os.pardir)
        generated = os.path.join(root, "generated", board)
        extras = os.path.join(root, "extras")

        print("Adding LiteDRAM for board... ", board)

        # (directory, file name, fusesoc file type), in registration order
        sources = (
            (generated, "litedram_core.v", 'verilogSource'),
            (generated, "litedram-initmem.vhdl", 'vhdlSource-2008'),
            (generated, "litedram_core.init", 'user'),
            (extras, "litedram-wrapper-l2.vhdl", 'vhdlSource-2008'),
        )

        self.add_files([
            {os.path.join(folder, name): {'file_type': kind}}
            for folder, name, kind in sources
        ])

g = LiteDRAMGenerator()
g.run()
g.write()

File diff suppressed because it is too large Load Diff

@ -1,10 +0,0 @@
# Wrapper makefile around the generated Vlitedram_core.mk.
# NOTE(review): OPT_FAST / OPT_SLOW look like the optimisation settings
# consumed by the included makefile -- confirm. Both use the same flags.
OPT_FAST=-O3 -fstrict-aliasing
OPT_SLOW=-O3 -fstrict-aliasing

# Declared before the include so that top_all, not a target from the
# included file, is the first (default) goal.
top_all: top_all2

include Vlitedram_core.mk

# "default" and VK_GLOBAL_OBJS are not defined here; presumably they come
# from the included makefile -- confirm.
top_all2: default $(VK_GLOBAL_OBJS)

.PHONY: top_all top_all2

@ -1,214 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;

-- Simulation-only interface to an external LiteDRAM model. Each procedure
-- is bound to a foreign (VHPIDIRECT) function of the same name, and signal
-- bundles are exchanged as packed bit vectors whose layouts are given below.
package sim_litedram is
-- WB req format:
-- 73 .. 71 : cti(2..0)
-- 70 .. 69 : bte(1..0)
-- 68 .. 65 : sel(3..0)
-- 64 : we
-- 63 : stb
-- 62 : cyc
-- 61 .. 32 : addr(29..0)
-- 31 .. 0 : write_data(31..0)
--
procedure litedram_set_wb(req : in std_ulogic_vector(73 downto 0));
attribute foreign of litedram_set_wb : procedure is "VHPIDIRECT litedram_set_wb";

-- WB rsp format:
-- 35 : init_error;
-- 34 : init_done;
-- 33 : err
-- 32 : ack
-- 31 .. 0 : read_data(31..0)
--
procedure litedram_get_wb(rsp : out std_ulogic_vector(35 downto 0));
attribute foreign of litedram_get_wb : procedure is "VHPIDIRECT litedram_get_wb";

-- User req format:
-- 171 : cmd_valid
-- 170 : cmd_we
-- 169 : wdata_valid
-- 168 : rdata_ready
-- 167 .. 144 : cmd_addr(23..0)
-- 143 .. 128 : wdata_we(15..0)
-- 127 .. 0 : wdata_data(127..0)
--
procedure litedram_set_user(req: in std_ulogic_vector(171 downto 0));
attribute foreign of litedram_set_user : procedure is "VHPIDIRECT litedram_set_user";

-- User rsp format:
-- 130 : cmd_ready
-- 129 : wdata_ready
-- 128 : rdata_valid
-- 127 .. 0 : rdata_data(127..0)
-- NOTE(review): unlike litedram_get_wb (rsp : out), this "get" procedure
-- declares its vector as "req: in" although it carries a response. This
-- looks like a copy/paste slip; check the callers and the foreign
-- implementation before changing the mode.
procedure litedram_get_user(req: in std_ulogic_vector(130 downto 0));
attribute foreign of litedram_get_user : procedure is "VHPIDIRECT litedram_get_user";
-- Foreign hook taking no arguments; presumably advances the external
-- model by one clock -- confirm against the foreign implementation.
procedure litedram_clock;
attribute foreign of litedram_clock : procedure is "VHPIDIRECT litedram_clock";

-- Foreign initialisation hook; the meaning of "trace" is defined by the
-- foreign implementation (presumably enables waveform tracing -- confirm).
procedure litedram_init(trace: integer);
attribute foreign of litedram_init : procedure is "VHPIDIRECT litedram_init";
end sim_litedram;

-- Placeholder bodies for the foreign procedures declared in the package.
-- Each one fails immediately with the message "VHPI" if it is ever executed,
-- i.e. if the foreign binding did not replace it.
package body sim_litedram is
procedure litedram_set_wb(req : in std_ulogic_vector(73 downto 0)) is
begin
assert false report "VHPI" severity failure;
end procedure;
procedure litedram_get_wb(rsp : out std_ulogic_vector(35 downto 0)) is
begin
assert false report "VHPI" severity failure;
end procedure;
procedure litedram_set_user(req: in std_ulogic_vector(171 downto 0)) is
begin
assert false report "VHPI" severity failure;
end procedure;
procedure litedram_get_user(req: in std_ulogic_vector(130 downto 0)) is
begin
assert false report "VHPI" severity failure;
end procedure;
procedure litedram_clock is
begin
assert false report "VHPI" severity failure;
end procedure;
procedure litedram_init(trace: integer) is
begin
assert false report "VHPI" severity failure;
end procedure;
end sim_litedram;

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.sim_litedram.all;

-- Port-level interface of the LiteDRAM core as instantiated by the rest of
-- the design. The `behaviour` architecture of this entity is a simulation
-- wrapper that services the wb_ctrl_* and user_port_native_0_* ports through
-- the foreign procedures of work.sim_litedram.
entity litedram_core is
port(
-- Clock / reset inputs and PLL status
clk : in std_ulogic;
rst : in std_ulogic;
pll_locked : out std_ulogic;
-- DRAM device pins (ddram_*); the `behaviour` architecture neither drives
-- nor reads any of them
ddram_a : out std_ulogic_vector(0 downto 0);
ddram_ba : out std_ulogic_vector(2 downto 0);
ddram_ras_n : out std_ulogic;
ddram_cas_n : out std_ulogic;
ddram_we_n : out std_ulogic;
ddram_cs_n : out std_ulogic;
ddram_dm : out std_ulogic_vector(1 downto 0);
ddram_dq : inout std_ulogic_vector(15 downto 0);
ddram_dqs_p : inout std_ulogic_vector(1 downto 0);
ddram_dqs_n : inout std_ulogic_vector(1 downto 0);
ddram_clk_p : out std_ulogic_vector(0 downto 0);
ddram_clk_n : out std_ulogic_vector(0 downto 0);
ddram_cke : out std_ulogic;
ddram_odt : out std_ulogic;
ddram_reset_n : out std_ulogic;
-- Initialisation status
init_done : out std_ulogic;
init_error : out std_ulogic;
-- Clock and reset handed back to the user logic
user_clk : out std_ulogic;
user_rst : out std_ulogic;
-- Wishbone control port (30-bit address, 32-bit data)
wb_ctrl_adr : in std_ulogic_vector(29 downto 0);
wb_ctrl_dat_w : in std_ulogic_vector(31 downto 0);
wb_ctrl_dat_r : out std_ulogic_vector(31 downto 0);
wb_ctrl_sel : in std_ulogic_vector(3 downto 0);
wb_ctrl_cyc : in std_ulogic;
wb_ctrl_stb : in std_ulogic;
wb_ctrl_ack : out std_ulogic;
wb_ctrl_we : in std_ulogic;
wb_ctrl_cti : in std_ulogic_vector(2 downto 0);
wb_ctrl_bte : in std_ulogic_vector(1 downto 0);
wb_ctrl_err : out std_ulogic;
-- Native user port 0: command channel, 128-bit write channel with a 16-bit
-- write-enable mask, and 128-bit read channel, each with valid/ready
user_port_native_0_cmd_valid : in std_ulogic;
user_port_native_0_cmd_ready : out std_ulogic;
user_port_native_0_cmd_we : in std_ulogic;
user_port_native_0_cmd_addr : in std_ulogic_vector(23 downto 0);
user_port_native_0_wdata_valid : in std_ulogic;
user_port_native_0_wdata_ready : out std_ulogic;
user_port_native_0_wdata_we : in std_ulogic_vector(15 downto 0);
user_port_native_0_wdata_data : in std_ulogic_vector(127 downto 0);
user_port_native_0_rdata_valid : out std_ulogic;
user_port_native_0_rdata_ready : in std_ulogic;
user_port_native_0_rdata_data : out std_ulogic_vector(127 downto 0)
);
end entity litedram_core;

-- Simulation-only implementation of litedram_core: instead of real logic it
-- packs the Wishbone control port and the native user port into bit vectors,
-- hands them to the foreign litedram_* procedures from work.sim_litedram, and
-- copies the vectors they return back onto the entity outputs.
architecture behaviour of litedram_core is
signal idone : std_ulogic := '0';
signal ierr : std_ulogic := '0';
-- NOTE(review): old_wb_cyc is not referenced anywhere in this architecture.
signal old_wb_cyc : std_ulogic := '1';
begin
-- The user clock/reset are simply the incoming clock/reset; the PLL is
-- reported as always locked.
user_rst <= rst;
user_clk <= clk;
pll_locked <= '1';
init_done <= idone;
init_error <= ierr;

poll: process(user_clk)
-- Push the current input values to the foreign model.
-- Wishbone request (74 bits, MSB first): cti, bte, sel, we, stb, cyc, adr,
-- dat_w. User request (172 bits, MSB first): cmd_valid, cmd_we,
-- wdata_valid, rdata_ready, cmd_addr, wdata_we, wdata_data.
procedure send_signals is
begin
litedram_set_wb(wb_ctrl_cti & wb_ctrl_bte &
wb_ctrl_sel & wb_ctrl_we &
wb_ctrl_stb & wb_ctrl_cyc &
wb_ctrl_adr & wb_ctrl_dat_w);
litedram_set_user(user_port_native_0_cmd_valid &
user_port_native_0_cmd_we &
user_port_native_0_wdata_valid &
user_port_native_0_rdata_ready &
user_port_native_0_cmd_addr &
user_port_native_0_wdata_we &
user_port_native_0_wdata_data);
end procedure;

-- Fetch the foreign model's outputs and drive them onto the entity ports.
procedure recv_signals is
variable wb_response : std_ulogic_vector(35 downto 0);
variable ur_response : std_ulogic_vector(130 downto 0);
begin
-- Wishbone response: 35 init_error, 34 init_done, 33 err, 32 ack,
-- 31..0 read data.
litedram_get_wb(wb_response);
wb_ctrl_dat_r <= wb_response(31 downto 0);
wb_ctrl_ack <= wb_response(32);
wb_ctrl_err <= wb_response(33);
idone <= wb_response(34);
ierr <= wb_response(35);
-- User response: 130 cmd_ready, 129 wdata_ready, 128 rdata_valid,
-- 127..0 read data.
-- NOTE(review): the package declares litedram_get_user's parameter with
-- mode `in`, yet ur_response is read back below as a result; this relies
-- on the foreign code writing through the buffer. Mode `out` (as used by
-- litedram_get_wb) looks like the intended declaration - confirm.
litedram_get_user(ur_response);
user_port_native_0_cmd_ready <= ur_response(130);
user_port_native_0_wdata_ready <= ur_response(129);
user_port_native_0_rdata_valid <= ur_response(128);
user_port_native_0_rdata_data <= ur_response(127 downto 0);
end procedure;

begin
-- Rising edge: present the inputs, sample the outputs, advance the foreign
-- model by one full clock cycle, then sample the outputs again.
if rising_edge(user_clk) then

send_signals;
recv_signals;
-- Then generate a clock cycle ( 0->1 then 1->0 )
litedram_clock;
recv_signals;
end if;

-- Falling edge: present the inputs and sample the outputs again, without
-- clocking the foreign model.
if falling_edge(user_clk) then
send_signals;
recv_signals;
end if;
end process;

end architecture;

library work;
use work.sim_litedram.all;

-- Port-less helper unit: instantiating it makes the simulation call
-- litedram_init with a trace argument of 1.
entity litedram_trace_stub is
end entity litedram_trace_stub;

architecture behaviour of litedram_trace_stub is
begin
    -- Executes once, then suspends for the rest of the simulation.
    enable_trace : process
    begin
        litedram_init(1);
        wait;
    end process enable_trace;
end architecture behaviour;

@ -1,198 +0,0 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <termios.h>
#include <unistd.h>
#include <poll.h>

#include "sim_vhpi_c.h"
#include "Vlitedram_core.h"
#include "verilated_vcd_c.h"

static Vlitedram_core *v;
vluint64_t main_time = 0;

#if VM_TRACE
VerilatedVcdC *tfp;
#endif

/*
 * Exit handler (registered with atexit() by check_init): when tracing is
 * compiled in and a trace file was opened, flush it, close it and free it.
 */
static void cleanup(void)
{
#if VM_TRACE
	if (!tfp)
		return;

	tfp->flush();
	tfp->close();
	delete tfp;
#endif
}

/*
 * Lazily create the Verilated litedram core on first use.
 *
 * Every exported litedram_* entry point calls this first; only the first
 * call does any work, so `traces` is honoured only if that first call is
 * the one that asks for tracing.
 *
 * traces: when true and the model was built with VM_TRACE, open
 *         "litedram.vcd" and attach it to the model.
 *
 * On allocation/construction failure a message is printed and the process
 * exits with status 1.
 */
static inline void check_init(bool traces)
{
	if (v)
		return;

	/*
	 * A plain `new` reports failure by throwing, never by returning NULL,
	 * and an exception must not escape into the (non-C++) caller of the
	 * extern "C" entry points. Turn any exception into the NULL case so
	 * the error path below is actually reachable.
	 */
	try {
		v = new Vlitedram_core;
	} catch (...) {
		v = NULL;
	}
	if (!v) {
		fprintf(stderr, "Failure allocating litedram core\n");
		exit(1);
	}
#if VM_TRACE
	if (traces) {
		// init trace dump
		Verilated::traceEverOn(true);
		tfp = new VerilatedVcdC;
		v->trace(tfp, 99);
		tfp->open("litedram.vcd");
	}
#endif
	/* Make sure the trace file gets flushed and closed at exit. */
	atexit(cleanup);
}

/*
 * Read one logic element from the buffer at *p and advance *p past it.
 * Returns 1 if the element equals vhpi1, 0 for any other value.
 */
unsigned char get_bit(unsigned char **p)
{
	unsigned char *cur = *p;

	*p = cur + 1;

	return (*cur == vhpi1) ? 1 : 0;
}

/*
 * Read `len` consecutive logic elements from *p, most significant bit
 * first, and return them packed into an integer. *p is advanced by `len`.
 */
uint64_t get_bits(unsigned char **p, int len)
{
	uint64_t acc = 0;

	for (; len != 0; len--)
		acc = (acc << 1) | get_bit(p);

	return acc;
}

/*
 * Store one logic element at *p (vhpi1 when `bit` is non-zero, vhpi0
 * otherwise) and advance *p past it.
 */
void set_bit(unsigned char **p, int bit)
{
	unsigned char *dst = *p;

	*dst = bit ? vhpi1 : vhpi0;
	*p = dst + 1;
}

/*
 * Store the low `len` bits of `val` at *p as logic elements, most
 * significant bit first. *p is advanced by `len`.
 */
void set_bits(unsigned char **p, uint64_t val, int len)
{
	while (len) {
		len--;
		set_bit(p, (val >> len) & 1);
	}
}

/*
 * Report the current simulation time (main_time) as a double.
 * Presumably this is the time hook looked up by the Verilator runtime.
 */
double sc_time_stamp(void)
{
	return static_cast<double>(main_time);
}

/*
 * check_size(s, exp): print a warning on stderr, tagged with the name of
 * the enclosing function, when the size `s` differs from the expected
 * size `exp`. Both arguments are evaluated exactly once, as int.
 */
#define check_size(s, exp)						\
	do {								\
		const int got_ = (s);					\
		const int want_ = (exp);				\
		if (got_ != want_)					\
			fprintf(stderr, "WARNING: %s exp %d got %d\n",	\
				__func__, want_, got_);			\
	} while(0)

/*
 * Evaluate the model with its current inputs and, when tracing is compiled
 * in and a trace file is open, dump a sample at the current main_time.
 */
static void do_eval(void)
{
	v->eval();
#if VM_TRACE
	if (!tfp)
		return;

	tfp->dump((double) main_time);
#endif
}

/*
 * Foreign entry point: unpack a 74-element Wishbone request vector into the
 * model's wb_ctrl_* inputs and re-evaluate the model.
 *
 * Field order, first element first: cti(3), bte(2), sel(4), we, stb, cyc,
 * adr(30), dat_w(32). A warning is printed if the number of elements
 * consumed is not 74.
 */
extern "C" void litedram_set_wb(unsigned char *req)
{
	unsigned char *const start = req;

	check_init(false);

	/* The reads below must stay in vector order. */
	v->wb_ctrl_cti   = get_bits(&req, 3);
	v->wb_ctrl_bte   = get_bits(&req, 2);
	v->wb_ctrl_sel   = get_bits(&req, 4);
	v->wb_ctrl_we    = get_bit(&req);
	v->wb_ctrl_stb   = get_bit(&req);
	v->wb_ctrl_cyc   = get_bit(&req);
	v->wb_ctrl_adr   = get_bits(&req, 30);
	v->wb_ctrl_dat_w = get_bits(&req, 32);

	check_size(req - start, 74);

	do_eval();
}

/*
 * Foreign entry point: fill a 36-element Wishbone response vector from the
 * model's outputs. Despite its name, `req` is the buffer being written.
 *
 * Field order, first element first: init_error, init_done, err, ack,
 * dat_r(32). A warning is printed if the number of elements produced is
 * not 36.
 */
extern "C" void litedram_get_wb(unsigned char *req)
{
	unsigned char *const start = req;

	check_init(false);

	/* The writes below must stay in vector order. */
	set_bit(&req, v->init_error);
	set_bit(&req, v->init_done);
	set_bit(&req, v->wb_ctrl_err);
	set_bit(&req, v->wb_ctrl_ack);
	set_bits(&req, v->wb_ctrl_dat_r, 32);

	check_size(req - start, 36);
}

/*
 * Foreign entry point: unpack a 172-element native user-port request vector
 * into the model's user_port_native_0_* inputs and re-evaluate the model.
 *
 * Field order, first element first: cmd_valid, cmd_we, wdata_valid,
 * rdata_ready, cmd_addr(24), wdata_we(16), wdata_data(128). A warning is
 * printed if the number of elements consumed is not 172.
 */
extern "C" void litedram_set_user(unsigned char *req)
{
	unsigned char *const start = req;

	check_init(false);

	/* The reads below must stay in vector order. */
	v->user_port_native_0_cmd_valid   = get_bit(&req);
	v->user_port_native_0_cmd_we      = get_bit(&req);
	v->user_port_native_0_wdata_valid = get_bit(&req);
	v->user_port_native_0_rdata_ready = get_bit(&req);
	v->user_port_native_0_cmd_addr    = get_bits(&req, 24);
	v->user_port_native_0_wdata_we    = get_bits(&req, 16);

	/* 128-bit write data: four 32-bit words, highest word first. */
	for (int word = 3; word >= 0; word--)
		v->user_port_native_0_wdata_data[word] = get_bits(&req, 32);

	check_size(req - start, 172);

	do_eval();
}

/*
 * Foreign entry point: fill a 131-element native user-port response vector
 * from the model's outputs. Despite its name, `req` is the buffer being
 * written.
 *
 * Field order, first element first: cmd_ready, wdata_ready, rdata_valid,
 * rdata_data(128). A warning is printed if the number of elements produced
 * is not 131.
 */
extern "C" void litedram_get_user(unsigned char *req)
{
	unsigned char *const start = req;

	check_init(false);

	/* The writes below must stay in vector order. */
	set_bit(&req, v->user_port_native_0_cmd_ready);
	set_bit(&req, v->user_port_native_0_wdata_ready);
	set_bit(&req, v->user_port_native_0_rdata_valid);

	/* 128-bit read data: four 32-bit words, highest word first. */
	for (int word = 3; word >= 0; word--)
		set_bits(&req, v->user_port_native_0_rdata_data[word], 32);

	check_size(req - start, 131);
}

/*
 * Foreign entry point: run the model through one full clock cycle - clk
 * high then clk low - evaluating it and advancing main_time by one after
 * each half.
 */
extern "C" void litedram_clock(void)
{
	static const unsigned char levels[2] = { 1, 0 };

	check_init(false);

	for (unsigned int phase = 0; phase < 2; phase++) {
		v->clk = levels[phase];
		do_eval();
		main_time++;
	}
}

/*
 * Foreign entry point: create the model if it does not exist yet,
 * requesting tracing when `trace_on` is non-zero.
 */
extern "C" void litedram_init(int trace_on)
{
	check_init(trace_on != 0);
}

File diff suppressed because one or more lines are too long

@ -1,84 +0,0 @@
$ version 1.1

# Signals in entities :
/core_dram_tb/dram/rst
/core_dram_tb/dram/system_clk
/core_dram_tb/dram/system_reset
/core_dram_tb/dram/wb_in
/core_dram_tb/dram/wb_out
/core_dram_tb/dram/user_port0_cmd_valid
/core_dram_tb/dram/user_port0_cmd_ready
/core_dram_tb/dram/user_port0_cmd_we
/core_dram_tb/dram/user_port0_cmd_addr
/core_dram_tb/dram/user_port0_wdata_valid
/core_dram_tb/dram/user_port0_wdata_ready
/core_dram_tb/dram/user_port0_wdata_we
/core_dram_tb/dram/user_port0_wdata_data
/core_dram_tb/dram/user_port0_rdata_valid
/core_dram_tb/dram/user_port0_rdata_ready
/core_dram_tb/dram/user_port0_rdata_data
/core_dram_tb/dram/cache_tags
/core_dram_tb/dram/cache_valids
/core_dram_tb/dram/storeq_rd_ready
/core_dram_tb/dram/storeq_rd_valid
/core_dram_tb/dram/storeq_rd_data
/core_dram_tb/dram/storeq_wr_ready
/core_dram_tb/dram/storeq_wr_valid
/core_dram_tb/dram/storeq_wr_data
/core_dram_tb/dram/accept_store
/core_dram_tb/dram/state
/core_dram_tb/dram/wb_req
/core_dram_tb/dram/store_queued
/core_dram_tb/dram/read_ack_0
/core_dram_tb/dram/read_ack_1
/core_dram_tb/dram/read_ad3_0
/core_dram_tb/dram/read_ad3_1
/core_dram_tb/dram/read_way_0
/core_dram_tb/dram/read_way_1
/core_dram_tb/dram/req_index
/core_dram_tb/dram/req_row
/core_dram_tb/dram/req_hit_way
/core_dram_tb/dram/req_tag
/core_dram_tb/dram/req_op
/core_dram_tb/dram/req_laddr
/core_dram_tb/dram/req_ad3
/core_dram_tb/dram/req_we
/core_dram_tb/dram/req_wdata
/core_dram_tb/dram/store_way
/core_dram_tb/dram/store_index
/core_dram_tb/dram/store_row
/core_dram_tb/dram/cache_out
/core_dram_tb/dram/plru_victim
/core_dram_tb/dram/replace_way
/core_dram_tb/dram/rams/do_read
/core_dram_tb/dram/rams/do_write
/core_dram_tb/dram/rams/rd_addr
/core_dram_tb/dram/rams/wr_addr
/core_dram_tb/dram/rams/wr_data
/core_dram_tb/dram/rams/wr_sel
/core_dram_tb/dram/rams/wr_sel_m
/core_dram_tb/dram/rams/dout
/core_dram_tb/dram/rams/way/clk
/core_dram_tb/dram/rams/way/rd_en
/core_dram_tb/dram/rams/way/rd_addr
/core_dram_tb/dram/rams/way/rd_data
/core_dram_tb/dram/rams/way/wr_sel
/core_dram_tb/dram/rams/way/wr_addr
/core_dram_tb/dram/rams/way/wr_data
/core_dram_tb/dram/rams/way/rd_data0
/core_dram_tb/dram/store_queue/wr_ready
/core_dram_tb/dram/store_queue/wr_valid
/core_dram_tb/dram/store_queue/wr_data
/core_dram_tb/dram/store_queue/rd_ready
/core_dram_tb/dram/store_queue/rd_valid
/core_dram_tb/dram/store_queue/rd_data
/core_dram_tb/dram/store_queue/rd_idx
/core_dram_tb/dram/store_queue/rd_next
/core_dram_tb/dram/store_queue/wr_idx
/core_dram_tb/dram/store_queue/wr_next
/core_dram_tb/dram/store_queue/op_prev
/core_dram_tb/dram/store_queue/op_next
/core_dram_tb/dram/store_queue/full
/core_dram_tb/dram/store_queue/empty
/core_dram_tb/dram/store_queue/push
/core_dram_tb/dram/store_queue/pop

@ -1,83 +0,0 @@
[*]
[*] GTKWave Analyzer v3.3.86 (w)1999-2017 BSI
[*] Mon Jun 22 06:32:16 2020
[*]
[dumpfile] "/home/ANT.AMAZON.COM/benh/hackplace/microwatt/foo.ghw"
[dumpfile_mtime] "Mon Jun 22 06:28:35 2020"
[dumpfile_size] 1680014
[savefile] "/home/ANT.AMAZON.COM/benh/hackplace/microwatt/litedram/extras/wave_tb.gtkw"
[timestart] 1920580000
[size] 2509 1371
[pos] -1 -1
*-24.248457 1935000000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
[treeopen] top.
[treeopen] top.dram_tb.
[treeopen] top.dram_tb.dram.
[sst_width] 301
[signals_width] 433
[sst_expanded] 1
[sst_vpaned_height] 410
@28
top.dram_tb.reset_acks
@420
top.dram_tb.acks
@28
top.dram_tb.rst
top.dram_tb.clk
@22
#{top.dram_tb.wb_in.dat[63:0]} top.dram_tb.wb_in.dat[63] top.dram_tb.wb_in.dat[62] top.dram_tb.wb_in.dat[61] top.dram_tb.wb_in.dat[60] top.dram_tb.wb_in.dat[59] top.dram_tb.wb_in.dat[58] top.dram_tb.wb_in.dat[57] top.dram_tb.wb_in.dat[56] top.dram_tb.wb_in.dat[55] top.dram_tb.wb_in.dat[54] top.dram_tb.wb_in.dat[53] top.dram_tb.wb_in.dat[52] top.dram_tb.wb_in.dat[51] top.dram_tb.wb_in.dat[50] top.dram_tb.wb_in.dat[49] top.dram_tb.wb_in.dat[48] top.dram_tb.wb_in.dat[47] top.dram_tb.wb_in.dat[46] top.dram_tb.wb_in.dat[45] top.dram_tb.wb_in.dat[44] top.dram_tb.wb_in.dat[43] top.dram_tb.wb_in.dat[42] top.dram_tb.wb_in.dat[41] top.dram_tb.wb_in.dat[40] top.dram_tb.wb_in.dat[39] top.dram_tb.wb_in.dat[38] top.dram_tb.wb_in.dat[37] top.dram_tb.wb_in.dat[36] top.dram_tb.wb_in.dat[35] top.dram_tb.wb_in.dat[34] top.dram_tb.wb_in.dat[33] top.dram_tb.wb_in.dat[32] top.dram_tb.wb_in.dat[31] top.dram_tb.wb_in.dat[30] top.dram_tb.wb_in.dat[29] top.dram_tb.wb_in.dat[28] top.dram_tb.wb_in.dat[27] top.dram_tb.wb_in.dat[26] top.dram_tb.wb_in.dat[25] top.dram_tb.wb_in.dat[24] top.dram_tb.wb_in.dat[23] top.dram_tb.wb_in.dat[22] top.dram_tb.wb_in.dat[21] top.dram_tb.wb_in.dat[20] top.dram_tb.wb_in.dat[19] top.dram_tb.wb_in.dat[18] top.dram_tb.wb_in.dat[17] top.dram_tb.wb_in.dat[16] top.dram_tb.wb_in.dat[15] top.dram_tb.wb_in.dat[14] top.dram_tb.wb_in.dat[13] top.dram_tb.wb_in.dat[12] top.dram_tb.wb_in.dat[11] top.dram_tb.wb_in.dat[10] top.dram_tb.wb_in.dat[9] top.dram_tb.wb_in.dat[8] top.dram_tb.wb_in.dat[7] top.dram_tb.wb_in.dat[6] top.dram_tb.wb_in.dat[5] top.dram_tb.wb_in.dat[4] top.dram_tb.wb_in.dat[3] top.dram_tb.wb_in.dat[2] top.dram_tb.wb_in.dat[1] top.dram_tb.wb_in.dat[0]
#{top.dram_tb.wb_in.adr[31:0]} top.dram_tb.wb_in.adr[31] top.dram_tb.wb_in.adr[30] top.dram_tb.wb_in.adr[29] top.dram_tb.wb_in.adr[28] top.dram_tb.wb_in.adr[27] top.dram_tb.wb_in.adr[26] top.dram_tb.wb_in.adr[25] top.dram_tb.wb_in.adr[24] top.dram_tb.wb_in.adr[23] top.dram_tb.wb_in.adr[22] top.dram_tb.wb_in.adr[21] top.dram_tb.wb_in.adr[20] top.dram_tb.wb_in.adr[19] top.dram_tb.wb_in.adr[18] top.dram_tb.wb_in.adr[17] top.dram_tb.wb_in.adr[16] top.dram_tb.wb_in.adr[15] top.dram_tb.wb_in.adr[14] top.dram_tb.wb_in.adr[13] top.dram_tb.wb_in.adr[12] top.dram_tb.wb_in.adr[11] top.dram_tb.wb_in.adr[10] top.dram_tb.wb_in.adr[9] top.dram_tb.wb_in.adr[8] top.dram_tb.wb_in.adr[7] top.dram_tb.wb_in.adr[6] top.dram_tb.wb_in.adr[5] top.dram_tb.wb_in.adr[4] top.dram_tb.wb_in.adr[3] top.dram_tb.wb_in.adr[2] top.dram_tb.wb_in.adr[1] top.dram_tb.wb_in.adr[0]
#{top.dram_tb.wb_in.sel[7:0]} top.dram_tb.wb_in.sel[7] top.dram_tb.wb_in.sel[6] top.dram_tb.wb_in.sel[5] top.dram_tb.wb_in.sel[4] top.dram_tb.wb_in.sel[3] top.dram_tb.wb_in.sel[2] top.dram_tb.wb_in.sel[1] top.dram_tb.wb_in.sel[0]
@28
top.dram_tb.wb_in.cyc
top.dram_tb.wb_in.stb
top.dram_tb.wb_in.we
top.dram_tb.wb_out.ack
top.dram_tb.wb_out.stall
@22
#{top.dram_tb.wb_out.dat[63:0]} top.dram_tb.wb_out.dat[63] top.dram_tb.wb_out.dat[62] top.dram_tb.wb_out.dat[61] top.dram_tb.wb_out.dat[60] top.dram_tb.wb_out.dat[59] top.dram_tb.wb_out.dat[58] top.dram_tb.wb_out.dat[57] top.dram_tb.wb_out.dat[56] top.dram_tb.wb_out.dat[55] top.dram_tb.wb_out.dat[54] top.dram_tb.wb_out.dat[53] top.dram_tb.wb_out.dat[52] top.dram_tb.wb_out.dat[51] top.dram_tb.wb_out.dat[50] top.dram_tb.wb_out.dat[49] top.dram_tb.wb_out.dat[48] top.dram_tb.wb_out.dat[47] top.dram_tb.wb_out.dat[46] top.dram_tb.wb_out.dat[45] top.dram_tb.wb_out.dat[44] top.dram_tb.wb_out.dat[43] top.dram_tb.wb_out.dat[42] top.dram_tb.wb_out.dat[41] top.dram_tb.wb_out.dat[40] top.dram_tb.wb_out.dat[39] top.dram_tb.wb_out.dat[38] top.dram_tb.wb_out.dat[37] top.dram_tb.wb_out.dat[36] top.dram_tb.wb_out.dat[35] top.dram_tb.wb_out.dat[34] top.dram_tb.wb_out.dat[33] top.dram_tb.wb_out.dat[32] top.dram_tb.wb_out.dat[31] top.dram_tb.wb_out.dat[30] top.dram_tb.wb_out.dat[29] top.dram_tb.wb_out.dat[28] top.dram_tb.wb_out.dat[27] top.dram_tb.wb_out.dat[26] top.dram_tb.wb_out.dat[25] top.dram_tb.wb_out.dat[24] top.dram_tb.wb_out.dat[23] top.dram_tb.wb_out.dat[22] top.dram_tb.wb_out.dat[21] top.dram_tb.wb_out.dat[20] top.dram_tb.wb_out.dat[19] top.dram_tb.wb_out.dat[18] top.dram_tb.wb_out.dat[17] top.dram_tb.wb_out.dat[16] top.dram_tb.wb_out.dat[15] top.dram_tb.wb_out.dat[14] top.dram_tb.wb_out.dat[13] top.dram_tb.wb_out.dat[12] top.dram_tb.wb_out.dat[11] top.dram_tb.wb_out.dat[10] top.dram_tb.wb_out.dat[9] top.dram_tb.wb_out.dat[8] top.dram_tb.wb_out.dat[7] top.dram_tb.wb_out.dat[6] top.dram_tb.wb_out.dat[5] top.dram_tb.wb_out.dat[4] top.dram_tb.wb_out.dat[3] top.dram_tb.wb_out.dat[2] top.dram_tb.wb_out.dat[1] top.dram_tb.wb_out.dat[0]
@28
top.dram_tb.rd_valid
top.dram_tb.rd_ready
@22
#{top.dram_tb.rd_data[63:0]} top.dram_tb.rd_data[63] top.dram_tb.rd_data[62] top.dram_tb.rd_data[61] top.dram_tb.rd_data[60] top.dram_tb.rd_data[59] top.dram_tb.rd_data[58] top.dram_tb.rd_data[57] top.dram_tb.rd_data[56] top.dram_tb.rd_data[55] top.dram_tb.rd_data[54] top.dram_tb.rd_data[53] top.dram_tb.rd_data[52] top.dram_tb.rd_data[51] top.dram_tb.rd_data[50] top.dram_tb.rd_data[49] top.dram_tb.rd_data[48] top.dram_tb.rd_data[47] top.dram_tb.rd_data[46] top.dram_tb.rd_data[45] top.dram_tb.rd_data[44] top.dram_tb.rd_data[43] top.dram_tb.rd_data[42] top.dram_tb.rd_data[41] top.dram_tb.rd_data[40] top.dram_tb.rd_data[39] top.dram_tb.rd_data[38] top.dram_tb.rd_data[37] top.dram_tb.rd_data[36] top.dram_tb.rd_data[35] top.dram_tb.rd_data[34] top.dram_tb.rd_data[33] top.dram_tb.rd_data[32] top.dram_tb.rd_data[31] top.dram_tb.rd_data[30] top.dram_tb.rd_data[29] top.dram_tb.rd_data[28] top.dram_tb.rd_data[27] top.dram_tb.rd_data[26] top.dram_tb.rd_data[25] top.dram_tb.rd_data[24] top.dram_tb.rd_data[23] top.dram_tb.rd_data[22] top.dram_tb.rd_data[21] top.dram_tb.rd_data[20] top.dram_tb.rd_data[19] top.dram_tb.rd_data[18] top.dram_tb.rd_data[17] top.dram_tb.rd_data[16] top.dram_tb.rd_data[15] top.dram_tb.rd_data[14] top.dram_tb.rd_data[13] top.dram_tb.rd_data[12] top.dram_tb.rd_data[11] top.dram_tb.rd_data[10] top.dram_tb.rd_data[9] top.dram_tb.rd_data[8] top.dram_tb.rd_data[7] top.dram_tb.rd_data[6] top.dram_tb.rd_data[5] top.dram_tb.rd_data[4] top.dram_tb.rd_data[3] top.dram_tb.rd_data[2] top.dram_tb.rd_data[1] top.dram_tb.rd_data[0]
@200
-
-
-wrapper
@28
top.dram_tb.dram.accept_store
@420
top.dram_tb.dram.req_op
top.dram_tb.dram.state
@28
top.dram_tb.dram.read_ack_1
top.dram_tb.dram.read_ack_0
top.dram_tb.dram.storeq_wr_valid
top.dram_tb.dram.storeq_wr_ready
top.dram_tb.dram.storeq_rd_valid
top.dram_tb.dram.storeq_rd_ready
top.dram_tb.dram.user_port0_rdata_ready
top.dram_tb.dram.user_port0_rdata_valid
top.dram_tb.dram.user_port0_wdata_ready
top.dram_tb.dram.user_port0_wdata_valid
top.dram_tb.dram.user_port0_cmd_we
top.dram_tb.dram.user_port0_cmd_ready
top.dram_tb.dram.user_port0_cmd_valid
top.dram_tb.dram.refill_cmd_valid
@420
top.dram_tb.dram.req_index
@421
top.dram_tb.dram.req_row
@420
top.dram_tb.dram.req_hit_way
@28
top.dram_tb.dram.req_ad3
@420
top.dram_tb.dram.refill_row
top.dram_tb.dram.refill_index
top.dram_tb.dram.refill_way
@28
top.dram_tb.dram.system_clk
[pattern_trace] 1
[pattern_trace] 0

@ -1,37 +0,0 @@
# This file is Copyright (c) 2018-2019 Florent Kermarrec <florent@enjoy-digital.fr>
# License: BSD

{
# General ------------------------------------------------------------------
"cpu": "None", # CPU type (ex vexriscv, serv, None)
"speedgrade": -2, # FPGA speedgrade
"memtype": "DDR3", # DRAM type

# PHY ----------------------------------------------------------------------
"cmd_latency": 0, # Command additional latency
"sdram_module": "MT41K512M16", # SDRAM modules of the board or SO-DIMM
"sdram_module_nb": 2, # Number of byte groups
"sdram_rank_nb": 1, # Number of ranks
"sdram_phy": "A7DDRPHY", # Type of FPGA PHY

# Electrical ---------------------------------------------------------------
"rtt_nom": "60ohm", # Nominal termination
"rtt_wr": "60ohm", # Write termination
"ron": "34ohm", # Output driver impedance

# Frequency ----------------------------------------------------------------
"input_clk_freq": 200e6, # Input clock frequency
"sys_clk_freq": 100e6, # System clock frequency (DDR_clk = 4 x sys_clk)
"iodelay_clk_freq": 200e6, # IODELAYs reference clock frequency

# Core ---------------------------------------------------------------------
"cmd_buffer_depth": 16, # Depth of the command buffer

# User Ports ---------------------------------------------------------------
"user_ports": {
"native_0": {
"type": "native",
"block_until_ready": False,
},
},
}

@ -1,37 +0,0 @@
# This file is Copyright (c) 2018-2019 Florent Kermarrec <florent@enjoy-digital.fr>
# License: BSD

{
# General ------------------------------------------------------------------
"cpu": "None", # CPU type (ex vexriscv, serv, None)
"speedgrade": -1, # FPGA speedgrade
"memtype": "DDR3", # DRAM type

# PHY ----------------------------------------------------------------------
"cmd_latency": 0, # Command additional latency
"sdram_module": "MT41K128M16", # SDRAM modules of the board or SO-DIMM
"sdram_module_nb": 2, # Number of byte groups
"sdram_rank_nb": 1, # Number of ranks
"sdram_phy": "A7DDRPHY", # Type of FPGA PHY

# Electrical ---------------------------------------------------------------
"rtt_nom": "60ohm", # Nominal termination
"rtt_wr": "60ohm", # Write termination
"ron": "34ohm", # Output driver impedance

# Frequency ----------------------------------------------------------------
"input_clk_freq": 100e6, # Input clock frequency
"sys_clk_freq": 100e6, # System clock frequency (DDR_clk = 4 x sys_clk)
"iodelay_clk_freq": 200e6, # IODELAYs reference clock frequency

# Core ---------------------------------------------------------------------
"cmd_buffer_depth": 16, # Depth of the command buffer

# User Ports ---------------------------------------------------------------
"user_ports": {
"native_0": {
"type": "native",
"block_until_ready": False,
},
},
}

@ -1,123 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use std.textio.all;

library work;
use work.wishbone_types.all;
use work.utils.all;

-- Wishbone-attached RAM holding the DRAM initialisation code, optionally
-- followed by an extra payload.
entity dram_init_mem is
generic (
-- File with the extra payload; only read when EXTRA_PAYLOAD_SIZE is
-- non-zero after rounding.
EXTRA_PAYLOAD_FILE : string := "";
-- Size of the extra payload in bytes (0 = no payload).
EXTRA_PAYLOAD_SIZE : integer := 0
);
port (
clk : in std_ulogic;
-- Wishbone request and response records (types from work.wishbone_types).
wb_in : in wb_io_master_out;
wb_out : out wb_io_slave_out
);
end entity dram_init_mem;

architecture rtl of dram_init_mem is

-- Bytes reserved for the init code at the start of the RAM.
constant INIT_RAM_SIZE : integer := 24576;
-- Payload size in bytes after round_up(.., 8) (helper from work.utils;
-- presumably rounds up to a multiple of 8 bytes, i.e. whole 64-bit lines).
constant RND_PAYLOAD_SIZE : integer := round_up(EXTRA_PAYLOAD_SIZE, 8);
-- Init code plus payload, in bytes.
constant TOTAL_RAM_SIZE : integer := INIT_RAM_SIZE + RND_PAYLOAD_SIZE;
-- Number of byte-address bits, as computed by log2ceil from work.utils.
constant INIT_RAM_ABITS : integer := log2ceil(TOTAL_RAM_SIZE-1);
-- Hex file with the init code, one 64-bit word per line.
constant INIT_RAM_FILE : string := "litedram_core.init";

-- The RAM is organised as 32-bit words.
type ram_t is array(0 to (TOTAL_RAM_SIZE / 4) - 1) of std_logic_vector(31 downto 0);

-- XXX FIXME: Have a single init function called twice with
-- an offset as argument
--
-- Load the extra payload from `filename` into `ram`, starting at the first
-- word after the init code (word index INIT_RAM_SIZE/4).
--
-- The file holds one 64-bit hex word per line; each line fills two
-- consecutive 32-bit RAM words, low half first. Loading stops at end of file.
-- If the file still has lines left once the payload area is full, the
-- procedure fails with "Payload too big !".
procedure init_load_payload(ram: inout ram_t; filename: string) is
    file payload_file : text open read_mode is filename;
    variable ram_line : line;
    variable temp_word : std_logic_vector(63 downto 0);
begin
    -- RND_PAYLOAD_SIZE is in bytes and every line carries 8 bytes, so the
    -- payload area has room for RND_PAYLOAD_SIZE/8 lines. Iterating up to
    -- RND_PAYLOAD_SIZE-1 would let an oversized file index past the end of
    -- `ram` before the size check below could report it.
    for i in 0 to (RND_PAYLOAD_SIZE/8)-1 loop
        exit when endfile(payload_file);
        readline(payload_file, ram_line);
        hread(ram_line, temp_word);
        ram((INIT_RAM_SIZE/4) + i*2) := temp_word(31 downto 0);
        ram((INIT_RAM_SIZE/4) + i*2+1) := temp_word(63 downto 32);
    end loop;
    assert endfile(payload_file) report "Payload too big !" severity failure;
end procedure;

-- Build the initial RAM image: start from all zeros, load the init code from
-- the hex file `name` (one 64-bit word per line, stored as two 32-bit words,
-- low half first), then append the extra payload when one is configured.
-- At most INIT_RAM_SIZE/8 lines are read from `name`.
impure function init_load_ram(name : string) return ram_t is
    file ram_file : text open read_mode is name;
    variable text_line : line;
    variable word64 : std_logic_vector(63 downto 0);
    variable contents : ram_t := (others => (others => '0'));
begin
    report "Payload size:" & integer'image(EXTRA_PAYLOAD_SIZE) &
        " rounded to:" & integer'image(RND_PAYLOAD_SIZE);
    report "Total RAM size:" & integer'image(TOTAL_RAM_SIZE) &
        " bytes using " & integer'image(INIT_RAM_ABITS) &
        " address bits";

    for line_no in 0 to (INIT_RAM_SIZE/8)-1 loop
        if endfile(ram_file) then
            exit;
        end if;
        readline(ram_file, text_line);
        hread(text_line, word64);
        contents(line_no*2) := word64(31 downto 0);
        contents(line_no*2+1) := word64(63 downto 32);
    end loop;

    if RND_PAYLOAD_SIZE /= 0 then
        init_load_payload(contents, EXTRA_PAYLOAD_FILE);
    end if;

    return contents;
end function;

-- Return a RAM image with every word cleared to zero.
impure function init_zero return ram_t is
    variable zeroed : ram_t := (others => (others => '0'));
begin
    return zeroed;
end function;

-- Produce the initial RAM image: loaded from `filename` when a name is
-- given, all zeros when the name is empty. The file name is reported first
-- in either case.
impure function initialize_ram(filename: string) return ram_t is
begin
    report "Opening file " & filename;
    if filename'length /= 0 then
        return init_load_ram(filename);
    end if;
    return init_zero;
end function;
-- RAM storage, preloaded at elaboration from INIT_RAM_FILE.
signal init_ram : ram_t := initialize_ram(INIT_RAM_FILE);

-- Tool attribute attached to init_ram with the value "block".
attribute ram_style : string;
attribute ram_style of init_ram: signal is "block";

-- Read-data and acknowledge registers written by the init_ram_0 process.
signal obuf : std_ulogic_vector(31 downto 0);
signal oack : std_ulogic;
begin

-- Wishbone access to init_ram. On a clock edge with cyc and stb both set,
-- the addressed 32-bit word is either read into obuf or updated byte-wise
-- under control of wb_in.sel, and oack is set. wb_out.ack / wb_out.dat are
-- registered copies of oack / obuf, so they change one clock after those.
init_ram_0: process(clk)
variable adr : integer;
begin
if rising_edge(clk) then
-- Default: no acknowledge unless a request is seen below.
oack <= '0';
if (wb_in.cyc and wb_in.stb) = '1' then
-- Word index taken from the low INIT_RAM_ABITS-2 bits of wb_in.adr.
adr := to_integer((unsigned(wb_in.adr(INIT_RAM_ABITS - 3 downto 0))));
if wb_in.we = '0' then
obuf <= init_ram(adr);
else
-- Write only the byte lanes selected by wb_in.sel.
for i in 0 to 3 loop
if wb_in.sel(i) = '1' then
init_ram(adr)(((i + 1) * 8) - 1 downto i * 8) <=
wb_in.dat(((i + 1) * 8) - 1 downto i * 8);
end if;
end loop;
end if;
oack <= '1';
end if;
-- These read the values oack/obuf held before this edge (signal
-- assignments above have not taken effect yet).
wb_out.ack <= oack;
wb_out.dat <= obuf;
end if;
end process;

wb_out.stall <= '0';

end architecture rtl;

@ -1,108 +0,0 @@
#!/usr/bin/python3

from litex.build.tools import write_to_file
from litex.build.tools import replace_in_file
from litedram.gen import *
import subprocess
import os
import shutil

def make_new_dir(base, added):
    """Create a fresh, empty directory ``added`` inside ``base``.

    If the path already exists it is removed with ``shutil.rmtree`` first,
    so any previous contents are lost.

    Returns the path of the new directory.
    """
    target = os.path.join(base, added)
    if os.path.exists(target):
        shutil.rmtree(target)
    os.mkdir(target)
    return target
# Directory layout, derived from the location of this script.
# NOTE: these statements run at import time, and make_new_dir deletes any
# existing "build" and "generated" directories under base_dir.
gen_src_dir = os.path.dirname(os.path.realpath(__file__))
# base_dir is the parent of the directory holding this script.
base_dir = os.path.normpath(os.path.join(gen_src_dir, os.pardir))
build_top_dir = make_new_dir(base_dir, "build")
# Recomputed from base_dir; same value as above provided this script lives in
# "<base_dir>/gen-src".
gen_src_dir = os.path.join(base_dir, "gen-src")
gen_dir = make_new_dir(base_dir, "generated")

# Build the init code for microwatt-initialized DRAM
def build_init_code(build_dir, is_sim):
    """Build the SDRAM init software for one target.

    Writes a hard-wired ``mem.h`` into the target's generated include
    directory, then runs the ``sdram_init`` Makefile inside ``build_dir``.

    build_dir: per-target build directory (also used as ``make -C`` dir).
    is_sim:    when true, ``-D__SIM__`` is passed via ``EXTRA_CFLAGS``.

    Returns the path of the resulting ``sdram_init.hex``.
    Raises ``subprocess.CalledProcessError`` if ``make`` fails.
    """
    # More path fudging
    sw_dir = os.path.join(build_dir, "software")
    sw_inc_dir = os.path.join(sw_dir, "include")
    gen_inc_dir = os.path.join(sw_inc_dir, "generated")
    src_dir = os.path.join(gen_src_dir, "sdram_init")
    lxbios_src_dir = os.path.join(soc_directory, "software")
    print(" sw dir:", sw_dir)
    print("gen_inc_dir:", gen_inc_dir)
    print(" src dir:", src_dir)
    print(" lx src dir:", lxbios_src_dir)

    # Generate mem.h (hard wire size, it's not important)
    mem_h = "#define MAIN_RAM_BASE 0x40000000UL\n#define MAIN_RAM_SIZE 0x10000000UL\n"
    write_to_file(os.path.join(gen_inc_dir, "mem.h"), mem_h)

    # All build settings are passed as make command-line variables.
    makefile = os.path.join(src_dir, "Makefile")
    cmd = ["make", "-C", build_dir, "-f", makefile]
    cmd.append("BUILD_DIR=%s" % sw_dir)
    cmd.append("SRC_DIR=%s" % src_dir)
    cmd.append("GENINC_DIR=%s" % sw_inc_dir)
    cmd.append("LXSRC_DIR=%s" % lxbios_src_dir)

    if is_sim:
        cmd.append("EXTRA_CFLAGS=%s" % "-D__SIM__")

    # Build init code
    print(" Generating init software...")
    r = subprocess.check_call(cmd)
    print("Make result:", r)

    return os.path.join(sw_dir, "obj", "sdram_init.hex")

def generate_one(t):
    """Generate the LiteDRAM core and its init memory for target ``t``.

    Runs ``litedram_gen`` on ``<t>.yml``, builds the init software, and
    copies the init image, the init-memory VHDL wrapper and the generated
    ``litedram_core.v`` into ``generated/<t>``. Any target whose name
    contains "sim" is treated as a simulation target.

    Raises ``subprocess.CalledProcessError`` if a tool invocation fails.
    """
    print("Generating target:", t)

    # Is it a simulation ?
    is_sim = "sim" in t

    # Muck with directory path
    build_dir = make_new_dir(build_top_dir, t)
    t_dir = make_new_dir(gen_dir, t)

    # Resolve the target config next to this script, like every other input
    # below, so the generator does not depend on the current directory.
    config_file = os.path.join(gen_src_dir, "%s.yml" % t)

    cmd = ["litedram_gen", "--output-dir=%s" % build_dir]
    if is_sim:
        cmd.append("--sim")
    cmd.append(config_file)
    subprocess.check_call(cmd)

    # Grab generated gateware dir
    gw_dir = os.path.join(build_dir, "gateware")

    # Generate init code
    src_init_file = build_init_code(build_dir, is_sim)
    src_initram_file = os.path.join(gen_src_dir, "dram-init-mem.vhdl")

    # Copy generated files to target dir, amend them if necessary
    initfile_name = "litedram_core.init"
    core_file = os.path.join(gw_dir, "litedram_core.v")
    dst_init_file = os.path.join(t_dir, initfile_name)
    dst_initram_file = os.path.join(t_dir, "litedram-initmem.vhdl")
    shutil.copyfile(src_init_file, dst_init_file)
    shutil.copyfile(src_initram_file, dst_initram_file)
    if is_sim:
        # For simulation, point the VHDL wrapper at the init file's path
        # under litedram/generated/sim instead of the bare file name.
        initfile_path = os.path.join("litedram", "generated", "sim", initfile_name)
        replace_in_file(dst_initram_file, initfile_name, initfile_path)
    shutil.copy(core_file, t_dir)

def main():
    """Regenerate the LiteDRAM core for every known target, in order."""
    for target in ('arty', 'nexys-video', 'genesys2', 'acorn-cle-215',
                   'wukong-v2', 'orangecrab-85-0.2', 'sim'):
        generate_one(target)


if __name__ == "__main__":
    main()

@ -1,37 +0,0 @@
# This file is Copyright (c) 2018-2019 Florent Kermarrec <florent@enjoy-digital.fr>
# License: BSD

{
# General ------------------------------------------------------------------
"cpu": "None", # CPU type (ex vexriscv, serv, None)
"speedgrade": -2, # FPGA speedgrade
"memtype": "DDR3", # DRAM type

# PHY ----------------------------------------------------------------------
"cmd_latency": 1, # Command additional latency
"sdram_module": "MT41J256M16", # SDRAM modules of the board or SO-DIMM
"sdram_module_nb": 4, # Number of byte groups
"sdram_rank_nb": 1, # Number of ranks
"sdram_phy": "K7DDRPHY", # Type of FPGA PHY

# Electrical ---------------------------------------------------------------
"rtt_nom": "60ohm", # Nominal termination
"rtt_wr": "60ohm", # Write termination
"ron": "34ohm", # Output driver impedance

# Frequency ----------------------------------------------------------------
"input_clk_freq": 200e6, # Input clock frequency
"sys_clk_freq": 100e6, # System clock frequency (DDR_clk = 4 x sys_clk)
"iodelay_clk_freq": 200e6, # IODELAYs reference clock frequency

# Core ---------------------------------------------------------------------
"cmd_buffer_depth": 16, # Depth of the command buffer

# User Ports ---------------------------------------------------------------
"user_ports": {
"native_0": {
"type": "native",
"block_until_ready": False,
},
},
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save