diff --git a/cmake/modules/VTA.cmake b/cmake/modules/VTA.cmake index 0e58d760be5e..280d340b1632 100644 --- a/cmake/modules/VTA.cmake +++ b/cmake/modules/VTA.cmake @@ -101,7 +101,8 @@ elseif(PYTHON) ${VTA_TARGET} STREQUAL "ultra96") target_link_libraries(vta ${__cma_lib}) elseif(${VTA_TARGET} STREQUAL "de10nano") # DE10-Nano rules - target_compile_definitions(vta PUBLIC VTA_MAX_XFER=2097152) # (1<<21) + target_include_directories(vta PUBLIC vta/src/de10nano) + target_include_directories(vta PUBLIC 3rdparty) target_include_directories(vta PUBLIC "/usr/local/intelFPGA_lite/18.1/embedded/ds-5/sw/gcc/arm-linux-gnueabihf/include") endif() diff --git a/docs/vta/install.md b/docs/vta/install.md index dfa0eae71929..0738050c81c7 100644 --- a/docs/vta/install.md +++ b/docs/vta/install.md @@ -146,8 +146,8 @@ Tips regarding the Pynq RPC Server: Before running the examples on your development machine, you'll need to configure your host environment as follows: ```bash # On the Host-side -export VTA_PYNQ_RPC_HOST=192.168.2.99 -export VTA_PYNQ_RPC_PORT=9091 +export VTA_RPC_HOST=192.168.2.99 +export VTA_RPC_PORT=9091 ``` In addition, you'll need to edit the `vta_config.json` file on the host to indicate that we are targeting the Pynq platform, by setting the `TARGET` field to `"pynq"`. 
diff --git a/vta/config/de10nano_sample.json b/vta/config/de10nano_sample.json index c7560ccc0f57..e4148c3e8ecf 100644 --- a/vta/config/de10nano_sample.json +++ b/vta/config/de10nano_sample.json @@ -7,7 +7,7 @@ "LOG_BATCH" : 0, "LOG_BLOCK" : 4, "LOG_UOP_BUFF_SIZE" : 15, - "LOG_INP_BUFF_SIZE" :15, + "LOG_INP_BUFF_SIZE" : 15, "LOG_WGT_BUFF_SIZE" : 18, "LOG_ACC_BUFF_SIZE" : 17 } diff --git a/vta/config/pynq_sample.json b/vta/config/pynq_sample.json index 380984a28972..7a2664105f76 100644 --- a/vta/config/pynq_sample.json +++ b/vta/config/pynq_sample.json @@ -7,7 +7,7 @@ "LOG_BATCH" : 0, "LOG_BLOCK" : 4, "LOG_UOP_BUFF_SIZE" : 15, - "LOG_INP_BUFF_SIZE" :15, + "LOG_INP_BUFF_SIZE" : 15, "LOG_WGT_BUFF_SIZE" : 18, "LOG_ACC_BUFF_SIZE" : 17 } diff --git a/vta/config/ultra96_sample.json b/vta/config/ultra96_sample.json index 013420cff52e..35b5a7e322f0 100644 --- a/vta/config/ultra96_sample.json +++ b/vta/config/ultra96_sample.json @@ -7,7 +7,7 @@ "LOG_BATCH" : 0, "LOG_BLOCK" : 4, "LOG_UOP_BUFF_SIZE" : 15, - "LOG_INP_BUFF_SIZE" :15, + "LOG_INP_BUFF_SIZE" : 15, "LOG_WGT_BUFF_SIZE" : 18, "LOG_ACC_BUFF_SIZE" : 17 } diff --git a/vta/hardware/chisel/Makefile b/vta/hardware/chisel/Makefile index 9804230074ac..21149dfb83e3 100644 --- a/vta/hardware/chisel/Makefile +++ b/vta/hardware/chisel/Makefile @@ -32,16 +32,36 @@ ifeq (, $(VERILATOR_INC_DIR)) endif endif -CONFIG = DefaultPynqConfig +CONFIG = DefaultDe10Config TOP = VTA TOP_TEST = Test BUILD_NAME = build +# Set USE_TRACE = 1 to generate a trace during simulation. USE_TRACE = 0 +# With USE_TRACE = 1, default trace format is VCD. +# Set USE_TRACE_FST = 1 to use the FST format. +# Note that although FST is around two orders of magnitude smaller than VCD +# it is also currently much slower to produce (verilator limitation). But if +# you are low on disk space it may be your only option. 
+USE_TRACE_FST = 0 +# With USE_TRACE = 1, USE_TRACE_DETAILED = 1 will generate traces that also +# include non-interface internal signal names starting with an underscore. +# This will significantly increase the trace size and should only be used +# on a per need basis for difficult debug problems. +USE_TRACE_DETAILED = 0 USE_THREADS = $(shell nproc) VTA_LIBNAME = libvta_hw UNITTEST_NAME = all CXX = g++ +# A debug build with DEBUG = 1 is useful to trace the simulation with a +# debugger. DEBUG = 0 +# With DEBUG = 1, SANITIZE = 1 turns on address sanitizing to verify that +# the verilator build is sane. To be used if you know what you are doing. +SANITIZE = 0 + +CXX_MAJOR := $(shell $(CXX) -dumpversion | sed 's/\..*//') +CXX_HAS_ALIGN_NEW := $(shell [ $(CXX_MAJOR) -ge 7 ] && echo true) config_test = $(TOP_TEST)$(CONFIG) vta_dir = $(abspath ../../) @@ -61,11 +81,15 @@ verilator_opt += -Mdir ${verilator_build_dir} verilator_opt += -I$(chisel_build_dir) ifeq ($(DEBUG), 0) - cxx_flags = -O2 -Wall + cxx_flags = -O2 -Wall -fvisibility=hidden else cxx_flags = -O0 -g -Wall endif -cxx_flags += -fvisibility=hidden -std=c++11 + +cxx_flags += -std=c++11 -Wno-maybe-uninitialized +ifeq ($(CXX_HAS_ALIGN_NEW),true) + cxx_flags += -faligned-new +endif cxx_flags += -DVL_TSIM_NAME=V$(TOP_TEST) cxx_flags += -DVL_PRINTF=printf cxx_flags += -DVL_USER_FINISH @@ -82,13 +106,33 @@ cxx_flags += -I$(tvm_dir)/3rdparty/dlpack/include ld_flags = -fPIC -shared +ifeq ($(SANITIZE), 1) + ifeq ($(DEBUG), 1) + cxx_flags += -fno-omit-frame-pointer -fsanitize=address -fsanitize-recover=address + ld_flags += -fno-omit-frame-pointer -fsanitize=address -fsanitize-recover=address + endif +endif + cxx_objs = $(verilator_build_dir)/verilated.o $(verilator_build_dir)/verilated_dpi.o $(verilator_build_dir)/tsim_device.o ifneq ($(USE_TRACE), 0) - verilator_opt += --trace cxx_flags += -DVM_TRACE=1 - cxx_flags += -DTSIM_TRACE_FILE=$(verilator_build_dir)/$(TOP_TEST).vcd - cxx_objs += 
$(verilator_build_dir)/verilated_vcd_c.o + ifeq ($(USE_TRACE_FST), 1) + cxx_flags += -DVM_TRACE_FST + verilator_opt += --trace-fst + else + verilator_opt += --trace + endif + ifeq ($(USE_TRACE_DETAILED), 1) + verilator_opt += --trace-underscore --trace-structs + endif + ifeq ($(USE_TRACE_FST), 1) + cxx_flags += -DTSIM_TRACE_FILE=$(verilator_build_dir)/$(TOP_TEST).fst + cxx_objs += $(verilator_build_dir)/verilated_fst_c.o + else + cxx_flags += -DTSIM_TRACE_FILE=$(verilator_build_dir)/$(TOP_TEST).vcd + cxx_objs += $(verilator_build_dir)/verilated_vcd_c.o + endif else cxx_flags += -DVM_TRACE=0 endif diff --git a/vta/hardware/chisel/src/main/scala/core/Compute.scala b/vta/hardware/chisel/src/main/scala/core/Compute.scala index c605a1a1a824..a1e7fadd96cf 100644 --- a/vta/hardware/chisel/src/main/scala/core/Compute.scala +++ b/vta/hardware/chisel/src/main/scala/core/Compute.scala @@ -45,6 +45,7 @@ class Compute(debug: Boolean = false)(implicit p: Parameters) extends Module { val wgt = new TensorMaster(tensorType = "wgt") val out = new TensorMaster(tensorType = "out") val finish = Output(Bool()) + val acc_wr_event = Output(Bool()) }) val sIdle :: sSync :: sExe :: Nil = Enum(3) val state = RegInit(sIdle) @@ -125,6 +126,7 @@ class Compute(debug: Boolean = false)(implicit p: Parameters) extends Module { tensorAcc.io.tensor.rd.idx <> Mux(dec.io.isGemm, tensorGemm.io.acc.rd.idx, tensorAlu.io.acc.rd.idx) tensorAcc.io.tensor.wr <> Mux(dec.io.isGemm, tensorGemm.io.acc.wr, tensorAlu.io.acc.wr) io.vme_rd(1) <> tensorAcc.io.vme_rd + io.acc_wr_event := tensorAcc.io.tensor.wr.valid // gemm tensorGemm.io.start := state === sIdle & start & dec.io.isGemm diff --git a/vta/hardware/chisel/src/main/scala/core/Core.scala b/vta/hardware/chisel/src/main/scala/core/Core.scala index 6bfffdc212b7..e2ac51a55d48 100644 --- a/vta/hardware/chisel/src/main/scala/core/Core.scala +++ b/vta/hardware/chisel/src/main/scala/core/Core.scala @@ -111,6 +111,8 @@ class Core(implicit p: Parameters) extends 
Module { ecounters.io.launch := io.vcr.launch ecounters.io.finish := compute.io.finish io.vcr.ecnt <> ecounters.io.ecnt + io.vcr.ucnt <> ecounters.io.ucnt + ecounters.io.acc_wr_event := compute.io.acc_wr_event // Finish instruction is executed and asserts the VCR finish flag val finish = RegNext(compute.io.finish) diff --git a/vta/hardware/chisel/src/main/scala/core/EventCounters.scala b/vta/hardware/chisel/src/main/scala/core/EventCounters.scala index f9fd7f0be105..5ef358627fec 100644 --- a/vta/hardware/chisel/src/main/scala/core/EventCounters.scala +++ b/vta/hardware/chisel/src/main/scala/core/EventCounters.scala @@ -44,6 +44,8 @@ class EventCounters(debug: Boolean = false)(implicit p: Parameters) extends Modu val launch = Input(Bool()) val finish = Input(Bool()) val ecnt = Vec(vp.nECnt, ValidIO(UInt(vp.regBits.W))) + val ucnt = Vec(vp.nUCnt, ValidIO(UInt(vp.regBits.W))) + val acc_wr_event = Input(Bool()) }) val cycle_cnt = RegInit(0.U(vp.regBits.W)) when(io.launch && !io.finish) { @@ -53,4 +55,13 @@ class EventCounters(debug: Boolean = false)(implicit p: Parameters) extends Modu } io.ecnt(0).valid := io.finish io.ecnt(0).bits := cycle_cnt + + val acc_wr_count = Reg(UInt(vp.regBits.W)) + when (!io.launch || io.finish) { + acc_wr_count := 0.U + }.elsewhen (io.acc_wr_event) { + acc_wr_count := acc_wr_count + 1.U + } + io.ucnt(0).valid := io.finish + io.ucnt(0).bits := acc_wr_count } diff --git a/vta/hardware/chisel/src/main/scala/core/LoadUop.scala b/vta/hardware/chisel/src/main/scala/core/LoadUop.scala index 274581f475b3..87bd50858f2e 100644 --- a/vta/hardware/chisel/src/main/scala/core/LoadUop.scala +++ b/vta/hardware/chisel/src/main/scala/core/LoadUop.scala @@ -112,14 +112,18 @@ class LoadUop(debug: Boolean = false)(implicit p: Parameters) extends Module { when(xcnt === xlen) { when(xrem === 0.U) { state := sIdle - }.elsewhen(xrem < xmax) { - state := sReadCmd - xlen := xrem - xrem := 0.U }.otherwise { - state := sReadCmd - xlen := xmax - 1.U - xrem := xrem - 
xmax + raddr := raddr + xmax_bytes + when(xrem < xmax) { + state := sReadCmd + xlen := xrem + xrem := 0.U + } + .otherwise { + state := sReadCmd + xlen := xmax - 1.U + xrem := xrem - xmax + } } } } @@ -134,8 +138,6 @@ class LoadUop(debug: Boolean = false)(implicit p: Parameters) extends Module { }.otherwise { raddr := (io.baddr | (maskOffset & (dec.dram_offset << log2Ceil(uopBytes)))) - uopBytes.U } - }.elsewhen(state === sReadData && xcnt === xlen && xrem =/= 0.U) { - raddr := raddr + xmax_bytes } io.vme_rd.cmd.valid := state === sReadCmd diff --git a/vta/hardware/chisel/src/main/scala/core/TensorAlu.scala b/vta/hardware/chisel/src/main/scala/core/TensorAlu.scala index a6feffaa18dc..6af3c834e451 100644 --- a/vta/hardware/chisel/src/main/scala/core/TensorAlu.scala +++ b/vta/hardware/chisel/src/main/scala/core/TensorAlu.scala @@ -72,7 +72,6 @@ class AluReg(implicit p: Parameters) extends Module { /** Vector of pipeline ALUs */ class AluVector(implicit p: Parameters) extends Module { - val aluBits = p(CoreKey).accBits val io = IO(new Bundle { val opcode = Input(UInt(C_ALU_OP_BITS.W)) val acc_a = new TensorMasterData(tensorType = "acc") diff --git a/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala b/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala index f5cc849547a6..5ab690d8637c 100644 --- a/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala +++ b/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala @@ -103,8 +103,7 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)( state := sXPad1 }.elsewhen(dec.ypad_1 =/= 0.U) { state := sYPad1 - } - .otherwise { + }.otherwise { state := sIdle } }.elsewhen(dataCtrl.io.stride) { @@ -198,11 +197,9 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)( tag := tag + 1.U } - when( - state === sIdle || dataCtrlDone || (set === (tp.tensorLength - 1).U && tag === (tp.numMemBlock - 1).U)) { + when(state === sIdle || dataCtrlDone || (set === (tp.tensorLength - 1).U && tag 
=== (tp.numMemBlock - 1).U)) { set := 0.U - }.elsewhen( - (io.vme_rd.data.fire() || isZeroPad) && tag === (tp.numMemBlock - 1).U) { + }.elsewhen((io.vme_rd.data.fire() || isZeroPad) && tag === (tp.numMemBlock - 1).U) { set := set + 1.U } @@ -211,10 +208,12 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)( when(state === sIdle) { waddr_cur := dec.sram_offset waddr_nxt := dec.sram_offset - }.elsewhen((io.vme_rd.data - .fire() || isZeroPad) && set === (tp.tensorLength - 1).U && tag === (tp.numMemBlock - 1).U) { + }.elsewhen((io.vme_rd.data.fire() || isZeroPad) + && set === (tp.tensorLength - 1).U + && tag === (tp.numMemBlock - 1).U) + { waddr_cur := waddr_cur + 1.U - }.elsewhen(dataCtrl.io.stride) { + }.elsewhen(dataCtrl.io.stride && io.vme_rd.data.fire()) { waddr_cur := waddr_nxt + dec.xsize waddr_nxt := waddr_nxt + dec.xsize } @@ -261,8 +260,7 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)( } // done - val done_no_pad = io.vme_rd.data - .fire() & dataCtrl.io.done & dec.xpad_1 === 0.U & dec.ypad_1 === 0.U + val done_no_pad = io.vme_rd.data.fire() & dataCtrl.io.done & dec.xpad_1 === 0.U & dec.ypad_1 === 0.U val done_x_pad = state === sXPad1 & xPadCtrl1.io.done & dataCtrlDone & dec.ypad_1 === 0.U val done_y_pad = state === sYPad1 & dataCtrlDone & yPadCtrl1.io.done io.done := done_no_pad | done_x_pad | done_y_pad diff --git a/vta/hardware/chisel/src/main/scala/core/TensorStore.scala b/vta/hardware/chisel/src/main/scala/core/TensorStore.scala index 439023be0934..9b4bf748a3a5 100644 --- a/vta/hardware/chisel/src/main/scala/core/TensorStore.scala +++ b/vta/hardware/chisel/src/main/scala/core/TensorStore.scala @@ -62,20 +62,38 @@ class TensorStore(tensorType: String = "none", debug: Boolean = false)( val tag = Reg(UInt(8.W)) val set = Reg(UInt(8.W)) + val xfer_bytes = Reg(chiselTypeOf(io.vme_wr.cmd.bits.addr)) + val xstride_bytes = dec.xstride << log2Ceil(tensorLength * tensorWidth) + val maskOffset = 
VecInit(Seq.fill(M_DRAM_OFFSET_BITS)(true.B)).asUInt + val elemBytes = (p(CoreKey).batch * p(CoreKey).blockOut * p(CoreKey).outBits) / 8 + val pulse_bytes_bits = log2Ceil(mp.dataBits >> 3) + + val xfer_init_addr = io.baddr | (maskOffset & (dec.dram_offset << log2Ceil(elemBytes))) + val xfer_split_addr = waddr_cur + xfer_bytes + val xfer_stride_addr = waddr_nxt + xstride_bytes + + val xfer_init_bytes = xmax_bytes - xfer_init_addr % xmax_bytes + val xfer_init_pulses = xfer_init_bytes >> pulse_bytes_bits + val xfer_split_bytes = xmax_bytes - xfer_split_addr % xmax_bytes + val xfer_split_pulses = xfer_split_bytes >> pulse_bytes_bits + val xfer_stride_bytes = xmax_bytes - xfer_stride_addr % xmax_bytes + val xfer_stride_pulses= xfer_stride_bytes >> pulse_bytes_bits + val sIdle :: sWriteCmd :: sWriteData :: sReadMem :: sWriteAck :: Nil = Enum(5) val state = RegInit(sIdle) // control switch(state) { is(sIdle) { - when(io.start) { + xfer_bytes := xfer_init_bytes + when (io.start) { state := sWriteCmd - when(xsize < xmax) { + when (xsize < xfer_init_pulses) { xlen := xsize xrem := 0.U }.otherwise { - xlen := xmax - 1.U - xrem := xsize - xmax + xlen := xfer_init_pulses - 1.U + xrem := xsize - xfer_init_pulses } } } @@ -101,24 +119,29 @@ class TensorStore(tensorType: String = "none", debug: Boolean = false)( when(xrem === 0.U) { when(ycnt === ysize - 1.U) { state := sIdle - }.otherwise { + }.otherwise { // stride state := sWriteCmd - when(xsize < xmax) { + xfer_bytes := xfer_stride_bytes + when(xsize < xfer_stride_pulses) { xlen := xsize xrem := 0.U }.otherwise { - xlen := xmax - 1.U - xrem := xsize - xmax + xlen := xfer_stride_pulses - 1.U + xrem := xsize - xfer_stride_pulses } } - }.elsewhen(xrem < xmax) { + } // split + .elsewhen(xrem < xfer_split_pulses) { state := sWriteCmd + xfer_bytes := xfer_split_bytes xlen := xrem xrem := 0.U - }.otherwise { + } + .otherwise { state := sWriteCmd - xlen := xmax - 1.U - xrem := xrem - xmax + xfer_bytes := xfer_split_bytes + xlen := 
xfer_split_pulses - 1.U + xrem := xrem - xfer_split_pulses } } } @@ -174,8 +197,7 @@ class TensorStore(tensorType: String = "none", debug: Boolean = false)( when(state === sIdle) { raddr_cur := dec.sram_offset raddr_nxt := dec.sram_offset - }.elsewhen(io.vme_wr.data - .fire() && set === (tensorLength - 1).U && tag === (numMemBlock - 1).U) { + }.elsewhen(io.vme_wr.data.fire() && set === (tensorLength - 1).U && tag === (numMemBlock - 1).U) { raddr_cur := raddr_cur + 1.U }.elsewhen(stride) { raddr_cur := raddr_nxt + dec.xsize @@ -189,18 +211,14 @@ class TensorStore(tensorType: String = "none", debug: Boolean = false)( val mdata = MuxLookup(set, 0.U.asTypeOf(chiselTypeOf(wdata_t)), tread) // write-to-dram - val maskOffset = VecInit(Seq.fill(M_DRAM_OFFSET_BITS)(true.B)).asUInt - val elemBytes = (p(CoreKey).batch * p(CoreKey).blockOut * p(CoreKey).outBits) / 8 when(state === sIdle) { - waddr_cur := io.baddr | (maskOffset & (dec.dram_offset << log2Ceil( - elemBytes))) - waddr_nxt := io.baddr | (maskOffset & (dec.dram_offset << log2Ceil( - elemBytes))) + waddr_cur := xfer_init_addr + waddr_nxt := xfer_init_addr }.elsewhen(state === sWriteAck && io.vme_wr.ack && xrem =/= 0.U) { - waddr_cur := waddr_cur + xmax_bytes + waddr_cur := xfer_split_addr }.elsewhen(stride) { - waddr_cur := waddr_nxt + (dec.xstride << log2Ceil(tensorLength * tensorWidth)) - waddr_nxt := waddr_nxt + (dec.xstride << log2Ceil(tensorLength * tensorWidth)) + waddr_cur := xfer_stride_addr + waddr_nxt := xfer_stride_addr } io.vme_wr.cmd.valid := state === sWriteCmd diff --git a/vta/hardware/chisel/src/main/scala/core/TensorUtil.scala b/vta/hardware/chisel/src/main/scala/core/TensorUtil.scala index 6e6f7e776c0e..d0a8ba7ef647 100644 --- a/vta/hardware/chisel/src/main/scala/core/TensorUtil.scala +++ b/vta/hardware/chisel/src/main/scala/core/TensorUtil.scala @@ -252,8 +252,16 @@ class TensorDataCtrl(tensorType: String = "none", val caddr = Reg(UInt(mp.addrBits.W)) val baddr = Reg(UInt(mp.addrBits.W)) - val len 
= Reg(UInt(mp.lenBits.W)) + val maskOffset = VecInit(Seq.fill(M_DRAM_OFFSET_BITS)(true.B)).asUInt + val elemBytes = + if (tensorType == "inp") { + (p(CoreKey).batch * p(CoreKey).blockIn * p(CoreKey).inpBits) / 8 + } else if (tensorType == "wgt") { + (p(CoreKey).blockOut * p(CoreKey).blockIn * p(CoreKey).wgtBits) / 8 + } else { + (p(CoreKey).batch * p(CoreKey).blockOut * p(CoreKey).accBits) / 8 + } val xmax_bytes = ((1 << mp.lenBits) * mp.dataBits / 8).U val xcnt = Reg(UInt(mp.lenBits.W)) @@ -262,27 +270,53 @@ class TensorDataCtrl(tensorType: String = "none", val xmax = (1 << mp.lenBits).U val ycnt = Reg(chiselTypeOf(dec.ysize)) + val xfer_bytes = Reg(UInt(mp.addrBits.W)) + val pulse_bytes_bits = log2Ceil(mp.dataBits >> 3) + val xstride_bytes = dec.xstride << log2Ceil(elemBytes) + + val xfer_init_addr = io.baddr | (maskOffset & (dec.dram_offset << log2Ceil(elemBytes))) + val xfer_split_addr = caddr + xfer_bytes + val xfer_stride_addr = baddr + xstride_bytes + + val xfer_init_bytes = xmax_bytes - xfer_init_addr % xmax_bytes + val xfer_init_pulses = xfer_init_bytes >> pulse_bytes_bits + val xfer_split_bytes = xmax_bytes - xfer_split_addr % xmax_bytes + val xfer_split_pulses = xfer_split_bytes >> pulse_bytes_bits + val xfer_stride_bytes = xmax_bytes - xfer_stride_addr % xmax_bytes + val xfer_stride_pulses= xfer_stride_bytes >> pulse_bytes_bits + val stride = xcnt === len & xrem === 0.U & ycnt =/= dec.ysize - 1.U val split = xcnt === len & xrem =/= 0.U - when(io.start || (io.xupdate && stride)) { - when(xsize < xmax) { + when(io.start) { + xfer_bytes := xfer_init_bytes + when(xsize < xfer_init_pulses) { len := xsize xrem := 0.U }.otherwise { - len := xmax - 1.U - xrem := xsize - xmax + len := xfer_init_pulses - 1.U + xrem := xsize - xfer_init_pulses + } + }.elsewhen(io.xupdate && stride) { + xfer_bytes := xfer_stride_bytes + when(xsize < xfer_stride_pulses) { + len := xsize + xrem := 0.U + }.otherwise { + len := xfer_stride_pulses - 1.U + xrem := xsize - 
xfer_stride_pulses } }.elsewhen(io.xupdate && split) { - when(xrem < xmax) { + xfer_bytes := xfer_split_bytes + when(xrem < xfer_split_pulses) { len := xrem xrem := 0.U }.otherwise { - len := xmax - 1.U - xrem := xrem - xmax + len := xfer_split_pulses - 1.U + xrem := xrem - xfer_split_pulses } } @@ -298,25 +332,15 @@ class TensorDataCtrl(tensorType: String = "none", ycnt := ycnt + 1.U } - val maskOffset = VecInit(Seq.fill(M_DRAM_OFFSET_BITS)(true.B)).asUInt - val elemBytes = - if (tensorType == "inp") { - (p(CoreKey).batch * p(CoreKey).blockIn * p(CoreKey).inpBits) / 8 - } else if (tensorType == "wgt") { - (p(CoreKey).blockOut * p(CoreKey).blockIn * p(CoreKey).wgtBits) / 8 - } else { - (p(CoreKey).batch * p(CoreKey).blockOut * p(CoreKey).accBits) / 8 - } - when(io.start) { - caddr := io.baddr | (maskOffset & (dec.dram_offset << log2Ceil(elemBytes))) - baddr := io.baddr | (maskOffset & (dec.dram_offset << log2Ceil(elemBytes))) + caddr := xfer_init_addr + baddr := xfer_init_addr }.elsewhen(io.yupdate) { when(split) { - caddr := caddr + xmax_bytes + caddr := xfer_split_addr }.elsewhen(stride) { - caddr := baddr + (dec.xstride << log2Ceil(elemBytes)) - baddr := baddr + (dec.xstride << log2Ceil(elemBytes)) + caddr := xfer_stride_addr + baddr := xfer_stride_addr } } diff --git a/vta/hardware/chisel/src/main/scala/shell/VCR.scala b/vta/hardware/chisel/src/main/scala/shell/VCR.scala index 3e74a256d537..9a80cd7799a3 100644 --- a/vta/hardware/chisel/src/main/scala/shell/VCR.scala +++ b/vta/hardware/chisel/src/main/scala/shell/VCR.scala @@ -34,6 +34,7 @@ case class VCRParams() { val nECnt = 1 val nVals = 1 val nPtrs = 6 + val nUCnt = 1 val regBits = 32 } @@ -53,6 +54,7 @@ class VCRMaster(implicit p: Parameters) extends VCRBase { val ecnt = Vec(vp.nECnt, Flipped(ValidIO(UInt(vp.regBits.W)))) val vals = Output(Vec(vp.nVals, UInt(vp.regBits.W))) val ptrs = Output(Vec(vp.nPtrs, UInt(mp.addrBits.W))) + val ucnt = Vec(vp.nUCnt, Flipped(ValidIO(UInt(vp.regBits.W)))) } /** VCRClient. 
@@ -68,6 +70,7 @@ class VCRClient(implicit p: Parameters) extends VCRBase { val ecnt = Vec(vp.nECnt, ValidIO(UInt(vp.regBits.W))) val vals = Input(Vec(vp.nVals, UInt(vp.regBits.W))) val ptrs = Input(Vec(vp.nPtrs, UInt(mp.addrBits.W))) + val ucnt = Vec(vp.nUCnt, ValidIO(UInt(vp.regBits.W))) } /** VTA Control Registers (VCR). @@ -100,7 +103,7 @@ class VCR(implicit p: Parameters) extends Module { // registers val nPtrs = if (mp.addrBits == 32) vp.nPtrs else 2 * vp.nPtrs - val nTotal = vp.nCtrl + vp.nECnt + vp.nVals + nPtrs + val nTotal = vp.nCtrl + vp.nECnt + vp.nVals + nPtrs + vp.nUCnt val reg = Seq.fill(nTotal)(RegInit(0.U(vp.regBits.W))) val addr = Seq.tabulate(nTotal)(_ * 4) @@ -108,6 +111,7 @@ class VCR(implicit p: Parameters) extends Module { val eo = vp.nCtrl val vo = eo + vp.nECnt val po = vo + vp.nVals + val uo = po + nPtrs switch(wstate) { is(sWriteAddress) { @@ -191,4 +195,12 @@ class VCR(implicit p: Parameters) extends Module { io.vcr.ptrs(i) := Cat(reg(po + 2 * i + 1), reg(po + 2 * i)) } } + + for (i <- 0 until vp.nUCnt) { + when(io.vcr.ucnt(i).valid) { + reg(uo + i) := io.vcr.ucnt(i).bits + }.elsewhen(io.host.w.fire() && addr(uo + i).U === waddr) { + reg(uo + i) := wdata + } + } } diff --git a/vta/hardware/dpi/tsim_device.cc b/vta/hardware/dpi/tsim_device.cc index d197fbd4385e..ffa192b283ea 100644 --- a/vta/hardware/dpi/tsim_device.cc +++ b/vta/hardware/dpi/tsim_device.cc @@ -22,8 +22,12 @@ #include #if VM_TRACE +#ifdef VM_TRACE_FST +#include +#else #include #endif +#endif #if VM_TRACE #define STRINGIZE(x) #x @@ -100,7 +104,11 @@ int VTADPISim() { #if VM_TRACE Verilated::traceEverOn(true); +#ifdef VM_TRACE_FST + VerilatedFstC* tfp = new VerilatedFstC; +#else VerilatedVcdC* tfp = new VerilatedVcdC; +#endif // VM_TRACE_FST top->trace(tfp, 99); tfp->open(STRINGIZE_VALUE_OF(TSIM_TRACE_FILE)); #endif @@ -142,7 +150,7 @@ int VTADPISim() { #endif trace_count++; if ((trace_count % 1000000) == 1) - fprintf(stderr, "[traced %dM cycles]\n", trace_count / 1000000); 
+ fprintf(stderr, "[traced %luM cycles]\n", trace_count / 1000000); while (top->sim_wait) { top->clock = 0; std::this_thread::sleep_for(std::chrono::milliseconds(100)); diff --git a/vta/hardware/intel/Makefile b/vta/hardware/intel/Makefile index 775e8aef765f..b3638dc4c0ab 100644 --- a/vta/hardware/intel/Makefile +++ b/vta/hardware/intel/Makefile @@ -35,6 +35,8 @@ DEVICE = $(shell $(VTA_CONFIG) --get-fpga-dev) DEVICE_FAMILY = $(shell $(VTA_CONFIG) --get-fpga-family) # Project name PROJECT = de10_nano_top +# Frequency in MHz +FREQ_MHZ = $(shell $(VTA_CONFIG) --get-fpga-freq) #--------------------- # Compilation parameters @@ -55,7 +57,8 @@ endif IP_PATH = $(IP_BUILD_PATH)/VTA.DefaultDe10Config.v # Bitstream file path -BIT_PATH = $(HW_BUILD_PATH)/export/vta.rbf +BIT_PATH = $(HW_BUILD_PATH)/export/vta_$(FREQ_MHZ)MHz.rbf +CPF_OPT := -o bitstream_compression=on # System design file path QSYS_PATH = $(HW_BUILD_PATH)/soc_system.qsys @@ -77,13 +80,16 @@ $(QSYS_PATH): $(IP_PATH) cd $(HW_BUILD_PATH) && \ cp -r $(SCRIPT_DIR)/* $(HW_BUILD_PATH) && \ python3 $(SCRIPT_DIR)/set_attrs.py -i $(IP_PATH) -o $(HW_BUILD_PATH)/ip/vta/VTAShell.v $(DSP_FLAG) && \ - qsys-script --script=soc_system.tcl $(DEVICE) $(DEVICE_FAMILY) + qsys-script --script=soc_system.tcl $(DEVICE) $(DEVICE_FAMILY) $(FREQ_MHZ) $(BIT_PATH): $(QSYS_PATH) cd $(HW_BUILD_PATH) && \ quartus_sh -t $(SCRIPT_DIR)/compile_design.tcl $(DEVICE) $(PROJECT) && \ mkdir -p $(shell dirname $(BIT_PATH)) && \ - quartus_cpf -c $(HW_BUILD_PATH)/$(PROJECT).sof $(BIT_PATH) + quartus_cpf $(CPF_OPT) -c $(HW_BUILD_PATH)/$(PROJECT).sof $(BIT_PATH) clean: rm -rf $(BUILD_DIR) + +clean-qsys: + rm -rf $(QSYS_PATH) diff --git a/vta/hardware/intel/scripts/set_clocks.sdc b/vta/hardware/intel/scripts/set_clocks.sdc index d48aa354bb9f..b28e01d2549c 100644 --- a/vta/hardware/intel/scripts/set_clocks.sdc +++ b/vta/hardware/intel/scripts/set_clocks.sdc @@ -31,6 +31,9 @@ set_input_delay -clock altera_reserved_tck -clock_fall 3 [get_ports altera_reser 
set_input_delay -clock altera_reserved_tck -clock_fall 3 [get_ports altera_reserved_tms] set_output_delay -clock altera_reserved_tck 3 [get_ports altera_reserved_tdo] +# Turn off warning on unconstrained LED port. +set_false_path -to [get_ports {LED[0]}] + # Create Generated Clock derive_pll_clocks diff --git a/vta/hardware/intel/scripts/soc_system.tcl b/vta/hardware/intel/scripts/soc_system.tcl index d8bed4fa0994..eea815d47558 100644 --- a/vta/hardware/intel/scripts/soc_system.tcl +++ b/vta/hardware/intel/scripts/soc_system.tcl @@ -21,9 +21,9 @@ create_system soc_system set_project_property DEVICE [lindex $argv 0] set_project_property DEVICE_FAMILY [lindex $argv 1] +set FREQ_MHZ [lindex $argv 2] -# module properties -set_module_property NAME soc_system +set_project_property HIDE_FROM_IP_CATALOG {false} # Instances and instance parameters # (disabled instances are intentionally culled) @@ -33,7 +33,156 @@ set_instance_parameter_value clk_0 {clockFrequencyKnown} {1} set_instance_parameter_value clk_0 {resetSynchronousEdges} {NONE} add_instance hps_0 altera_hps 18.1 +set_instance_parameter_value hps_0 {ABSTRACT_REAL_COMPARE_TEST} {0} +set_instance_parameter_value hps_0 {ABS_RAM_MEM_INIT_FILENAME} {meminit} +set_instance_parameter_value hps_0 {ACV_PHY_CLK_ADD_FR_PHASE} {0.0} +set_instance_parameter_value hps_0 {AC_PACKAGE_DESKEW} {0} +set_instance_parameter_value hps_0 {AC_ROM_USER_ADD_0} {0_0000_0000_0000} +set_instance_parameter_value hps_0 {AC_ROM_USER_ADD_1} {0_0000_0000_1000} +set_instance_parameter_value hps_0 {ADDR_ORDER} {0} +set_instance_parameter_value hps_0 {ADD_EFFICIENCY_MONITOR} {0} +set_instance_parameter_value hps_0 {ADD_EXTERNAL_SEQ_DEBUG_NIOS} {0} +set_instance_parameter_value hps_0 {ADVANCED_CK_PHASES} {0} +set_instance_parameter_value hps_0 {ADVERTIZE_SEQUENCER_SW_BUILD_FILES} {0} +set_instance_parameter_value hps_0 {AFI_DEBUG_INFO_WIDTH} {32} +set_instance_parameter_value hps_0 {ALTMEMPHY_COMPATIBLE_MODE} {0} +set_instance_parameter_value hps_0 
{AP_MODE} {0} +set_instance_parameter_value hps_0 {AP_MODE_EN} {0} +set_instance_parameter_value hps_0 {AUTO_PD_CYCLES} {0} +set_instance_parameter_value hps_0 {AUTO_POWERDN_EN} {0} +set_instance_parameter_value hps_0 {AVL_DATA_WIDTH_PORT} {32 32 32 32 32 32} +set_instance_parameter_value hps_0 {AVL_MAX_SIZE} {4} +set_instance_parameter_value hps_0 {BONDING_OUT_ENABLED} {0} +set_instance_parameter_value hps_0 {BOOTFROMFPGA_Enable} {0} +set_instance_parameter_value hps_0 {BSEL} {1} +set_instance_parameter_value hps_0 {BSEL_EN} {0} +set_instance_parameter_value hps_0 {BYTE_ENABLE} {1} +set_instance_parameter_value hps_0 {C2P_WRITE_CLOCK_ADD_PHASE} {0.0} +set_instance_parameter_value hps_0 {CALIBRATION_MODE} {Skip} +set_instance_parameter_value hps_0 {CALIB_REG_WIDTH} {8} +set_instance_parameter_value hps_0 {CAN0_Mode} {N/A} +set_instance_parameter_value hps_0 {CAN0_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {CAN1_Mode} {N/A} +set_instance_parameter_value hps_0 {CAN1_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {CFG_DATA_REORDERING_TYPE} {INTER_BANK} +set_instance_parameter_value hps_0 {CFG_REORDER_DATA} {1} +set_instance_parameter_value hps_0 {CFG_TCCD_NS} {2.5} +set_instance_parameter_value hps_0 {COMMAND_PHASE} {0.0} +set_instance_parameter_value hps_0 {CONTROLLER_LATENCY} {5} +set_instance_parameter_value hps_0 {CORE_DEBUG_CONNECTION} {EXPORT} +set_instance_parameter_value hps_0 {CPORT_TYPE_PORT} {Bidirectional Bidirectional Bidirectional Bidirectional Bidirectional Bidirectional} +set_instance_parameter_value hps_0 {CSEL} {0} +set_instance_parameter_value hps_0 {CSEL_EN} {0} +set_instance_parameter_value hps_0 {CTI_Enable} {0} +set_instance_parameter_value hps_0 {CTL_AUTOPCH_EN} {0} +set_instance_parameter_value hps_0 {CTL_CMD_QUEUE_DEPTH} {8} +set_instance_parameter_value hps_0 {CTL_CSR_CONNECTION} {INTERNAL_JTAG} +set_instance_parameter_value hps_0 {CTL_CSR_ENABLED} {0} +set_instance_parameter_value hps_0 {CTL_CSR_READ_ONLY} {1} 
+set_instance_parameter_value hps_0 {CTL_DEEP_POWERDN_EN} {0} +set_instance_parameter_value hps_0 {CTL_DYNAMIC_BANK_ALLOCATION} {0} +set_instance_parameter_value hps_0 {CTL_DYNAMIC_BANK_NUM} {4} +set_instance_parameter_value hps_0 {CTL_ECC_AUTO_CORRECTION_ENABLED} {0} +set_instance_parameter_value hps_0 {CTL_ECC_ENABLED} {0} +set_instance_parameter_value hps_0 {CTL_ENABLE_BURST_INTERRUPT} {0} +set_instance_parameter_value hps_0 {CTL_ENABLE_BURST_TERMINATE} {0} +set_instance_parameter_value hps_0 {CTL_HRB_ENABLED} {0} +set_instance_parameter_value hps_0 {CTL_LOOK_AHEAD_DEPTH} {4} +set_instance_parameter_value hps_0 {CTL_SELF_REFRESH_EN} {0} +set_instance_parameter_value hps_0 {CTL_USR_REFRESH_EN} {0} +set_instance_parameter_value hps_0 {CTL_ZQCAL_EN} {0} +set_instance_parameter_value hps_0 {CUT_NEW_FAMILY_TIMING} {1} +set_instance_parameter_value hps_0 {DAT_DATA_WIDTH} {32} +set_instance_parameter_value hps_0 {DEBUGAPB_Enable} {0} +set_instance_parameter_value hps_0 {DEBUG_MODE} {0} +set_instance_parameter_value hps_0 {DEVICE_DEPTH} {1} +set_instance_parameter_value hps_0 {DEVICE_FAMILY_PARAM} {} +set_instance_parameter_value hps_0 {DISABLE_CHILD_MESSAGING} {0} +set_instance_parameter_value hps_0 {DISCRETE_FLY_BY} {1} +set_instance_parameter_value hps_0 {DLL_SHARING_MODE} {None} +set_instance_parameter_value hps_0 {DMA_Enable} {No No No No No No No No} +set_instance_parameter_value hps_0 {DQS_DQSN_MODE} {DIFFERENTIAL} +set_instance_parameter_value hps_0 {DQ_INPUT_REG_USE_CLKN} {0} +set_instance_parameter_value hps_0 {DUPLICATE_AC} {0} +set_instance_parameter_value hps_0 {ED_EXPORT_SEQ_DEBUG} {0} +set_instance_parameter_value hps_0 {EMAC0_Mode} {N/A} +set_instance_parameter_value hps_0 {EMAC0_PTP} {0} +set_instance_parameter_value hps_0 {EMAC0_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {EMAC1_Mode} {N/A} +set_instance_parameter_value hps_0 {EMAC1_PTP} {0} +set_instance_parameter_value hps_0 {EMAC1_PinMuxing} {Unused} +set_instance_parameter_value hps_0 
{ENABLE_ABS_RAM_MEM_INIT} {0} +set_instance_parameter_value hps_0 {ENABLE_BONDING} {0} +set_instance_parameter_value hps_0 {ENABLE_BURST_MERGE} {0} +set_instance_parameter_value hps_0 {ENABLE_CTRL_AVALON_INTERFACE} {1} +set_instance_parameter_value hps_0 {ENABLE_DELAY_CHAIN_WRITE} {0} +set_instance_parameter_value hps_0 {ENABLE_EMIT_BFM_MASTER} {0} +set_instance_parameter_value hps_0 {ENABLE_EXPORT_SEQ_DEBUG_BRIDGE} {0} +set_instance_parameter_value hps_0 {ENABLE_EXTRA_REPORTING} {0} +set_instance_parameter_value hps_0 {ENABLE_ISS_PROBES} {0} +set_instance_parameter_value hps_0 {ENABLE_NON_DESTRUCTIVE_CALIB} {0} +set_instance_parameter_value hps_0 {ENABLE_NON_DES_CAL} {0} +set_instance_parameter_value hps_0 {ENABLE_NON_DES_CAL_TEST} {0} +set_instance_parameter_value hps_0 {ENABLE_SEQUENCER_MARGINING_ON_BY_DEFAULT} {0} +set_instance_parameter_value hps_0 {ENABLE_USER_ECC} {0} +set_instance_parameter_value hps_0 {EXPORT_AFI_HALF_CLK} {0} +set_instance_parameter_value hps_0 {EXTRA_SETTINGS} {} +set_instance_parameter_value hps_0 {F2SCLK_COLDRST_Enable} {0} +set_instance_parameter_value hps_0 {F2SCLK_DBGRST_Enable} {0} +set_instance_parameter_value hps_0 {F2SCLK_PERIPHCLK_Enable} {0} +set_instance_parameter_value hps_0 {F2SCLK_SDRAMCLK_Enable} {0} +set_instance_parameter_value hps_0 {F2SCLK_WARMRST_Enable} {0} +set_instance_parameter_value hps_0 {F2SDRAM_Type} {} +set_instance_parameter_value hps_0 {F2SDRAM_Width} {} +set_instance_parameter_value hps_0 {F2SINTERRUPT_Enable} {0} +set_instance_parameter_value hps_0 {F2S_Width} {2} +set_instance_parameter_value hps_0 {FIX_READ_LATENCY} {8} +set_instance_parameter_value hps_0 {FORCED_NON_LDC_ADDR_CMD_MEM_CK_INVERT} {0} +set_instance_parameter_value hps_0 {FORCED_NUM_WRITE_FR_CYCLE_SHIFTS} {0} +set_instance_parameter_value hps_0 {FORCE_DQS_TRACKING} {AUTO} +set_instance_parameter_value hps_0 {FORCE_MAX_LATENCY_COUNT_WIDTH} {0} +set_instance_parameter_value hps_0 {FORCE_SEQUENCER_TCL_DEBUG_MODE} {0} 
+set_instance_parameter_value hps_0 {FORCE_SHADOW_REGS} {AUTO} +set_instance_parameter_value hps_0 {FORCE_SYNTHESIS_LANGUAGE} {} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_EMAC0_GTX_CLK} {125} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_EMAC0_MD_CLK} {2.5} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_EMAC1_GTX_CLK} {125} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_EMAC1_MD_CLK} {2.5} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_I2C0_CLK} {100} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_I2C1_CLK} {100} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_I2C2_CLK} {100} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_I2C3_CLK} {100} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_QSPI_SCLK_OUT} {100} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_SDIO_CCLK} {100} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_SPIM0_SCLK_OUT} {100} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_SPIM1_SCLK_OUT} {100} +set_instance_parameter_value hps_0 {GPIO_Enable} {No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No} +set_instance_parameter_value hps_0 {GP_Enable} {0} +set_instance_parameter_value hps_0 {HARD_EMIF} {1} +set_instance_parameter_value hps_0 {HCX_COMPAT_MODE} {0} +set_instance_parameter_value hps_0 {HHP_HPS} {1} +set_instance_parameter_value hps_0 {HHP_HPS_SIMULATION} {0} +set_instance_parameter_value hps_0 {HHP_HPS_VERIFICATION} {0} +set_instance_parameter_value hps_0 {HLGPI_Enable} {0} set_instance_parameter_value hps_0 
{HPS_PROTOCOL} {DDR3} +set_instance_parameter_value hps_0 {I2C0_Mode} {N/A} +set_instance_parameter_value hps_0 {I2C0_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {I2C1_Mode} {N/A} +set_instance_parameter_value hps_0 {I2C1_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {I2C2_Mode} {N/A} +set_instance_parameter_value hps_0 {I2C2_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {I2C3_Mode} {N/A} +set_instance_parameter_value hps_0 {I2C3_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {INCLUDE_BOARD_DELAY_MODEL} {0} +set_instance_parameter_value hps_0 {INCLUDE_MULTIRANK_BOARD_DELAY_MODEL} {0} +set_instance_parameter_value hps_0 {IS_ES_DEVICE} {0} +set_instance_parameter_value hps_0 {LOANIO_Enable} {No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No} +set_instance_parameter_value hps_0 {LOCAL_ID_WIDTH} {8} +set_instance_parameter_value hps_0 {LRDIMM_EXTENDED_CONFIG} {0x000000000000000000} +set_instance_parameter_value hps_0 {LWH2F_Enable} {true} +set_instance_parameter_value hps_0 {MARGIN_VARIATION_TEST} {0} +set_instance_parameter_value hps_0 {MAX_PENDING_RD_CMD} {32} +set_instance_parameter_value hps_0 {MAX_PENDING_WR_CMD} {16} set_instance_parameter_value hps_0 {MEM_ASR} {Manual} set_instance_parameter_value hps_0 {MEM_ATCL} {Disabled} set_instance_parameter_value hps_0 {MEM_AUTO_LEVELING_MODE} {1} @@ -88,34 +237,486 @@ set_instance_parameter_value hps_0 {MEM_VENDOR} {Other} set_instance_parameter_value hps_0 {MEM_VERBOSE} {1} set_instance_parameter_value hps_0 {MEM_VOLTAGE} {1.5V DDR3} set_instance_parameter_value hps_0 {MEM_WTCL} {7} -set_instance_parameter_value hps_0 {F2SCLK_COLDRST_Enable} {0} -set_instance_parameter_value hps_0 {F2SCLK_DBGRST_Enable} {0} 
-set_instance_parameter_value hps_0 {F2SCLK_PERIPHCLK_Enable} {0} -set_instance_parameter_value hps_0 {F2SCLK_SDRAMCLK_Enable} {0} -set_instance_parameter_value hps_0 {F2SCLK_WARMRST_Enable} {0} -set_instance_parameter_value hps_0 {LWH2F_Enable} {true} -set_instance_parameter_value hps_0 {S2F_Width} {0} -set_instance_parameter_value hps_0 {F2SDRAM_Type} {} -set_instance_parameter_value hps_0 {F2SDRAM_Width} {} set_instance_parameter_value hps_0 {MPU_EVENTS_Enable} {0} +set_instance_parameter_value hps_0 {MRS_MIRROR_PING_PONG_ATSO} {0} +set_instance_parameter_value hps_0 {MULTICAST_EN} {0} +set_instance_parameter_value hps_0 {NAND_Mode} {N/A} +set_instance_parameter_value hps_0 {NAND_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {NEXTGEN} {1} +set_instance_parameter_value hps_0 {NIOS_ROM_DATA_WIDTH} {32} +set_instance_parameter_value hps_0 {NUM_DLL_SHARING_INTERFACES} {1} +set_instance_parameter_value hps_0 {NUM_EXTRA_REPORT_PATH} {10} +set_instance_parameter_value hps_0 {NUM_OCT_SHARING_INTERFACES} {1} +set_instance_parameter_value hps_0 {NUM_OF_PORTS} {1} +set_instance_parameter_value hps_0 {NUM_PLL_SHARING_INTERFACES} {1} +set_instance_parameter_value hps_0 {OCT_SHARING_MODE} {None} +set_instance_parameter_value hps_0 {P2C_READ_CLOCK_ADD_PHASE} {0.0} +set_instance_parameter_value hps_0 {PACKAGE_DESKEW} {0} +set_instance_parameter_value hps_0 {PARSE_FRIENDLY_DEVICE_FAMILY_PARAM} {} +set_instance_parameter_value hps_0 {PARSE_FRIENDLY_DEVICE_FAMILY_PARAM_VALID} {0} +set_instance_parameter_value hps_0 {PHY_CSR_CONNECTION} {INTERNAL_JTAG} +set_instance_parameter_value hps_0 {PHY_CSR_ENABLED} {0} +set_instance_parameter_value hps_0 {PHY_ONLY} {0} +set_instance_parameter_value hps_0 {PINGPONGPHY_EN} {0} +set_instance_parameter_value hps_0 {PLL_ADDR_CMD_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_ADDR_CMD_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_ADDR_CMD_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 
{PLL_ADDR_CMD_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_ADDR_CMD_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_ADDR_CMD_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_AFI_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_AFI_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_AFI_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_AFI_HALF_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_HALF_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_AFI_HALF_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_AFI_HALF_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_HALF_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_HALF_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_AFI_PHY_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_PHY_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_AFI_PHY_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_AFI_PHY_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_PHY_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_PHY_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_C2P_WRITE_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_C2P_WRITE_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_C2P_WRITE_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_C2P_WRITE_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_C2P_WRITE_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_C2P_WRITE_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_CLK_PARAM_VALID} {0} 
+set_instance_parameter_value hps_0 {PLL_CONFIG_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_CONFIG_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_CONFIG_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_CONFIG_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_CONFIG_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_CONFIG_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_DR_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_DR_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_DR_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_DR_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_DR_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_DR_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_HR_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_HR_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_HR_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_HR_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_HR_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_HR_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_LOCATION} {Top_Bottom} +set_instance_parameter_value hps_0 {PLL_MEM_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_MEM_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_MEM_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_MEM_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_MEM_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_MEM_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_NIOS_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_NIOS_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_NIOS_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_NIOS_CLK_MULT_PARAM} {0} 
+set_instance_parameter_value hps_0 {PLL_NIOS_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_NIOS_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_P2C_READ_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_P2C_READ_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_P2C_READ_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_P2C_READ_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_P2C_READ_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_P2C_READ_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_SHARING_MODE} {None} +set_instance_parameter_value hps_0 {PLL_WRITE_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_WRITE_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_WRITE_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_WRITE_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_WRITE_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_WRITE_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {POWER_OF_TWO_BUS} {0} +set_instance_parameter_value hps_0 {PRIORITY_PORT} {1 1 1 1 1 1} +set_instance_parameter_value hps_0 {QSPI_Mode} {N/A} +set_instance_parameter_value hps_0 {QSPI_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {RATE} {Full} +set_instance_parameter_value hps_0 {RDIMM_CONFIG} {0000000000000000} +set_instance_parameter_value hps_0 {READ_DQ_DQS_CLOCK_SOURCE} {INVERTED_DQS_BUS} +set_instance_parameter_value hps_0 {READ_FIFO_SIZE} {8} +set_instance_parameter_value hps_0 {REFRESH_BURST_VALIDATION} {0} +set_instance_parameter_value hps_0 {REFRESH_INTERVAL} {15000} +set_instance_parameter_value hps_0 {REF_CLK_FREQ} {125.0} +set_instance_parameter_value hps_0 {REF_CLK_FREQ_MAX_PARAM} {0.0} +set_instance_parameter_value hps_0 {REF_CLK_FREQ_MIN_PARAM} {0.0} +set_instance_parameter_value hps_0 {REF_CLK_FREQ_PARAM_VALID} {0} +set_instance_parameter_value 
hps_0 {S2FCLK_COLDRST_Enable} {0} +set_instance_parameter_value hps_0 {S2FCLK_PENDINGRST_Enable} {0} +set_instance_parameter_value hps_0 {S2FCLK_USER0CLK_Enable} {0} +set_instance_parameter_value hps_0 {S2FCLK_USER1CLK_Enable} {0} +set_instance_parameter_value hps_0 {S2FCLK_USER1CLK_FREQ} {100.0} +set_instance_parameter_value hps_0 {S2FCLK_USER2CLK} {5} +set_instance_parameter_value hps_0 {S2FCLK_USER2CLK_Enable} {0} +set_instance_parameter_value hps_0 {S2FCLK_USER2CLK_FREQ} {100.0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_CAN_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_CLOCKPERIPHERAL_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_CTI_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_DMA_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_EMAC_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_FPGAMANAGER_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_GPIO_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_I2CEMAC_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_I2CPERIPHERAL_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_L4TIMER_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_NAND_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_OSCTIMER_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_QSPI_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_SDMMC_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_SPIMASTER_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_SPISLAVE_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_UART_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_USB_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_WATCHDOG_Enable} {0} +set_instance_parameter_value hps_0 {S2F_Width} {0} +set_instance_parameter_value hps_0 {SDIO_Mode} {N/A} +set_instance_parameter_value hps_0 {SDIO_PinMuxing} {Unused} 
+set_instance_parameter_value hps_0 {SEQUENCER_TYPE} {NIOS} +set_instance_parameter_value hps_0 {SEQ_MODE} {0} +set_instance_parameter_value hps_0 {SKIP_MEM_INIT} {1} +set_instance_parameter_value hps_0 {SOPC_COMPAT_RESET} {0} +set_instance_parameter_value hps_0 {SPEED_GRADE} {7} +set_instance_parameter_value hps_0 {SPIM0_Mode} {N/A} +set_instance_parameter_value hps_0 {SPIM0_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {SPIM1_Mode} {N/A} +set_instance_parameter_value hps_0 {SPIM1_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {SPIS0_Mode} {N/A} +set_instance_parameter_value hps_0 {SPIS0_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {SPIS1_Mode} {N/A} +set_instance_parameter_value hps_0 {SPIS1_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {STARVE_LIMIT} {10} +set_instance_parameter_value hps_0 {STM_Enable} {0} +set_instance_parameter_value hps_0 {TEST_Enable} {0} +set_instance_parameter_value hps_0 {TIMING_BOARD_AC_EYE_REDUCTION_H} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_AC_EYE_REDUCTION_SU} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_AC_SKEW} {0.02} +set_instance_parameter_value hps_0 {TIMING_BOARD_AC_SLEW_RATE} {1.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_AC_TO_CK_SKEW} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_CK_CKN_SLEW_RATE} {2.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_DELTA_DQS_ARRIVAL_TIME} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_DELTA_READ_DQS_ARRIVAL_TIME} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_DERATE_METHOD} {AUTO} +set_instance_parameter_value hps_0 {TIMING_BOARD_DQS_DQSN_SLEW_RATE} {2.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_DQ_EYE_REDUCTION} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_DQ_SLEW_RATE} {1.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_DQ_TO_DQS_SKEW} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_ISI_METHOD} {AUTO} +set_instance_parameter_value hps_0 
{TIMING_BOARD_MAX_CK_DELAY} {0.6} +set_instance_parameter_value hps_0 {TIMING_BOARD_MAX_DQS_DELAY} {0.6} +set_instance_parameter_value hps_0 {TIMING_BOARD_READ_DQ_EYE_REDUCTION} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_SKEW_BETWEEN_DIMMS} {0.05} +set_instance_parameter_value hps_0 {TIMING_BOARD_SKEW_BETWEEN_DQS} {0.02} +set_instance_parameter_value hps_0 {TIMING_BOARD_SKEW_CKDQS_DIMM_MAX} {0.01} +set_instance_parameter_value hps_0 {TIMING_BOARD_SKEW_CKDQS_DIMM_MIN} {-0.01} +set_instance_parameter_value hps_0 {TIMING_BOARD_SKEW_WITHIN_DQS} {0.02} +set_instance_parameter_value hps_0 {TIMING_BOARD_TDH} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_TDS} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_TIH} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_TIS} {0.0} +set_instance_parameter_value hps_0 {TIMING_TDH} {125} +set_instance_parameter_value hps_0 {TIMING_TDQSCK} {400} +set_instance_parameter_value hps_0 {TIMING_TDQSCKDL} {1200} +set_instance_parameter_value hps_0 {TIMING_TDQSCKDM} {900} +set_instance_parameter_value hps_0 {TIMING_TDQSCKDS} {450} +set_instance_parameter_value hps_0 {TIMING_TDQSH} {0.35} +set_instance_parameter_value hps_0 {TIMING_TDQSQ} {120} +set_instance_parameter_value hps_0 {TIMING_TDQSS} {0.25} +set_instance_parameter_value hps_0 {TIMING_TDS} {50} +set_instance_parameter_value hps_0 {TIMING_TDSH} {0.2} +set_instance_parameter_value hps_0 {TIMING_TDSS} {0.2} +set_instance_parameter_value hps_0 {TIMING_TIH} {250} +set_instance_parameter_value hps_0 {TIMING_TIS} {175} +set_instance_parameter_value hps_0 {TIMING_TQH} {0.38} +set_instance_parameter_value hps_0 {TIMING_TQHS} {300} +set_instance_parameter_value hps_0 {TIMING_TQSH} {0.38} +set_instance_parameter_value hps_0 {TPIUFPGA_Enable} {0} +set_instance_parameter_value hps_0 {TPIUFPGA_alt} {0} +set_instance_parameter_value hps_0 {TRACE_Mode} {N/A} +set_instance_parameter_value hps_0 {TRACE_PinMuxing} {Unused} +set_instance_parameter_value hps_0 
{TRACKING_ERROR_TEST} {0} +set_instance_parameter_value hps_0 {TRACKING_WATCH_TEST} {0} +set_instance_parameter_value hps_0 {TREFI} {35100} +set_instance_parameter_value hps_0 {TRFC} {350} +set_instance_parameter_value hps_0 {UART0_Mode} {N/A} +set_instance_parameter_value hps_0 {UART0_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {UART1_Mode} {N/A} +set_instance_parameter_value hps_0 {UART1_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {USB0_Mode} {N/A} +set_instance_parameter_value hps_0 {USB0_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {USB1_Mode} {N/A} +set_instance_parameter_value hps_0 {USB1_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {USER_DEBUG_LEVEL} {1} +set_instance_parameter_value hps_0 {USE_AXI_ADAPTOR} {0} +set_instance_parameter_value hps_0 {USE_FAKE_PHY} {0} +set_instance_parameter_value hps_0 {USE_MEM_CLK_FREQ} {0} +set_instance_parameter_value hps_0 {USE_MM_ADAPTOR} {1} +set_instance_parameter_value hps_0 {USE_SEQUENCER_BFM} {0} +set_instance_parameter_value hps_0 {WEIGHT_PORT} {0 0 0 0 0 0} +set_instance_parameter_value hps_0 {WRBUFFER_ADDR_WIDTH} {6} +set_instance_parameter_value hps_0 {can0_clk_div} {1} +set_instance_parameter_value hps_0 {can1_clk_div} {1} +set_instance_parameter_value hps_0 {configure_advanced_parameters} {0} +set_instance_parameter_value hps_0 {customize_device_pll_info} {0} +set_instance_parameter_value hps_0 {dbctrl_stayosc1} {1} +set_instance_parameter_value hps_0 {dbg_at_clk_div} {0} +set_instance_parameter_value hps_0 {dbg_clk_div} {1} +set_instance_parameter_value hps_0 {dbg_trace_clk_div} {0} +set_instance_parameter_value hps_0 {desired_can0_clk_mhz} {100.0} +set_instance_parameter_value hps_0 {desired_can1_clk_mhz} {100.0} +set_instance_parameter_value hps_0 {desired_cfg_clk_mhz} {100.0} +set_instance_parameter_value hps_0 {desired_emac0_clk_mhz} {250.0} +set_instance_parameter_value hps_0 {desired_emac1_clk_mhz} {250.0} +set_instance_parameter_value hps_0 
{desired_gpio_db_clk_hz} {32000} +set_instance_parameter_value hps_0 {desired_l4_mp_clk_mhz} {100.0} +set_instance_parameter_value hps_0 {desired_l4_sp_clk_mhz} {100.0} +set_instance_parameter_value hps_0 {desired_mpu_clk_mhz} {800.0} +set_instance_parameter_value hps_0 {desired_nand_clk_mhz} {12.5} +set_instance_parameter_value hps_0 {desired_qspi_clk_mhz} {400.0} +set_instance_parameter_value hps_0 {desired_sdmmc_clk_mhz} {200.0} +set_instance_parameter_value hps_0 {desired_spi_m_clk_mhz} {200.0} +set_instance_parameter_value hps_0 {desired_usb_mp_clk_mhz} {200.0} +set_instance_parameter_value hps_0 {device_pll_info_manual} {{320000000 1600000000} {320000000 1000000000} {800000000 400000000 400000000}} +set_instance_parameter_value hps_0 {eosc1_clk_mhz} {25.0} +set_instance_parameter_value hps_0 {eosc2_clk_mhz} {25.0} +set_instance_parameter_value hps_0 {gpio_db_clk_div} {6249} +set_instance_parameter_value hps_0 {l3_mp_clk_div} {1} +set_instance_parameter_value hps_0 {l3_sp_clk_div} {1} +set_instance_parameter_value hps_0 {l4_mp_clk_div} {1} +set_instance_parameter_value hps_0 {l4_mp_clk_source} {1} +set_instance_parameter_value hps_0 {l4_sp_clk_div} {1} +set_instance_parameter_value hps_0 {l4_sp_clk_source} {1} +set_instance_parameter_value hps_0 {main_pll_c3} {3} +set_instance_parameter_value hps_0 {main_pll_c4} {3} +set_instance_parameter_value hps_0 {main_pll_c5} {15} +set_instance_parameter_value hps_0 {main_pll_m} {63} +set_instance_parameter_value hps_0 {main_pll_n} {0} +set_instance_parameter_value hps_0 {nand_clk_source} {2} +set_instance_parameter_value hps_0 {periph_pll_c0} {3} +set_instance_parameter_value hps_0 {periph_pll_c1} {3} +set_instance_parameter_value hps_0 {periph_pll_c2} {1} +set_instance_parameter_value hps_0 {periph_pll_c3} {19} +set_instance_parameter_value hps_0 {periph_pll_c4} {4} +set_instance_parameter_value hps_0 {periph_pll_c5} {9} +set_instance_parameter_value hps_0 {periph_pll_m} {79} +set_instance_parameter_value hps_0 
{periph_pll_n} {1} +set_instance_parameter_value hps_0 {periph_pll_source} {0} +set_instance_parameter_value hps_0 {qspi_clk_source} {1} +set_instance_parameter_value hps_0 {sdmmc_clk_source} {2} +set_instance_parameter_value hps_0 {show_advanced_parameters} {0} +set_instance_parameter_value hps_0 {show_debug_info_as_warning_msg} {0} +set_instance_parameter_value hps_0 {show_warning_as_error_msg} {0} +set_instance_parameter_value hps_0 {spi_m_clk_div} {0} +set_instance_parameter_value hps_0 {usb_mp_clk_div} {0} +set_instance_parameter_value hps_0 {use_default_mpu_clk} {1} -add_instance vta_0 vta 1.0 - -# connections and connection parameters -add_connection clk_0.clk hps_0.f2h_axi_clock clock -add_connection clk_0.clk hps_0.h2f_lw_axi_clock clock -add_connection clk_0.clk vta_0.clock clock -add_connection clk_0.clk_reset vta_0.reset reset - -add_connection hps_0.h2f_lw_axi_master vta_0.s_axi_control avalon -set_connection_parameter_value hps_0.h2f_lw_axi_master/vta_0.s_axi_control arbitrationPriority {1} -set_connection_parameter_value hps_0.h2f_lw_axi_master/vta_0.s_axi_control baseAddress {0x00020000} -set_connection_parameter_value hps_0.h2f_lw_axi_master/vta_0.s_axi_control defaultConnection {0} +add_instance pll_0 altera_pll 18.1 +set_instance_parameter_value pll_0 {debug_print_output} {0} +set_instance_parameter_value pll_0 {debug_use_rbc_taf_method} {0} +set_instance_parameter_value pll_0 {gui_active_clk} {0} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency0} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency1} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency10} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency11} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency12} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency13} {0 MHz} +set_instance_parameter_value pll_0 
{gui_actual_output_clock_frequency14} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency15} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency16} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency17} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency2} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency3} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency4} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency5} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency6} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency7} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency8} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency9} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_phase_shift0} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift1} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift10} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift11} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift12} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift13} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift14} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift15} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift16} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift17} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift2} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift3} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift4} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift5} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift6} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift7} 
{0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift8} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift9} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter0} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter1} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter10} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter11} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter12} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter13} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter14} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter15} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter16} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter17} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter2} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter3} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter4} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter5} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter6} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter7} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter8} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter9} {0} +set_instance_parameter_value pll_0 {gui_cascade_outclk_index} {0} +set_instance_parameter_value pll_0 {gui_channel_spacing} {0.0} +set_instance_parameter_value pll_0 {gui_clk_bad} {0} +set_instance_parameter_value pll_0 {gui_device_speed_grade} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c0} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c1} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c10} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c11} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c12} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c13} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c14} {1} 
+set_instance_parameter_value pll_0 {gui_divide_factor_c15} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c16} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c17} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c2} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c3} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c4} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c5} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c6} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c7} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c8} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c9} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_n} {1} +set_instance_parameter_value pll_0 {gui_dps_cntr} {C0} +set_instance_parameter_value pll_0 {gui_dps_dir} {Positive} +set_instance_parameter_value pll_0 {gui_dps_num} {1} +set_instance_parameter_value pll_0 {gui_dsm_out_sel} {1st_order} +set_instance_parameter_value pll_0 {gui_duty_cycle0} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle1} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle10} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle11} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle12} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle13} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle14} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle15} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle16} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle17} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle2} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle3} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle4} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle5} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle6} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle7} {50} +set_instance_parameter_value pll_0 
{gui_duty_cycle8} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle9} {50} +set_instance_parameter_value pll_0 {gui_en_adv_params} {0} +set_instance_parameter_value pll_0 {gui_en_dps_ports} {0} +set_instance_parameter_value pll_0 {gui_en_phout_ports} {0} +set_instance_parameter_value pll_0 {gui_en_reconf} {0} +set_instance_parameter_value pll_0 {gui_enable_cascade_in} {0} +set_instance_parameter_value pll_0 {gui_enable_cascade_out} {0} +set_instance_parameter_value pll_0 {gui_enable_mif_dps} {0} +set_instance_parameter_value pll_0 {gui_feedback_clock} {Global Clock} +set_instance_parameter_value pll_0 {gui_frac_multiply_factor} {1.0} +set_instance_parameter_value pll_0 {gui_fractional_cout} {32} +set_instance_parameter_value pll_0 {gui_mif_generate} {0} +set_instance_parameter_value pll_0 {gui_multiply_factor} {1} +set_instance_parameter_value pll_0 {gui_number_of_clocks} {1} +set_instance_parameter_value pll_0 {gui_operation_mode} {normal} +set_instance_parameter_value pll_0 {gui_output_clock_frequency0} $FREQ_MHZ +set_instance_parameter_value pll_0 {gui_output_clock_frequency1} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency10} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency11} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency12} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency13} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency14} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency15} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency16} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency17} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency2} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency3} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency4} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency5} 
{100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency6} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency7} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency8} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency9} {100.0} +set_instance_parameter_value pll_0 {gui_phase_shift0} {0} +set_instance_parameter_value pll_0 {gui_phase_shift1} {0} +set_instance_parameter_value pll_0 {gui_phase_shift10} {0} +set_instance_parameter_value pll_0 {gui_phase_shift11} {0} +set_instance_parameter_value pll_0 {gui_phase_shift12} {0} +set_instance_parameter_value pll_0 {gui_phase_shift13} {0} +set_instance_parameter_value pll_0 {gui_phase_shift14} {0} +set_instance_parameter_value pll_0 {gui_phase_shift15} {0} +set_instance_parameter_value pll_0 {gui_phase_shift16} {0} +set_instance_parameter_value pll_0 {gui_phase_shift17} {0} +set_instance_parameter_value pll_0 {gui_phase_shift2} {0} +set_instance_parameter_value pll_0 {gui_phase_shift3} {0} +set_instance_parameter_value pll_0 {gui_phase_shift4} {0} +set_instance_parameter_value pll_0 {gui_phase_shift5} {0} +set_instance_parameter_value pll_0 {gui_phase_shift6} {0} +set_instance_parameter_value pll_0 {gui_phase_shift7} {0} +set_instance_parameter_value pll_0 {gui_phase_shift8} {0} +set_instance_parameter_value pll_0 {gui_phase_shift9} {0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg0} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg1} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg10} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg11} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg12} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg13} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg14} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg15} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg16} {0.0} +set_instance_parameter_value 
pll_0 {gui_phase_shift_deg17} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg2} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg3} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg4} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg5} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg6} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg7} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg8} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg9} {0.0} +set_instance_parameter_value pll_0 {gui_phout_division} {1} +set_instance_parameter_value pll_0 {gui_pll_auto_reset} {Off} +set_instance_parameter_value pll_0 {gui_pll_bandwidth_preset} {Auto} +set_instance_parameter_value pll_0 {gui_pll_cascading_mode} {Create an adjpllin signal to connect with an upstream PLL} +set_instance_parameter_value pll_0 {gui_pll_mode} {Integer-N PLL} +set_instance_parameter_value pll_0 {gui_ps_units0} {ps} +set_instance_parameter_value pll_0 {gui_ps_units1} {ps} +set_instance_parameter_value pll_0 {gui_ps_units10} {ps} +set_instance_parameter_value pll_0 {gui_ps_units11} {ps} +set_instance_parameter_value pll_0 {gui_ps_units12} {ps} +set_instance_parameter_value pll_0 {gui_ps_units13} {ps} +set_instance_parameter_value pll_0 {gui_ps_units14} {ps} +set_instance_parameter_value pll_0 {gui_ps_units15} {ps} +set_instance_parameter_value pll_0 {gui_ps_units16} {ps} +set_instance_parameter_value pll_0 {gui_ps_units17} {ps} +set_instance_parameter_value pll_0 {gui_ps_units2} {ps} +set_instance_parameter_value pll_0 {gui_ps_units3} {ps} +set_instance_parameter_value pll_0 {gui_ps_units4} {ps} +set_instance_parameter_value pll_0 {gui_ps_units5} {ps} +set_instance_parameter_value pll_0 {gui_ps_units6} {ps} +set_instance_parameter_value pll_0 {gui_ps_units7} {ps} +set_instance_parameter_value pll_0 {gui_ps_units8} {ps} +set_instance_parameter_value pll_0 {gui_ps_units9} {ps} 
+set_instance_parameter_value pll_0 {gui_refclk1_frequency} {100.0} +set_instance_parameter_value pll_0 {gui_refclk_switch} {0} +set_instance_parameter_value pll_0 {gui_reference_clock_frequency} {50.0} +set_instance_parameter_value pll_0 {gui_switchover_delay} {0} +set_instance_parameter_value pll_0 {gui_switchover_mode} {Automatic Switchover} +set_instance_parameter_value pll_0 {gui_use_locked} {0} -add_connection vta_0.m_axi_gmem hps_0.f2h_axi_slave avalon -set_connection_parameter_value vta_0.m_axi_gmem/hps_0.f2h_axi_slave arbitrationPriority {1} -set_connection_parameter_value vta_0.m_axi_gmem/hps_0.f2h_axi_slave baseAddress {0x0000} -set_connection_parameter_value vta_0.m_axi_gmem/hps_0.f2h_axi_slave defaultConnection {0} +add_instance vta_0 vta 1.0 # exported interfaces add_interface clk clock sink @@ -127,8 +728,33 @@ set_interface_property memory EXPORT_OF hps_0.memory add_interface reset reset sink set_interface_property reset EXPORT_OF clk_0.clk_in_reset +# connections and connection parameters +add_connection clk_0.clk pll_0.refclk + +add_connection clk_0.clk_reset pll_0.reset + +add_connection clk_0.clk_reset vta_0.reset + +add_connection hps_0.h2f_lw_axi_master vta_0.s_axi_control +set_connection_parameter_value hps_0.h2f_lw_axi_master/vta_0.s_axi_control arbitrationPriority {1} +set_connection_parameter_value hps_0.h2f_lw_axi_master/vta_0.s_axi_control baseAddress {0x00020000} +set_connection_parameter_value hps_0.h2f_lw_axi_master/vta_0.s_axi_control defaultConnection {0} + +add_connection pll_0.outclk0 hps_0.f2h_axi_clock + +add_connection pll_0.outclk0 hps_0.h2f_lw_axi_clock + +add_connection pll_0.outclk0 vta_0.clock + +add_connection vta_0.m_axi_gmem hps_0.f2h_axi_slave +set_connection_parameter_value vta_0.m_axi_gmem/hps_0.f2h_axi_slave arbitrationPriority {1} +set_connection_parameter_value vta_0.m_axi_gmem/hps_0.f2h_axi_slave baseAddress {0x0000} +set_connection_parameter_value vta_0.m_axi_gmem/hps_0.f2h_axi_slave defaultConnection {0} + # 
interconnect requirements set_interconnect_requirement {$system} {qsys_mm.clockCrossingAdapter} {HANDSHAKE} +set_interconnect_requirement {$system} {qsys_mm.enableEccProtection} {FALSE} +set_interconnect_requirement {$system} {qsys_mm.insertDefaultSlave} {FALSE} set_interconnect_requirement {$system} {qsys_mm.maxAdditionalLatency} {1} save_system soc_system.qsys diff --git a/vta/python/vta/exec/rpc_server.py b/vta/python/vta/exec/rpc_server.py index 558632306111..81ecc52268eb 100644 --- a/vta/python/vta/exec/rpc_server.py +++ b/vta/python/vta/exec/rpc_server.py @@ -67,11 +67,15 @@ def ext_dev_callback(): @tvm.register_func("tvm.contrib.vta.init", override=True) def program_fpga(file_name): # pylint: disable=import-outside-toplevel - from pynq import xlnk - # Reset xilinx driver - xlnk.Xlnk().xlnk_reset() - path = tvm.get_global_func("tvm.rpc.server.workpath")(file_name) env = get_env() + if env.TARGET == "pynq": + from pynq import xlnk + # Reset xilinx driver + xlnk.Xlnk().xlnk_reset() + elif env.TARGET == "de10nano": + # Load the de10nano program function. + load_vta_dll() + path = tvm.get_global_func("tvm.rpc.server.workpath")(file_name) program_bitstream.bitstream_program(env.TARGET, path) logging.info("Program FPGA with %s ", file_name) @@ -90,9 +94,11 @@ def reconfig_runtime(cfg_json): cfg_json : str JSON string used for configurations. 
""" + env = get_env() if runtime_dll: + if env.TARGET == "de10nano": + print("Please reconfigure the runtime AFTER programming a bitstream.") raise RuntimeError("Can only reconfig in the beginning of session...") - env = get_env() cfg = json.loads(cfg_json) cfg["TARGET"] = env.TARGET pkg = PkgConfig(cfg, proj_root) diff --git a/vta/python/vta/pkg_config.py b/vta/python/vta/pkg_config.py index 0516e839484a..c15f6df58740 100644 --- a/vta/python/vta/pkg_config.py +++ b/vta/python/vta/pkg_config.py @@ -77,6 +77,12 @@ def __init__(self, cfg, proj_root): if self.TARGET in ["pynq", "ultra96"]: # add pynq drivers for any board that uses pynq driver stack (see pynq.io) self.lib_source += glob.glob("%s/vta/src/pynq/*.cc" % (proj_root)) + elif self.TARGET in ["de10nano"]: + self.lib_source += glob.glob("%s/vta/src/de10nano/*.cc" % (proj_root)) + self.include_path += [ + "-I%s/vta/src/de10nano" % proj_root, + "-I%s/3rdparty" % proj_root + ] # Linker flags if self.TARGET in ["pynq", "ultra96"]: diff --git a/vta/python/vta/program_bitstream.py b/vta/python/vta/program_bitstream.py index 7d2c4e38db3e..62cb5f21d02a 100644 --- a/vta/python/vta/program_bitstream.py +++ b/vta/python/vta/program_bitstream.py @@ -19,7 +19,7 @@ import argparse def main(): - """Main funciton""" + """Main function""" parser = argparse.ArgumentParser() parser.add_argument("target", type=str, default="", help="target") @@ -27,7 +27,7 @@ def main(): help="bitstream path") args = parser.parse_args() - if (args.target != 'pynq' and args.target != 'sim'): + if args.target not in ('pynq', 'ultra96', 'de10nano', 'sim', 'tsim'): raise RuntimeError("Unknown target {}".format(args.target)) curr_path = os.path.dirname( @@ -48,9 +48,17 @@ def pynq_bitstream_program(bitstream_path): bitstream = Bitstream(bitstream_path) bitstream.download() +def de10nano_bitstream_program(bitstream_path): + # pylint: disable=import-outside-toplevel + from tvm import get_global_func + program = get_global_func("vta.de10nano.program") + 
program(bitstream_path) + def bitstream_program(target, bitstream): if target in ['pynq', 'ultra96']: pynq_bitstream_program(bitstream) + elif target in ['de10nano']: + de10nano_bitstream_program(bitstream) elif target in ['sim', 'tsim']: # In simulation, bit stream programming is a no-op return diff --git a/vta/python/vta/rpc_client.py b/vta/python/vta/rpc_client.py index f689ef46ba1c..097ea8e4a5cc 100644 --- a/vta/python/vta/rpc_client.py +++ b/vta/python/vta/rpc_client.py @@ -49,6 +49,9 @@ def program_fpga(remote, bitstream=None): else: bitstream = get_bitstream_path() if not os.path.isfile(bitstream): + env = get_env() + if env.TARGET == 'de10nano': + return download_bitstream() fprogram = remote.get_function("tvm.contrib.vta.init") diff --git a/vta/python/vta/testing/util.py b/vta/python/vta/testing/util.py index 009038b2d5ed..afbf00ddac8c 100644 --- a/vta/python/vta/testing/util.py +++ b/vta/python/vta/testing/util.py @@ -59,8 +59,8 @@ def run(run_func): tracker_port = os.environ.get("TVM_TRACKER_PORT", None) # Otherwise, we can set the variables below to directly # obtain a remote from a test device - pynq_host = os.environ.get("VTA_PYNQ_RPC_HOST", None) - pynq_port = os.environ.get("VTA_PYNQ_RPC_PORT", None) + pynq_host = os.environ.get("VTA_RPC_HOST", None) + pynq_port = os.environ.get("VTA_RPC_PORT", None) # Run device from fleet node if env variables are defined if tracker_host and tracker_port: remote = autotvm.measure.request_remote(env.TARGET, @@ -75,7 +75,7 @@ def run(run_func): run_func(env, remote) else: raise RuntimeError( - "Please set the VTA_PYNQ_RPC_HOST and VTA_PYNQ_RPC_PORT environment variables") + "Please set the VTA_RPC_HOST and VTA_RPC_PORT environment variables") else: raise RuntimeError("Unknown target %s" % env.TARGET) diff --git a/vta/src/de10nano/cma_api.h b/vta/src/de10nano/cma_api.h index f20939d52b63..5e1653f172c7 100644 --- a/vta/src/de10nano/cma_api.h +++ b/vta/src/de10nano/cma_api.h @@ -27,6 +27,8 @@ extern "C" { #endif 
+#include + /** * \brief Initialize CMA api (basically perform open() syscall). * diff --git a/vta/src/de10nano/de10nano_driver.cc b/vta/src/de10nano/de10nano_driver.cc index 97607f536051..94d000114dfc 100644 --- a/vta/src/de10nano/de10nano_driver.cc +++ b/vta/src/de10nano/de10nano_driver.cc @@ -21,11 +21,14 @@ */ #include "de10nano_driver.h" +#include "de10nano_mgr.h" #include #include +#include #include #include +#include #include "cma_api.h" void* VTAMemAlloc(size_t size, int cached) { @@ -72,12 +75,16 @@ void *VTAMapRegister(uint32_t addr) { uint32_t virt_offset = addr - virt_base; // Open file and mmap uint32_t mmap_file = open("/dev/mem", O_RDWR|O_SYNC); - return mmap(NULL, + // Note that if virt_offset != 0, i.e. addr is not page aligned + // munmap will not be unmapping all memory. + void *vmem = mmap(NULL, (VTA_IP_REG_MAP_RANGE + virt_offset), PROT_READ|PROT_WRITE, MAP_SHARED, mmap_file, virt_base); + close(mmap_file); + return vmem; } void VTAUnmapRegister(void *vta) { @@ -149,6 +156,24 @@ int VTADeviceRun(VTADeviceHandle handle, insn_phy_addr, insn_count, wait_cycles); } -void VTAProgram(const char* bitstream) { - CHECK(false) << "VTAProgram not implemented for de10nano"; +void VTAProgram(const char *rbf) { + De10NanoMgr mgr; + CHECK(mgr.mapped()) << "de10nano: mapping of /dev/mem failed"; + CHECK(mgr.program_rbf(rbf)) << "Programming of the de10nano failed.\n" + "This is usually due to the use of an RBF file that is incompatible " + "with the MSEL switches on the DE10-Nano board. The recommended RBF " + "format is FastPassiveParallel32 with compression enabled, " + "corresponding to MSEL 01010. 
An RBF file in FPP32 mode can be " + "generated in a Quartus session with the command " + "'quartus_cpf -o bitstream_compression=on -c .sof .rbf'."; } + +using tvm::runtime::TVMRetValue; +using tvm::runtime::TVMArgs; + +TVM_REGISTER_GLOBAL("vta.de10nano.program") +.set_body([](TVMArgs args, TVMRetValue* rv) { + std::string bitstream = args[0]; + VTAProgram(bitstream.c_str()); +}); + diff --git a/vta/src/de10nano/de10nano_mgr.h b/vta/src/de10nano/de10nano_mgr.h new file mode 100644 index 000000000000..a054640b4191 --- /dev/null +++ b/vta/src/de10nano/de10nano_mgr.h @@ -0,0 +1,551 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * \file de10nano_mgr.h + * \brief DE10-Nano fpga manager. + */ + +#ifndef VTA_DE10NANO_DE10NANO_MGR_H_ +#define VTA_DE10NANO_DE10NANO_MGR_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +// Register definition and address map taken from cv_5v4.pdf, +// Cyclone V Hard Processor System Technical Reference Manual, +// chapter 5: FPGA Manager. +struct De10NanoMgr { + // Reg32 is a static base class interface and implementation + // of a generic 32 bit register that avoids the use of a virtual + // class and ugly bit shift manipulations. 
+ struct Reg32 { + explicit Reg32(uint32_t offset, uint32_t reset = 0) : + m_offset(offset), + m_reset(reset) + {} + void map(uint8_t *base) { + m_addr = reinterpret_cast(base + m_offset); + m_reg = reinterpret_cast(reinterpret_cast(this)+sizeof(Reg32)); + } + uint32_t read() { + *m_reg = *m_addr; + return *m_reg; + } + void write() { *m_addr = *m_reg; } + void write(uint32_t value) { *m_addr = *m_reg = value; } + void clear() { *m_reg = 0; } + void reset() { *m_reg = m_reset; } + void print(const char *name, bool addr = false) { + if (addr) + printf("DE10-Nano-Mgr: %16s: 0x%08x addr: %p\n", name, read(), m_addr); + else + printf("DE10-Nano-Mgr: %16s: 0x%08x\n", name, read()); + } + + uint32_t m_offset, m_reset, *m_reg; + volatile uint32_t *m_addr; + + private: // Do not use this class on its own. + Reg32(const Reg32 &rhs); + }; + + // Register definitions. All registers are of 32 bit size. + // Add one structure for each register, making sure that all + // bit fields come first and pack exactly into 32 bits. 
+ + struct data : public Reg32 { + data() : Reg32(0x0, 0x0) {} + uint32_t value; + } data; + + struct stat : public Reg32 { + stat() : Reg32(0x0, 0x45) {} + enum mode_values { + FPGA_POWER_OFF = 0x0, + FPGA_RESET_PHASE = 0x1, + FPGA_CONFIG_PHASE = 0x2, + FPGA_INIT_PHASE = 0x3, + FPGA_USER_MODE = 0x4, + FPGA_ZOMBIE_MODE = 0x5 + }; + + enum msel_values { + FPP16_AESN_ZIPN = 0x0, + FPP32_AESO_ZIPY = 0xA + }; + + const char * mode_str() { + const char *str = "UNKNOWN"; + switch (mode) { + case FPGA_POWER_OFF : str = "POWER_OFF" ; break; + case FPGA_RESET_PHASE : str = "RESET_PHASE" ; break; + case FPGA_CONFIG_PHASE : str = "CONFIG_PHASE" ; break; + case FPGA_INIT_PHASE : str = "INIT_PHASE" ; break; + case FPGA_USER_MODE : str = "USER_MODE" ; break; + case FPGA_ZOMBIE_MODE : str = "UNDEF_MODE" ; break; + } + return str; + } + + bool msel_is_invalid() { + return msel & 0x10 || (msel & 0x3) == 0x3; + } + + void print(bool addr = false, bool fields = true) { + Reg32::print("stat", addr); + if (fields) { + printf("DE10-Nano-Mgr: %16s: %x\n", "msel", msel); + printf("DE10-Nano-Mgr: %16s: %s\n", "mode", mode_str()); + } + } + + uint32_t mode : 3; // 2:0 RW + uint32_t msel : 5; // 7:3 RO + uint32_t rsvd : 24; // 31:8 + } stat; + + struct ctrl : public Reg32 { + ctrl() : Reg32(0x4, 0x200) {} + + uint32_t en : 1; // 0 RW + uint32_t nce : 1; // 1 RW + uint32_t nconfigpull : 1; // 2 RW + uint32_t nstatuspull : 1; // 3 RW + uint32_t confdonepull : 1; // 4 RW + uint32_t prreq : 1; // 5 RW + uint32_t cdratio : 2; // 7:6 RW + uint32_t axicfgen : 1; // 8 RW + uint32_t cfgwdth : 1; // 9 RW + uint32_t rsvd : 22; // 31:10 + + void print(bool addr = false, bool fields = true) { + Reg32::print("ctrl", addr); + if (fields) { + printf("DE10-Nano-Mgr: %16s: %x\n", "en" , en); + printf("DE10-Nano-Mgr: %16s: %x\n", "nce" , nce); + printf("DE10-Nano-Mgr: %16s: %x\n", "nconfigpull" , nconfigpull); + printf("DE10-Nano-Mgr: %16s: %x\n", "nstatuspull" , nstatuspull); + printf("DE10-Nano-Mgr: %16s: 
%x\n", "confdonepull", confdonepull); + printf("DE10-Nano-Mgr: %16s: %x\n", "prreq" , prreq); + printf("DE10-Nano-Mgr: %16s: %x\n", "cdratio" , cdratio); + printf("DE10-Nano-Mgr: %16s: %x\n", "axicfgen" , axicfgen); + printf("DE10-Nano-Mgr: %16s: %x\n", "cfgwdth" , cfgwdth); + } + } + } ctrl; + + struct dclkcnt : public Reg32 { + dclkcnt() : Reg32(0x8, 0x0) {} + void print() { return Reg32::print("dclkcnt"); } + + uint32_t cnt; // RW + } dclkcnt; + + struct dclkstat : public Reg32 { + dclkstat() : Reg32(0xC, 0x0) {} + void print() { return Reg32::print("dclkstat"); } + + uint32_t dcntdone : 1; // RW + uint32_t rsvd : 31; + } dclkstat; + + struct gpio_inten : public Reg32 { + gpio_inten() : Reg32(0x830, 0x0) {} + void print() { return Reg32::print("gpio_inten"); } + + uint32_t value : 32; // RW + } gpio_inten; + + struct gpio_porta_eoi : public Reg32 { + gpio_porta_eoi() : Reg32(0x84C, 0x0) {} + void print() { return Reg32::print("gpio_porta_eoi"); } + + uint32_t ns : 1; // 0 WO + uint32_t cd : 1; // 1 WO + uint32_t id : 1; // 2 WO + uint32_t crc : 1; // 3 WO + uint32_t ccd : 1; // 4 WO + uint32_t prr : 1; // 5 WO + uint32_t pre : 1; // 6 WO + uint32_t prd : 1; // 7 WO + uint32_t ncp : 1; // 8 WO + uint32_t nsp : 1; // 9 WO + uint32_t cdp : 1; // 10 WO + uint32_t fpo : 1; // 11 WO + uint32_t rsvd : 20; // 31:12 + } gpio_porta_eoi; + + struct gpio_ext_porta : public Reg32 { + gpio_ext_porta() : Reg32(0x850, 0x0) {} + void print(bool addr = false, bool fields = true) { + Reg32::print("gpio_ext_porta", addr); + if (fields) { + printf("DE10-Nano-Mgr: %16s: %x\n", "nSTATUS" , ns); + printf("DE10-Nano-Mgr: %16s: %x\n", "CONF_DONE" , cd); + printf("DE10-Nano-Mgr: %16s: %x\n", "INIT_DONE" , id); + printf("DE10-Nano-Mgr: %16s: %x\n", "CRC_ERROR" , crc); + printf("DE10-Nano-Mgr: %16s: %x\n", "CVP_CONF_DONE" , ccd); + printf("DE10-Nano-Mgr: %16s: %x\n", "PR_READY" , prr); + printf("DE10-Nano-Mgr: %16s: %x\n", "PR_ERROR" , pre); + printf("DE10-Nano-Mgr: %16s: %x\n", "PR_DONE" , 
prd); + printf("DE10-Nano-Mgr: %16s: %x\n", "nCONFIG_PIN" , ncp); + printf("DE10-Nano-Mgr: %16s: %x\n", "nSTATUS_PIN" , nsp); + printf("DE10-Nano-Mgr: %16s: %x\n", "CONF_DONE_PIN" , cdp); + printf("DE10-Nano-Mgr: %16s: %x\n", "FPGA_POWER_ON" , fpo); + } + } + + uint32_t ns : 1; // 0 RO + uint32_t cd : 1; // 1 RO + uint32_t id : 1; // 2 RO + uint32_t crc : 1; // 3 RO + uint32_t ccd : 1; // 4 RO + uint32_t prr : 1; // 5 RO + uint32_t pre : 1; // 6 RO + uint32_t prd : 1; // 7 RO + uint32_t ncp : 1; // 8 RO + uint32_t nsp : 1; // 9 RO + uint32_t cdp : 1; // 10 RO + uint32_t fpo : 1; // 11 RO + uint32_t rsvd : 20; // 31:12 + } gpio_ext_porta; + + struct monitor { + // This is used to both break a polling loop if the specified number + // of milliseconds have passed and to relax the polling yielding the + // cpu every millisecond. + monitor() : msg(""), m_status(true), m_ticks(0), m_counter(0) { + m_epoc_us = time_stamp(); + } + + void init(const char *message, uint32_t ticks_ms = 1000) { + msg = message; + m_ticks = m_counter = ticks_ms; + m_init_us = time_stamp(); + printf("DE10-Nano-Mgr: %-32s : ", msg); + } + + bool status() { return m_status; } + + void reset() { m_counter = m_ticks; } + + void done(bool status = true) { + uint32_t elapsed = time_stamp(m_init_us); + const char *rs = "FAIL"; + if (!m_counter) { + status = false; + rs = "TOUT"; + } else if (status) { + rs = "PASS"; + } + printf("\rDE10-Nano-Mgr: %-32s : %s in %u us\n", msg, rs, elapsed); + if (!status) { + m_status = false; + throw 1; + } + } + + ~monitor() { + uint32_t elapsed = time_stamp(m_epoc_us); + const char *rs = m_status ? 
"SUCCESS" : "FAILURE"; + printf("DE10-Nano-Mgr: EXIT %s in %u us\n", rs, elapsed); + } + + uint64_t time_stamp(uint64_t base_us = 0) { + struct timeval tv; + gettimeofday(&tv, NULL); + // Widen before multiplying: a 32-bit long/time_t product would overflow. + return static_cast<uint64_t>(tv.tv_sec) * 1000000ULL + tv.tv_usec - base_us; + } + + bool operator() (bool cond) { + if (m_counter) { + if (!cond) + return false; + m_counter--; + usleep(1000); + } + return m_counter; + } + const char *msg; + + private: + bool m_status; + uint32_t m_ticks, m_counter; + uint64_t m_init_us, m_epoc_us; + }; + + enum BaseAddr { + REGS_BASE_ADDR = 0xFF706000U, + DATA_BASE_ADDR = 0xFFB90000U + }; + + De10NanoMgr() { + m_page_size = sysconf(_SC_PAGE_SIZE); + #ifdef MOCK_DEVMEM + m_regs_base = reinterpret_cast<uint8_t *>(malloc(m_page_size)); + m_data_base = reinterpret_cast<uint8_t *>(malloc(m_page_size)); + #else + m_regs_base = map_mem(REGS_BASE_ADDR); + m_data_base = map_mem(DATA_BASE_ADDR); + #endif // MOCK_DEVMEM + data.map(m_data_base); + stat.map(m_regs_base); + ctrl.map(m_regs_base); + dclkcnt.map(m_regs_base); + dclkstat.map(m_regs_base); + gpio_inten.map(m_regs_base); + gpio_porta_eoi.map(m_regs_base); + gpio_ext_porta.map(m_regs_base); + } + + ~De10NanoMgr() { + #ifdef MOCK_DEVMEM + free(m_regs_base); + free(m_data_base); + #else + unmap_mem(m_regs_base); + unmap_mem(m_data_base); + #endif // MOCK_DEVMEM + } + + bool mapped() const { return m_regs_base && m_data_base; } + + void print(bool addr = false) { + stat.print(addr, false); + ctrl.print(addr, false); + gpio_inten.print(); + gpio_porta_eoi.print(); + gpio_ext_porta.print(addr, false); + } + + private: + uint32_t msel_to_cfgwdth(uint32_t msel) { + return(msel & 0b1000) >> 3; + } + + uint32_t msel_to_cdratio(uint32_t msel) { + uint32_t cfgwdth = msel_to_cfgwdth(msel); + uint32_t cdratio = msel & 0b11; + if (cfgwdth && cdratio) + cdratio++; + return cdratio; + } + + uint8_t * map_mem(off_t addr, size_t pages = 1) { + if (m_page_size <= 0) { return NULL; } + + int mem_fd = open("/dev/mem", O_SYNC | O_RDWR); + if (mem_fd < 0) { return NULL; 
} + + void *vbase = mmap(NULL, pages*m_page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, mem_fd, addr & ~(pages*m_page_size-1)); + // The mapping outlives the descriptor: close it before testing the result. + close(mem_fd); + if (vbase == MAP_FAILED) { return NULL; } + return reinterpret_cast<uint8_t *>(vbase); + } + + void unmap_mem(void *base, size_t pages = 1) { + if (base) + munmap(base, pages * m_page_size); + } + + uint8_t *m_regs_base, *m_data_base; + size_t m_page_size; + + public: + // Configuration sequence documented at page A-34. + bool program_rbf(const char *rbf) { + monitor mon; + int rbf_fd; + uint32_t count = 0; + printf("DE10-Nano-Mgr: Programming FPGA from image %s\n", rbf); + + try { + mon.init("Open RBF file"); + rbf_fd = open(rbf, (O_RDONLY | O_SYNC)); + mon.done(rbf_fd >= 0); + + // 1. Set the cdratio and cfgwdth bits of the ctrl register in the + // FPGA manager registers (fpgamgrregs) to match the characteristics + // of the configuration image. These settings are dependent on the + // MSEL pins input. + // 2. Set the nce bit of the ctrl register to 0 to enable HPS + // configuration. + // 3. Set the en bit of the ctrl register to 1 to give the FPGA + // manager control of the configuration input signals. + // 4. Set the nconfigpull bit of the ctrl register to 1 to pull + // down the nCONFIG pin and put the FPGA portion of the device + // into the reset phase. + mon.init("Enable FPGA configuration"); + stat.read(); + if (stat.msel_is_invalid()) { + printf("DE10-Nano-Mgr: msel %x is not a valid HPS configuration\n", stat.msel); + } else { + ctrl.read(); + ctrl.cdratio = msel_to_cdratio(stat.msel); + ctrl.cfgwdth = msel_to_cfgwdth(stat.msel); + ctrl.nce = 0; + ctrl.en = 1; + ctrl.nconfigpull = 1; + ctrl.write(); + } + mon.done(!stat.msel_is_invalid()); + + // 5. Poll the mode bit of the stat register and wait until + // the FPGA enters the reset phase. + mon.init("Wait for FPGA to reset"); + do { + stat.read(); + } while (mon(stat.mode != stat::FPGA_RESET_PHASE)); + mon.done(); + stat.print(); + + // 6. 
Set the nconfigpull bit of the ctrl register to 0 to + // release the FPGA from reset. + mon.init("Release FPGA from reset"); + ctrl.nconfigpull = 0; + ctrl.write(); + mon.done(); + + // 7. Read the mode bit of the stat register and wait until + // the FPGA enters the configuration phase. + mon.init("Wait for configuration phase"); + do { + stat.read(); + } while (mon(stat.mode != stat::FPGA_CONFIG_PHASE)); + mon.done(); + stat.print(); + + // 8. Clear the interrupt bit of nSTATUS (ns) in the gpio interrupt + // register (fpgamgrregs.mon.gpio_porta_eoi). + mon.init("Clear nSTATUS interrupt bit"); + gpio_porta_eoi.clear(); + gpio_porta_eoi.ns = 1; + gpio_porta_eoi.write(); + mon.done(); + + // 9. Set the axicfgen bit of the ctrl register to 1 to enable + // sending configuration data to the FPGA. + mon.init("Enable configuration on AXI"); + ctrl.axicfgen = 1; + ctrl.write(); + mon.done(); + + // 10. Write the configuration image to the configuration data register + // (data) in the FPGA manager module configuration data registers + // (fpgamgrdata). You can also choose to use a DMA controller to + // transfer the configuration image from a peripheral device to the + // FPGA manager. + ssize_t bytes; + mon.init("Write configuration Image"); + do { + data.value = 0; + bytes = read(rbf_fd, &data.value, sizeof(data.value)); + if (bytes > 0) { + if (!(count % (1<<16))) { + printf("\rDE10-Nano-Mgr: %-32s : %u B", mon.msg, count); + fflush(stdout); + } + data.write(); + count += bytes; + } + } while (bytes == 4); + mon.done(bytes >= 0 && count > 0); // a read() error must fail this step + printf("DE10-Nano-Mgr: %-32s : written %u B\n", mon.msg, count); + close(rbf_fd); + rbf_fd = -1; // keeps the catch handler from closing the descriptor twice + // 11. Use the fpgamgrregs.mon.gpio_ext_porta registers to monitor + // the CONF_DONE (cd) and nSTATUS (ns) bits; both read 1 on success. + mon.init("Wait for CONF_DONE"); + do { + gpio_ext_porta.read(); + } while (mon(gpio_ext_porta.cd != 1 || gpio_ext_porta.ns != 1)); + mon.done(); + stat.print(); + + // 12. 
Set the axicfgen bit of the ctrl register to 0 to disable + // configuration data on AXI slave. + mon.init("Disable configuration on AXI"); + ctrl.axicfgen = 0; + ctrl.write(); + mon.done(); + + // 13. Clear any previous DONE status by writing a 1 to the dcntdone + // bit of the DCLK status register (dclkstat) to clear the completed + // status flag. + mon.init("Clear DCLK DONE status"); + dclkstat.dcntdone = 1; + dclkstat.write(); + mon.done(); + + // 14. Send the DCLKs required by the FPGA to enter the + // initialization phase. + mon.init("Send DCLK for init phase"); + dclkcnt.cnt = 4; + dclkcnt.write(); + mon.done(); + + // 15. Poll the dcntdone bit of the DCLK status register (dclkstat) + // until it changes to 1, which indicates that all the DCLKs have + // been sent. + mon.init("Wait for DCLK"); + do { + dclkstat.read(); + } while (mon(dclkstat.dcntdone != 1)); + mon.done(); + + // 16. Write a 1 to the dcntdone bit of the DCLK status register to + // clear the completed status flag. + mon.init("Clear DCLK status flag"); + dclkstat.dcntdone = 1; + dclkstat.write(); + mon.done(); + + // 17. Read the mode bit of the stat register to wait for the FPGA + // to enter user mode. + mon.init("Wait for FPGA user mode"); + do { + stat.read(); + } while (mon(stat.mode != stat::FPGA_USER_MODE)); + mon.done(); + + // 18. Set the en bit of the ctrl register to 0 to allow the + // external pins to drive the configuration input signals. 
+ mon.init("Release control"); + ctrl.en = 0; + ctrl.write(); + mon.done(); + } + catch(int i) { + close(rbf_fd); + printf("DE10-Nano-Mgr: %-32s : written %u B\n", mon.msg, count); + print(); + } + + return mon.status(); + } +}; + +#endif // VTA_DE10NANO_DE10NANO_MGR_H_ diff --git a/vta/tests/python/de10nano/test_program_rpc.py b/vta/tests/python/de10nano/test_program_rpc.py new file mode 100644 index 000000000000..6dbd4571d636 --- /dev/null +++ b/vta/tests/python/de10nano/test_program_rpc.py @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+import sys, os +import tvm +from tvm import rpc +from vta import get_bitstream_path, download_bitstream, program_fpga, reconfig_runtime + +host = os.environ.get("VTA_RPC_HOST", "de10nano") +port = int(os.environ.get("VTA_RPC_PORT", "9091")) + +def program_rpc_bitstream(path=None): + """Program the FPGA on the RPC server + + Parameters + ---------- + path : path to bitstream (optional) + """ + assert tvm.runtime.enabled("rpc") + remote = rpc.connect(host, port) + program_fpga(remote, path) + +def reconfig_rpc_runtime(): + """Reconfig the RPC server runtime + """ + assert tvm.runtime.enabled("rpc") + remote = rpc.connect(host, port) + reconfig_runtime(remote) + +bitstream = sys.argv[1] if len(sys.argv) == 2 else None +program_rpc_bitstream(bitstream) +reconfig_rpc_runtime() diff --git a/vta/tests/python/pynq/test_program_rpc.py b/vta/tests/python/pynq/test_program_rpc.py index fb0873586d44..ad6e43e59e02 100644 --- a/vta/tests/python/pynq/test_program_rpc.py +++ b/vta/tests/python/pynq/test_program_rpc.py @@ -20,8 +20,8 @@ from tvm import rpc from vta import get_bitstream_path, download_bitstream, program_fpga, reconfig_runtime -host = os.environ.get("VTA_PYNQ_RPC_HOST", "pynq") -port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091")) +host = os.environ.get("VTA_RPC_HOST", "pynq") +port = int(os.environ.get("VTA_RPC_PORT", "9091")) def program_rpc_bitstream(path=None): """Program the FPGA on the RPC server diff --git a/vta/tutorials/frontend/deploy_classification.py b/vta/tutorials/frontend/deploy_classification.py index 15cba4373056..d8c517c6e02d 100644 --- a/vta/tutorials/frontend/deploy_classification.py +++ b/vta/tutorials/frontend/deploy_classification.py @@ -109,8 +109,8 @@ # Otherwise if you have a device you want to program directly from # the host, make sure you've set the variables below to the IP of # your board. 
- device_host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99") - device_port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091") + device_host = os.environ.get("VTA_RPC_HOST", "192.168.2.99") + device_port = os.environ.get("VTA_RPC_PORT", "9091") if not tracker_host or not tracker_port: remote = rpc.connect(device_host, int(device_port)) else: diff --git a/vta/tutorials/frontend/deploy_detection.py b/vta/tutorials/frontend/deploy_detection.py index 0d1dbddcb1ec..1559d138213f 100644 --- a/vta/tutorials/frontend/deploy_detection.py +++ b/vta/tutorials/frontend/deploy_detection.py @@ -149,8 +149,8 @@ # Otherwise if you have a device you want to program directly from # the host, make sure you've set the variables below to the IP of # your board. - device_host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99") - device_port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091") + device_host = os.environ.get("VTA_RPC_HOST", "192.168.2.99") + device_port = os.environ.get("VTA_RPC_PORT", "9091") if not tracker_host or not tracker_port: remote = rpc.connect(device_host, int(device_port)) else: diff --git a/vta/tutorials/matrix_multiply.py b/vta/tutorials/matrix_multiply.py index 444762684bb9..efbebf7ff688 100644 --- a/vta/tutorials/matrix_multiply.py +++ b/vta/tutorials/matrix_multiply.py @@ -47,12 +47,12 @@ env = vta.get_env() # We read the Pynq RPC host IP address and port number from the OS environment -host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99") -port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091")) +host = os.environ.get("VTA_RPC_HOST", "192.168.2.99") +port = int(os.environ.get("VTA_RPC_PORT", "9091")) # We configure both the bitstream and the runtime system on the Pynq # to match the VTA configuration specified by the vta_config.json file. 
-if env.TARGET == "pynq": +if env.TARGET == "pynq" or env.TARGET == "de10nano": # Make sure that TVM was compiled with RPC=1 assert tvm.runtime.enabled("rpc") diff --git a/vta/tutorials/optimize/convolution_opt.py b/vta/tutorials/optimize/convolution_opt.py index 2616fb28c89a..9d05d4b922cb 100644 --- a/vta/tutorials/optimize/convolution_opt.py +++ b/vta/tutorials/optimize/convolution_opt.py @@ -51,8 +51,8 @@ env = vta.get_env() # We read the Pynq RPC host IP address and port number from the OS environment -host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99") -port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091")) +host = os.environ.get("VTA_RPC_HOST", "192.168.2.99") +port = int(os.environ.get("VTA_RPC_PORT", "9091")) # We configure both the bitstream and the runtime system on the Pynq # to match the VTA configuration specified by the vta_config.json file. diff --git a/vta/tutorials/optimize/matrix_multiply_opt.py b/vta/tutorials/optimize/matrix_multiply_opt.py index 597a7e8ecf7f..e038ac4b4e2d 100644 --- a/vta/tutorials/optimize/matrix_multiply_opt.py +++ b/vta/tutorials/optimize/matrix_multiply_opt.py @@ -50,8 +50,8 @@ env = vta.get_env() # We read the Pynq RPC host IP address and port number from the OS environment -host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99") -port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091")) +host = os.environ.get("VTA_RPC_HOST", "192.168.2.99") +port = int(os.environ.get("VTA_RPC_PORT", "9091")) # We configure both the bitstream and the runtime system on the Pynq # to match the VTA configuration specified by the vta_config.json file. 
diff --git a/vta/tutorials/vta_get_started.py b/vta/tutorials/vta_get_started.py index 3dd1f8c8753a..ab416874b71b 100644 --- a/vta/tutorials/vta_get_started.py +++ b/vta/tutorials/vta_get_started.py @@ -71,12 +71,12 @@ from vta.testing import simulator # We read the Pynq RPC host IP address and port number from the OS environment -host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99") -port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091")) +host = os.environ.get("VTA_RPC_HOST", "192.168.2.99") +port = int(os.environ.get("VTA_RPC_PORT", "9091")) # We configure both the bitstream and the runtime system on the Pynq # to match the VTA configuration specified by the vta_config.json file. -if env.TARGET == "pynq": +if env.TARGET == "pynq" or env.TARGET == "de10nano": # Make sure that TVM was compiled with RPC=1 assert tvm.runtime.enabled("rpc")