From f2c8c18bb063958e52dcbc3946f4d5f4d0ba28b7 Mon Sep 17 00:00:00 2001
From: Pasquale Cocchini
Date: Mon, 2 Mar 2020 14:33:23 -0800
Subject: [PATCH 01/19] [VTA][de10nano] Enable user defined target frequency.

Issue:
The VTA target frequency on the DE10-Nano is hardcoded to 50MHz,
unnecessarily limiting performance.

Solution:
Add a PLL to the FPGA sub-system along with support for the
selection of a user-specified frequency at build time. The board
successfully builds and runs at 100MHz.

* Added a PLL in the soc_system.tcl platform designer generator
  script.

* Modified the Makefile to automatically set the target frequency
  from that specified in the pkg_config.py file.

* Modified the Makefile to generate a bitstream with an RBF
  format that enables programming of the FPGA directly from
  the on-board processor. Specifically, the RBF is generated in
  FastParallel32 mode with compression, which corresponds to the
  default MSEL switch setting on the board, i.e. 01010.

* Added a false path override to the set_clocks.sdc file to turn off
  unconstrained path warnings on the VTA pulse LED.
--- vta/hardware/intel/Makefile | 12 +- vta/hardware/intel/scripts/set_clocks.sdc | 3 + vta/hardware/intel/scripts/soc_system.tcl | 680 +++++++++++++++++++++- 3 files changed, 665 insertions(+), 30 deletions(-) diff --git a/vta/hardware/intel/Makefile b/vta/hardware/intel/Makefile index 775e8aef765f..b3638dc4c0ab 100644 --- a/vta/hardware/intel/Makefile +++ b/vta/hardware/intel/Makefile @@ -35,6 +35,8 @@ DEVICE = $(shell $(VTA_CONFIG) --get-fpga-dev) DEVICE_FAMILY = $(shell $(VTA_CONFIG) --get-fpga-family) # Project name PROJECT = de10_nano_top +# Frequency in MHz +FREQ_MHZ = $(shell $(VTA_CONFIG) --get-fpga-freq) #--------------------- # Compilation parameters @@ -55,7 +57,8 @@ endif IP_PATH = $(IP_BUILD_PATH)/VTA.DefaultDe10Config.v # Bitstream file path -BIT_PATH = $(HW_BUILD_PATH)/export/vta.rbf +BIT_PATH = $(HW_BUILD_PATH)/export/vta_$(FREQ_MHZ)MHz.rbf +CPF_OPT := -o bitstream_compression=on # System design file path QSYS_PATH = $(HW_BUILD_PATH)/soc_system.qsys @@ -77,13 +80,16 @@ $(QSYS_PATH): $(IP_PATH) cd $(HW_BUILD_PATH) && \ cp -r $(SCRIPT_DIR)/* $(HW_BUILD_PATH) && \ python3 $(SCRIPT_DIR)/set_attrs.py -i $(IP_PATH) -o $(HW_BUILD_PATH)/ip/vta/VTAShell.v $(DSP_FLAG) && \ - qsys-script --script=soc_system.tcl $(DEVICE) $(DEVICE_FAMILY) + qsys-script --script=soc_system.tcl $(DEVICE) $(DEVICE_FAMILY) $(FREQ_MHZ) $(BIT_PATH): $(QSYS_PATH) cd $(HW_BUILD_PATH) && \ quartus_sh -t $(SCRIPT_DIR)/compile_design.tcl $(DEVICE) $(PROJECT) && \ mkdir -p $(shell dirname $(BIT_PATH)) && \ - quartus_cpf -c $(HW_BUILD_PATH)/$(PROJECT).sof $(BIT_PATH) + quartus_cpf $(CPF_OPT) -c $(HW_BUILD_PATH)/$(PROJECT).sof $(BIT_PATH) clean: rm -rf $(BUILD_DIR) + +clean-qsys: + rm -rf $(QSYS_PATH) diff --git a/vta/hardware/intel/scripts/set_clocks.sdc b/vta/hardware/intel/scripts/set_clocks.sdc index d48aa354bb9f..b28e01d2549c 100644 --- a/vta/hardware/intel/scripts/set_clocks.sdc +++ b/vta/hardware/intel/scripts/set_clocks.sdc @@ -31,6 +31,9 @@ set_input_delay -clock 
altera_reserved_tck -clock_fall 3 [get_ports altera_reser set_input_delay -clock altera_reserved_tck -clock_fall 3 [get_ports altera_reserved_tms] set_output_delay -clock altera_reserved_tck 3 [get_ports altera_reserved_tdo] +# Turn off warning on unconstrained LED port. +set_false_path -to [get_ports {LED[0]}] + # Create Generated Clock derive_pll_clocks diff --git a/vta/hardware/intel/scripts/soc_system.tcl b/vta/hardware/intel/scripts/soc_system.tcl index d8bed4fa0994..eea815d47558 100644 --- a/vta/hardware/intel/scripts/soc_system.tcl +++ b/vta/hardware/intel/scripts/soc_system.tcl @@ -21,9 +21,9 @@ create_system soc_system set_project_property DEVICE [lindex $argv 0] set_project_property DEVICE_FAMILY [lindex $argv 1] +set FREQ_MHZ [lindex $argv 2] -# module properties -set_module_property NAME soc_system +set_project_property HIDE_FROM_IP_CATALOG {false} # Instances and instance parameters # (disabled instances are intentionally culled) @@ -33,7 +33,156 @@ set_instance_parameter_value clk_0 {clockFrequencyKnown} {1} set_instance_parameter_value clk_0 {resetSynchronousEdges} {NONE} add_instance hps_0 altera_hps 18.1 +set_instance_parameter_value hps_0 {ABSTRACT_REAL_COMPARE_TEST} {0} +set_instance_parameter_value hps_0 {ABS_RAM_MEM_INIT_FILENAME} {meminit} +set_instance_parameter_value hps_0 {ACV_PHY_CLK_ADD_FR_PHASE} {0.0} +set_instance_parameter_value hps_0 {AC_PACKAGE_DESKEW} {0} +set_instance_parameter_value hps_0 {AC_ROM_USER_ADD_0} {0_0000_0000_0000} +set_instance_parameter_value hps_0 {AC_ROM_USER_ADD_1} {0_0000_0000_1000} +set_instance_parameter_value hps_0 {ADDR_ORDER} {0} +set_instance_parameter_value hps_0 {ADD_EFFICIENCY_MONITOR} {0} +set_instance_parameter_value hps_0 {ADD_EXTERNAL_SEQ_DEBUG_NIOS} {0} +set_instance_parameter_value hps_0 {ADVANCED_CK_PHASES} {0} +set_instance_parameter_value hps_0 {ADVERTIZE_SEQUENCER_SW_BUILD_FILES} {0} +set_instance_parameter_value hps_0 {AFI_DEBUG_INFO_WIDTH} {32} +set_instance_parameter_value hps_0 
{ALTMEMPHY_COMPATIBLE_MODE} {0} +set_instance_parameter_value hps_0 {AP_MODE} {0} +set_instance_parameter_value hps_0 {AP_MODE_EN} {0} +set_instance_parameter_value hps_0 {AUTO_PD_CYCLES} {0} +set_instance_parameter_value hps_0 {AUTO_POWERDN_EN} {0} +set_instance_parameter_value hps_0 {AVL_DATA_WIDTH_PORT} {32 32 32 32 32 32} +set_instance_parameter_value hps_0 {AVL_MAX_SIZE} {4} +set_instance_parameter_value hps_0 {BONDING_OUT_ENABLED} {0} +set_instance_parameter_value hps_0 {BOOTFROMFPGA_Enable} {0} +set_instance_parameter_value hps_0 {BSEL} {1} +set_instance_parameter_value hps_0 {BSEL_EN} {0} +set_instance_parameter_value hps_0 {BYTE_ENABLE} {1} +set_instance_parameter_value hps_0 {C2P_WRITE_CLOCK_ADD_PHASE} {0.0} +set_instance_parameter_value hps_0 {CALIBRATION_MODE} {Skip} +set_instance_parameter_value hps_0 {CALIB_REG_WIDTH} {8} +set_instance_parameter_value hps_0 {CAN0_Mode} {N/A} +set_instance_parameter_value hps_0 {CAN0_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {CAN1_Mode} {N/A} +set_instance_parameter_value hps_0 {CAN1_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {CFG_DATA_REORDERING_TYPE} {INTER_BANK} +set_instance_parameter_value hps_0 {CFG_REORDER_DATA} {1} +set_instance_parameter_value hps_0 {CFG_TCCD_NS} {2.5} +set_instance_parameter_value hps_0 {COMMAND_PHASE} {0.0} +set_instance_parameter_value hps_0 {CONTROLLER_LATENCY} {5} +set_instance_parameter_value hps_0 {CORE_DEBUG_CONNECTION} {EXPORT} +set_instance_parameter_value hps_0 {CPORT_TYPE_PORT} {Bidirectional Bidirectional Bidirectional Bidirectional Bidirectional Bidirectional} +set_instance_parameter_value hps_0 {CSEL} {0} +set_instance_parameter_value hps_0 {CSEL_EN} {0} +set_instance_parameter_value hps_0 {CTI_Enable} {0} +set_instance_parameter_value hps_0 {CTL_AUTOPCH_EN} {0} +set_instance_parameter_value hps_0 {CTL_CMD_QUEUE_DEPTH} {8} +set_instance_parameter_value hps_0 {CTL_CSR_CONNECTION} {INTERNAL_JTAG} +set_instance_parameter_value hps_0 {CTL_CSR_ENABLED} {0} 
+set_instance_parameter_value hps_0 {CTL_CSR_READ_ONLY} {1} +set_instance_parameter_value hps_0 {CTL_DEEP_POWERDN_EN} {0} +set_instance_parameter_value hps_0 {CTL_DYNAMIC_BANK_ALLOCATION} {0} +set_instance_parameter_value hps_0 {CTL_DYNAMIC_BANK_NUM} {4} +set_instance_parameter_value hps_0 {CTL_ECC_AUTO_CORRECTION_ENABLED} {0} +set_instance_parameter_value hps_0 {CTL_ECC_ENABLED} {0} +set_instance_parameter_value hps_0 {CTL_ENABLE_BURST_INTERRUPT} {0} +set_instance_parameter_value hps_0 {CTL_ENABLE_BURST_TERMINATE} {0} +set_instance_parameter_value hps_0 {CTL_HRB_ENABLED} {0} +set_instance_parameter_value hps_0 {CTL_LOOK_AHEAD_DEPTH} {4} +set_instance_parameter_value hps_0 {CTL_SELF_REFRESH_EN} {0} +set_instance_parameter_value hps_0 {CTL_USR_REFRESH_EN} {0} +set_instance_parameter_value hps_0 {CTL_ZQCAL_EN} {0} +set_instance_parameter_value hps_0 {CUT_NEW_FAMILY_TIMING} {1} +set_instance_parameter_value hps_0 {DAT_DATA_WIDTH} {32} +set_instance_parameter_value hps_0 {DEBUGAPB_Enable} {0} +set_instance_parameter_value hps_0 {DEBUG_MODE} {0} +set_instance_parameter_value hps_0 {DEVICE_DEPTH} {1} +set_instance_parameter_value hps_0 {DEVICE_FAMILY_PARAM} {} +set_instance_parameter_value hps_0 {DISABLE_CHILD_MESSAGING} {0} +set_instance_parameter_value hps_0 {DISCRETE_FLY_BY} {1} +set_instance_parameter_value hps_0 {DLL_SHARING_MODE} {None} +set_instance_parameter_value hps_0 {DMA_Enable} {No No No No No No No No} +set_instance_parameter_value hps_0 {DQS_DQSN_MODE} {DIFFERENTIAL} +set_instance_parameter_value hps_0 {DQ_INPUT_REG_USE_CLKN} {0} +set_instance_parameter_value hps_0 {DUPLICATE_AC} {0} +set_instance_parameter_value hps_0 {ED_EXPORT_SEQ_DEBUG} {0} +set_instance_parameter_value hps_0 {EMAC0_Mode} {N/A} +set_instance_parameter_value hps_0 {EMAC0_PTP} {0} +set_instance_parameter_value hps_0 {EMAC0_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {EMAC1_Mode} {N/A} +set_instance_parameter_value hps_0 {EMAC1_PTP} {0} +set_instance_parameter_value hps_0 
{EMAC1_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {ENABLE_ABS_RAM_MEM_INIT} {0} +set_instance_parameter_value hps_0 {ENABLE_BONDING} {0} +set_instance_parameter_value hps_0 {ENABLE_BURST_MERGE} {0} +set_instance_parameter_value hps_0 {ENABLE_CTRL_AVALON_INTERFACE} {1} +set_instance_parameter_value hps_0 {ENABLE_DELAY_CHAIN_WRITE} {0} +set_instance_parameter_value hps_0 {ENABLE_EMIT_BFM_MASTER} {0} +set_instance_parameter_value hps_0 {ENABLE_EXPORT_SEQ_DEBUG_BRIDGE} {0} +set_instance_parameter_value hps_0 {ENABLE_EXTRA_REPORTING} {0} +set_instance_parameter_value hps_0 {ENABLE_ISS_PROBES} {0} +set_instance_parameter_value hps_0 {ENABLE_NON_DESTRUCTIVE_CALIB} {0} +set_instance_parameter_value hps_0 {ENABLE_NON_DES_CAL} {0} +set_instance_parameter_value hps_0 {ENABLE_NON_DES_CAL_TEST} {0} +set_instance_parameter_value hps_0 {ENABLE_SEQUENCER_MARGINING_ON_BY_DEFAULT} {0} +set_instance_parameter_value hps_0 {ENABLE_USER_ECC} {0} +set_instance_parameter_value hps_0 {EXPORT_AFI_HALF_CLK} {0} +set_instance_parameter_value hps_0 {EXTRA_SETTINGS} {} +set_instance_parameter_value hps_0 {F2SCLK_COLDRST_Enable} {0} +set_instance_parameter_value hps_0 {F2SCLK_DBGRST_Enable} {0} +set_instance_parameter_value hps_0 {F2SCLK_PERIPHCLK_Enable} {0} +set_instance_parameter_value hps_0 {F2SCLK_SDRAMCLK_Enable} {0} +set_instance_parameter_value hps_0 {F2SCLK_WARMRST_Enable} {0} +set_instance_parameter_value hps_0 {F2SDRAM_Type} {} +set_instance_parameter_value hps_0 {F2SDRAM_Width} {} +set_instance_parameter_value hps_0 {F2SINTERRUPT_Enable} {0} +set_instance_parameter_value hps_0 {F2S_Width} {2} +set_instance_parameter_value hps_0 {FIX_READ_LATENCY} {8} +set_instance_parameter_value hps_0 {FORCED_NON_LDC_ADDR_CMD_MEM_CK_INVERT} {0} +set_instance_parameter_value hps_0 {FORCED_NUM_WRITE_FR_CYCLE_SHIFTS} {0} +set_instance_parameter_value hps_0 {FORCE_DQS_TRACKING} {AUTO} +set_instance_parameter_value hps_0 {FORCE_MAX_LATENCY_COUNT_WIDTH} {0} +set_instance_parameter_value hps_0 
{FORCE_SEQUENCER_TCL_DEBUG_MODE} {0} +set_instance_parameter_value hps_0 {FORCE_SHADOW_REGS} {AUTO} +set_instance_parameter_value hps_0 {FORCE_SYNTHESIS_LANGUAGE} {} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_EMAC0_GTX_CLK} {125} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_EMAC0_MD_CLK} {2.5} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_EMAC1_GTX_CLK} {125} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_EMAC1_MD_CLK} {2.5} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_I2C0_CLK} {100} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_I2C1_CLK} {100} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_I2C2_CLK} {100} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_I2C3_CLK} {100} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_QSPI_SCLK_OUT} {100} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_SDIO_CCLK} {100} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_SPIM0_SCLK_OUT} {100} +set_instance_parameter_value hps_0 {FPGA_PERIPHERAL_OUTPUT_CLOCK_FREQ_SPIM1_SCLK_OUT} {100} +set_instance_parameter_value hps_0 {GPIO_Enable} {No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No} +set_instance_parameter_value hps_0 {GP_Enable} {0} +set_instance_parameter_value hps_0 {HARD_EMIF} {1} +set_instance_parameter_value hps_0 {HCX_COMPAT_MODE} {0} +set_instance_parameter_value hps_0 {HHP_HPS} {1} +set_instance_parameter_value hps_0 {HHP_HPS_SIMULATION} {0} +set_instance_parameter_value hps_0 {HHP_HPS_VERIFICATION} {0} +set_instance_parameter_value hps_0 {HLGPI_Enable} {0} 
set_instance_parameter_value hps_0 {HPS_PROTOCOL} {DDR3} +set_instance_parameter_value hps_0 {I2C0_Mode} {N/A} +set_instance_parameter_value hps_0 {I2C0_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {I2C1_Mode} {N/A} +set_instance_parameter_value hps_0 {I2C1_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {I2C2_Mode} {N/A} +set_instance_parameter_value hps_0 {I2C2_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {I2C3_Mode} {N/A} +set_instance_parameter_value hps_0 {I2C3_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {INCLUDE_BOARD_DELAY_MODEL} {0} +set_instance_parameter_value hps_0 {INCLUDE_MULTIRANK_BOARD_DELAY_MODEL} {0} +set_instance_parameter_value hps_0 {IS_ES_DEVICE} {0} +set_instance_parameter_value hps_0 {LOANIO_Enable} {No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No No} +set_instance_parameter_value hps_0 {LOCAL_ID_WIDTH} {8} +set_instance_parameter_value hps_0 {LRDIMM_EXTENDED_CONFIG} {0x000000000000000000} +set_instance_parameter_value hps_0 {LWH2F_Enable} {true} +set_instance_parameter_value hps_0 {MARGIN_VARIATION_TEST} {0} +set_instance_parameter_value hps_0 {MAX_PENDING_RD_CMD} {32} +set_instance_parameter_value hps_0 {MAX_PENDING_WR_CMD} {16} set_instance_parameter_value hps_0 {MEM_ASR} {Manual} set_instance_parameter_value hps_0 {MEM_ATCL} {Disabled} set_instance_parameter_value hps_0 {MEM_AUTO_LEVELING_MODE} {1} @@ -88,34 +237,486 @@ set_instance_parameter_value hps_0 {MEM_VENDOR} {Other} set_instance_parameter_value hps_0 {MEM_VERBOSE} {1} set_instance_parameter_value hps_0 {MEM_VOLTAGE} {1.5V DDR3} set_instance_parameter_value hps_0 {MEM_WTCL} {7} -set_instance_parameter_value hps_0 {F2SCLK_COLDRST_Enable} {0} -set_instance_parameter_value hps_0 
{F2SCLK_DBGRST_Enable} {0} -set_instance_parameter_value hps_0 {F2SCLK_PERIPHCLK_Enable} {0} -set_instance_parameter_value hps_0 {F2SCLK_SDRAMCLK_Enable} {0} -set_instance_parameter_value hps_0 {F2SCLK_WARMRST_Enable} {0} -set_instance_parameter_value hps_0 {LWH2F_Enable} {true} -set_instance_parameter_value hps_0 {S2F_Width} {0} -set_instance_parameter_value hps_0 {F2SDRAM_Type} {} -set_instance_parameter_value hps_0 {F2SDRAM_Width} {} set_instance_parameter_value hps_0 {MPU_EVENTS_Enable} {0} +set_instance_parameter_value hps_0 {MRS_MIRROR_PING_PONG_ATSO} {0} +set_instance_parameter_value hps_0 {MULTICAST_EN} {0} +set_instance_parameter_value hps_0 {NAND_Mode} {N/A} +set_instance_parameter_value hps_0 {NAND_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {NEXTGEN} {1} +set_instance_parameter_value hps_0 {NIOS_ROM_DATA_WIDTH} {32} +set_instance_parameter_value hps_0 {NUM_DLL_SHARING_INTERFACES} {1} +set_instance_parameter_value hps_0 {NUM_EXTRA_REPORT_PATH} {10} +set_instance_parameter_value hps_0 {NUM_OCT_SHARING_INTERFACES} {1} +set_instance_parameter_value hps_0 {NUM_OF_PORTS} {1} +set_instance_parameter_value hps_0 {NUM_PLL_SHARING_INTERFACES} {1} +set_instance_parameter_value hps_0 {OCT_SHARING_MODE} {None} +set_instance_parameter_value hps_0 {P2C_READ_CLOCK_ADD_PHASE} {0.0} +set_instance_parameter_value hps_0 {PACKAGE_DESKEW} {0} +set_instance_parameter_value hps_0 {PARSE_FRIENDLY_DEVICE_FAMILY_PARAM} {} +set_instance_parameter_value hps_0 {PARSE_FRIENDLY_DEVICE_FAMILY_PARAM_VALID} {0} +set_instance_parameter_value hps_0 {PHY_CSR_CONNECTION} {INTERNAL_JTAG} +set_instance_parameter_value hps_0 {PHY_CSR_ENABLED} {0} +set_instance_parameter_value hps_0 {PHY_ONLY} {0} +set_instance_parameter_value hps_0 {PINGPONGPHY_EN} {0} +set_instance_parameter_value hps_0 {PLL_ADDR_CMD_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_ADDR_CMD_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_ADDR_CMD_CLK_FREQ_SIM_STR_PARAM} {} 
+set_instance_parameter_value hps_0 {PLL_ADDR_CMD_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_ADDR_CMD_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_ADDR_CMD_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_AFI_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_AFI_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_AFI_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_AFI_HALF_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_HALF_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_AFI_HALF_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_AFI_HALF_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_HALF_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_HALF_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_AFI_PHY_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_PHY_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_AFI_PHY_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_AFI_PHY_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_PHY_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_AFI_PHY_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_C2P_WRITE_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_C2P_WRITE_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_C2P_WRITE_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_C2P_WRITE_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_C2P_WRITE_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_C2P_WRITE_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value 
hps_0 {PLL_CLK_PARAM_VALID} {0} +set_instance_parameter_value hps_0 {PLL_CONFIG_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_CONFIG_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_CONFIG_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_CONFIG_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_CONFIG_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_CONFIG_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_DR_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_DR_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_DR_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_DR_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_DR_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_DR_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_HR_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_HR_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_HR_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_HR_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_HR_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_HR_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_LOCATION} {Top_Bottom} +set_instance_parameter_value hps_0 {PLL_MEM_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_MEM_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_MEM_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_MEM_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_MEM_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_MEM_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_NIOS_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_NIOS_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_NIOS_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 
{PLL_NIOS_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_NIOS_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_NIOS_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_P2C_READ_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_P2C_READ_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_P2C_READ_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_P2C_READ_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_P2C_READ_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_P2C_READ_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_SHARING_MODE} {None} +set_instance_parameter_value hps_0 {PLL_WRITE_CLK_DIV_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_WRITE_CLK_FREQ_PARAM} {0.0} +set_instance_parameter_value hps_0 {PLL_WRITE_CLK_FREQ_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {PLL_WRITE_CLK_MULT_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_WRITE_CLK_PHASE_PS_PARAM} {0} +set_instance_parameter_value hps_0 {PLL_WRITE_CLK_PHASE_PS_SIM_STR_PARAM} {} +set_instance_parameter_value hps_0 {POWER_OF_TWO_BUS} {0} +set_instance_parameter_value hps_0 {PRIORITY_PORT} {1 1 1 1 1 1} +set_instance_parameter_value hps_0 {QSPI_Mode} {N/A} +set_instance_parameter_value hps_0 {QSPI_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {RATE} {Full} +set_instance_parameter_value hps_0 {RDIMM_CONFIG} {0000000000000000} +set_instance_parameter_value hps_0 {READ_DQ_DQS_CLOCK_SOURCE} {INVERTED_DQS_BUS} +set_instance_parameter_value hps_0 {READ_FIFO_SIZE} {8} +set_instance_parameter_value hps_0 {REFRESH_BURST_VALIDATION} {0} +set_instance_parameter_value hps_0 {REFRESH_INTERVAL} {15000} +set_instance_parameter_value hps_0 {REF_CLK_FREQ} {125.0} +set_instance_parameter_value hps_0 {REF_CLK_FREQ_MAX_PARAM} {0.0} +set_instance_parameter_value hps_0 {REF_CLK_FREQ_MIN_PARAM} {0.0} +set_instance_parameter_value hps_0 {REF_CLK_FREQ_PARAM_VALID} {0} 
+set_instance_parameter_value hps_0 {S2FCLK_COLDRST_Enable} {0} +set_instance_parameter_value hps_0 {S2FCLK_PENDINGRST_Enable} {0} +set_instance_parameter_value hps_0 {S2FCLK_USER0CLK_Enable} {0} +set_instance_parameter_value hps_0 {S2FCLK_USER1CLK_Enable} {0} +set_instance_parameter_value hps_0 {S2FCLK_USER1CLK_FREQ} {100.0} +set_instance_parameter_value hps_0 {S2FCLK_USER2CLK} {5} +set_instance_parameter_value hps_0 {S2FCLK_USER2CLK_Enable} {0} +set_instance_parameter_value hps_0 {S2FCLK_USER2CLK_FREQ} {100.0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_CAN_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_CLOCKPERIPHERAL_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_CTI_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_DMA_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_EMAC_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_FPGAMANAGER_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_GPIO_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_I2CEMAC_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_I2CPERIPHERAL_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_L4TIMER_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_NAND_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_OSCTIMER_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_QSPI_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_SDMMC_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_SPIMASTER_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_SPISLAVE_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_UART_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_USB_Enable} {0} +set_instance_parameter_value hps_0 {S2FINTERRUPT_WATCHDOG_Enable} {0} +set_instance_parameter_value hps_0 {S2F_Width} {0} +set_instance_parameter_value hps_0 {SDIO_Mode} {N/A} +set_instance_parameter_value hps_0 {SDIO_PinMuxing} 
{Unused} +set_instance_parameter_value hps_0 {SEQUENCER_TYPE} {NIOS} +set_instance_parameter_value hps_0 {SEQ_MODE} {0} +set_instance_parameter_value hps_0 {SKIP_MEM_INIT} {1} +set_instance_parameter_value hps_0 {SOPC_COMPAT_RESET} {0} +set_instance_parameter_value hps_0 {SPEED_GRADE} {7} +set_instance_parameter_value hps_0 {SPIM0_Mode} {N/A} +set_instance_parameter_value hps_0 {SPIM0_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {SPIM1_Mode} {N/A} +set_instance_parameter_value hps_0 {SPIM1_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {SPIS0_Mode} {N/A} +set_instance_parameter_value hps_0 {SPIS0_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {SPIS1_Mode} {N/A} +set_instance_parameter_value hps_0 {SPIS1_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {STARVE_LIMIT} {10} +set_instance_parameter_value hps_0 {STM_Enable} {0} +set_instance_parameter_value hps_0 {TEST_Enable} {0} +set_instance_parameter_value hps_0 {TIMING_BOARD_AC_EYE_REDUCTION_H} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_AC_EYE_REDUCTION_SU} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_AC_SKEW} {0.02} +set_instance_parameter_value hps_0 {TIMING_BOARD_AC_SLEW_RATE} {1.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_AC_TO_CK_SKEW} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_CK_CKN_SLEW_RATE} {2.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_DELTA_DQS_ARRIVAL_TIME} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_DELTA_READ_DQS_ARRIVAL_TIME} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_DERATE_METHOD} {AUTO} +set_instance_parameter_value hps_0 {TIMING_BOARD_DQS_DQSN_SLEW_RATE} {2.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_DQ_EYE_REDUCTION} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_DQ_SLEW_RATE} {1.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_DQ_TO_DQS_SKEW} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_ISI_METHOD} {AUTO} +set_instance_parameter_value hps_0 
{TIMING_BOARD_MAX_CK_DELAY} {0.6} +set_instance_parameter_value hps_0 {TIMING_BOARD_MAX_DQS_DELAY} {0.6} +set_instance_parameter_value hps_0 {TIMING_BOARD_READ_DQ_EYE_REDUCTION} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_SKEW_BETWEEN_DIMMS} {0.05} +set_instance_parameter_value hps_0 {TIMING_BOARD_SKEW_BETWEEN_DQS} {0.02} +set_instance_parameter_value hps_0 {TIMING_BOARD_SKEW_CKDQS_DIMM_MAX} {0.01} +set_instance_parameter_value hps_0 {TIMING_BOARD_SKEW_CKDQS_DIMM_MIN} {-0.01} +set_instance_parameter_value hps_0 {TIMING_BOARD_SKEW_WITHIN_DQS} {0.02} +set_instance_parameter_value hps_0 {TIMING_BOARD_TDH} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_TDS} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_TIH} {0.0} +set_instance_parameter_value hps_0 {TIMING_BOARD_TIS} {0.0} +set_instance_parameter_value hps_0 {TIMING_TDH} {125} +set_instance_parameter_value hps_0 {TIMING_TDQSCK} {400} +set_instance_parameter_value hps_0 {TIMING_TDQSCKDL} {1200} +set_instance_parameter_value hps_0 {TIMING_TDQSCKDM} {900} +set_instance_parameter_value hps_0 {TIMING_TDQSCKDS} {450} +set_instance_parameter_value hps_0 {TIMING_TDQSH} {0.35} +set_instance_parameter_value hps_0 {TIMING_TDQSQ} {120} +set_instance_parameter_value hps_0 {TIMING_TDQSS} {0.25} +set_instance_parameter_value hps_0 {TIMING_TDS} {50} +set_instance_parameter_value hps_0 {TIMING_TDSH} {0.2} +set_instance_parameter_value hps_0 {TIMING_TDSS} {0.2} +set_instance_parameter_value hps_0 {TIMING_TIH} {250} +set_instance_parameter_value hps_0 {TIMING_TIS} {175} +set_instance_parameter_value hps_0 {TIMING_TQH} {0.38} +set_instance_parameter_value hps_0 {TIMING_TQHS} {300} +set_instance_parameter_value hps_0 {TIMING_TQSH} {0.38} +set_instance_parameter_value hps_0 {TPIUFPGA_Enable} {0} +set_instance_parameter_value hps_0 {TPIUFPGA_alt} {0} +set_instance_parameter_value hps_0 {TRACE_Mode} {N/A} +set_instance_parameter_value hps_0 {TRACE_PinMuxing} {Unused} +set_instance_parameter_value hps_0 
{TRACKING_ERROR_TEST} {0} +set_instance_parameter_value hps_0 {TRACKING_WATCH_TEST} {0} +set_instance_parameter_value hps_0 {TREFI} {35100} +set_instance_parameter_value hps_0 {TRFC} {350} +set_instance_parameter_value hps_0 {UART0_Mode} {N/A} +set_instance_parameter_value hps_0 {UART0_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {UART1_Mode} {N/A} +set_instance_parameter_value hps_0 {UART1_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {USB0_Mode} {N/A} +set_instance_parameter_value hps_0 {USB0_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {USB1_Mode} {N/A} +set_instance_parameter_value hps_0 {USB1_PinMuxing} {Unused} +set_instance_parameter_value hps_0 {USER_DEBUG_LEVEL} {1} +set_instance_parameter_value hps_0 {USE_AXI_ADAPTOR} {0} +set_instance_parameter_value hps_0 {USE_FAKE_PHY} {0} +set_instance_parameter_value hps_0 {USE_MEM_CLK_FREQ} {0} +set_instance_parameter_value hps_0 {USE_MM_ADAPTOR} {1} +set_instance_parameter_value hps_0 {USE_SEQUENCER_BFM} {0} +set_instance_parameter_value hps_0 {WEIGHT_PORT} {0 0 0 0 0 0} +set_instance_parameter_value hps_0 {WRBUFFER_ADDR_WIDTH} {6} +set_instance_parameter_value hps_0 {can0_clk_div} {1} +set_instance_parameter_value hps_0 {can1_clk_div} {1} +set_instance_parameter_value hps_0 {configure_advanced_parameters} {0} +set_instance_parameter_value hps_0 {customize_device_pll_info} {0} +set_instance_parameter_value hps_0 {dbctrl_stayosc1} {1} +set_instance_parameter_value hps_0 {dbg_at_clk_div} {0} +set_instance_parameter_value hps_0 {dbg_clk_div} {1} +set_instance_parameter_value hps_0 {dbg_trace_clk_div} {0} +set_instance_parameter_value hps_0 {desired_can0_clk_mhz} {100.0} +set_instance_parameter_value hps_0 {desired_can1_clk_mhz} {100.0} +set_instance_parameter_value hps_0 {desired_cfg_clk_mhz} {100.0} +set_instance_parameter_value hps_0 {desired_emac0_clk_mhz} {250.0} +set_instance_parameter_value hps_0 {desired_emac1_clk_mhz} {250.0} +set_instance_parameter_value hps_0 
{desired_gpio_db_clk_hz} {32000} +set_instance_parameter_value hps_0 {desired_l4_mp_clk_mhz} {100.0} +set_instance_parameter_value hps_0 {desired_l4_sp_clk_mhz} {100.0} +set_instance_parameter_value hps_0 {desired_mpu_clk_mhz} {800.0} +set_instance_parameter_value hps_0 {desired_nand_clk_mhz} {12.5} +set_instance_parameter_value hps_0 {desired_qspi_clk_mhz} {400.0} +set_instance_parameter_value hps_0 {desired_sdmmc_clk_mhz} {200.0} +set_instance_parameter_value hps_0 {desired_spi_m_clk_mhz} {200.0} +set_instance_parameter_value hps_0 {desired_usb_mp_clk_mhz} {200.0} +set_instance_parameter_value hps_0 {device_pll_info_manual} {{320000000 1600000000} {320000000 1000000000} {800000000 400000000 400000000}} +set_instance_parameter_value hps_0 {eosc1_clk_mhz} {25.0} +set_instance_parameter_value hps_0 {eosc2_clk_mhz} {25.0} +set_instance_parameter_value hps_0 {gpio_db_clk_div} {6249} +set_instance_parameter_value hps_0 {l3_mp_clk_div} {1} +set_instance_parameter_value hps_0 {l3_sp_clk_div} {1} +set_instance_parameter_value hps_0 {l4_mp_clk_div} {1} +set_instance_parameter_value hps_0 {l4_mp_clk_source} {1} +set_instance_parameter_value hps_0 {l4_sp_clk_div} {1} +set_instance_parameter_value hps_0 {l4_sp_clk_source} {1} +set_instance_parameter_value hps_0 {main_pll_c3} {3} +set_instance_parameter_value hps_0 {main_pll_c4} {3} +set_instance_parameter_value hps_0 {main_pll_c5} {15} +set_instance_parameter_value hps_0 {main_pll_m} {63} +set_instance_parameter_value hps_0 {main_pll_n} {0} +set_instance_parameter_value hps_0 {nand_clk_source} {2} +set_instance_parameter_value hps_0 {periph_pll_c0} {3} +set_instance_parameter_value hps_0 {periph_pll_c1} {3} +set_instance_parameter_value hps_0 {periph_pll_c2} {1} +set_instance_parameter_value hps_0 {periph_pll_c3} {19} +set_instance_parameter_value hps_0 {periph_pll_c4} {4} +set_instance_parameter_value hps_0 {periph_pll_c5} {9} +set_instance_parameter_value hps_0 {periph_pll_m} {79} +set_instance_parameter_value hps_0 
{periph_pll_n} {1} +set_instance_parameter_value hps_0 {periph_pll_source} {0} +set_instance_parameter_value hps_0 {qspi_clk_source} {1} +set_instance_parameter_value hps_0 {sdmmc_clk_source} {2} +set_instance_parameter_value hps_0 {show_advanced_parameters} {0} +set_instance_parameter_value hps_0 {show_debug_info_as_warning_msg} {0} +set_instance_parameter_value hps_0 {show_warning_as_error_msg} {0} +set_instance_parameter_value hps_0 {spi_m_clk_div} {0} +set_instance_parameter_value hps_0 {usb_mp_clk_div} {0} +set_instance_parameter_value hps_0 {use_default_mpu_clk} {1} -add_instance vta_0 vta 1.0 - -# connections and connection parameters -add_connection clk_0.clk hps_0.f2h_axi_clock clock -add_connection clk_0.clk hps_0.h2f_lw_axi_clock clock -add_connection clk_0.clk vta_0.clock clock -add_connection clk_0.clk_reset vta_0.reset reset - -add_connection hps_0.h2f_lw_axi_master vta_0.s_axi_control avalon -set_connection_parameter_value hps_0.h2f_lw_axi_master/vta_0.s_axi_control arbitrationPriority {1} -set_connection_parameter_value hps_0.h2f_lw_axi_master/vta_0.s_axi_control baseAddress {0x00020000} -set_connection_parameter_value hps_0.h2f_lw_axi_master/vta_0.s_axi_control defaultConnection {0} +add_instance pll_0 altera_pll 18.1 +set_instance_parameter_value pll_0 {debug_print_output} {0} +set_instance_parameter_value pll_0 {debug_use_rbc_taf_method} {0} +set_instance_parameter_value pll_0 {gui_active_clk} {0} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency0} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency1} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency10} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency11} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency12} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency13} {0 MHz} +set_instance_parameter_value pll_0 
{gui_actual_output_clock_frequency14} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency15} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency16} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency17} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency2} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency3} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency4} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency5} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency6} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency7} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency8} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_output_clock_frequency9} {0 MHz} +set_instance_parameter_value pll_0 {gui_actual_phase_shift0} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift1} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift10} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift11} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift12} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift13} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift14} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift15} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift16} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift17} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift2} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift3} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift4} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift5} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift6} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift7} 
{0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift8} {0} +set_instance_parameter_value pll_0 {gui_actual_phase_shift9} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter0} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter1} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter10} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter11} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter12} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter13} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter14} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter15} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter16} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter17} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter2} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter3} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter4} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter5} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter6} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter7} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter8} {0} +set_instance_parameter_value pll_0 {gui_cascade_counter9} {0} +set_instance_parameter_value pll_0 {gui_cascade_outclk_index} {0} +set_instance_parameter_value pll_0 {gui_channel_spacing} {0.0} +set_instance_parameter_value pll_0 {gui_clk_bad} {0} +set_instance_parameter_value pll_0 {gui_device_speed_grade} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c0} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c1} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c10} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c11} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c12} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c13} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c14} {1} 
+set_instance_parameter_value pll_0 {gui_divide_factor_c15} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c16} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c17} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c2} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c3} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c4} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c5} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c6} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c7} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c8} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_c9} {1} +set_instance_parameter_value pll_0 {gui_divide_factor_n} {1} +set_instance_parameter_value pll_0 {gui_dps_cntr} {C0} +set_instance_parameter_value pll_0 {gui_dps_dir} {Positive} +set_instance_parameter_value pll_0 {gui_dps_num} {1} +set_instance_parameter_value pll_0 {gui_dsm_out_sel} {1st_order} +set_instance_parameter_value pll_0 {gui_duty_cycle0} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle1} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle10} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle11} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle12} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle13} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle14} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle15} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle16} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle17} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle2} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle3} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle4} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle5} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle6} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle7} {50} +set_instance_parameter_value pll_0 
{gui_duty_cycle8} {50} +set_instance_parameter_value pll_0 {gui_duty_cycle9} {50} +set_instance_parameter_value pll_0 {gui_en_adv_params} {0} +set_instance_parameter_value pll_0 {gui_en_dps_ports} {0} +set_instance_parameter_value pll_0 {gui_en_phout_ports} {0} +set_instance_parameter_value pll_0 {gui_en_reconf} {0} +set_instance_parameter_value pll_0 {gui_enable_cascade_in} {0} +set_instance_parameter_value pll_0 {gui_enable_cascade_out} {0} +set_instance_parameter_value pll_0 {gui_enable_mif_dps} {0} +set_instance_parameter_value pll_0 {gui_feedback_clock} {Global Clock} +set_instance_parameter_value pll_0 {gui_frac_multiply_factor} {1.0} +set_instance_parameter_value pll_0 {gui_fractional_cout} {32} +set_instance_parameter_value pll_0 {gui_mif_generate} {0} +set_instance_parameter_value pll_0 {gui_multiply_factor} {1} +set_instance_parameter_value pll_0 {gui_number_of_clocks} {1} +set_instance_parameter_value pll_0 {gui_operation_mode} {normal} +set_instance_parameter_value pll_0 {gui_output_clock_frequency0} $FREQ_MHZ +set_instance_parameter_value pll_0 {gui_output_clock_frequency1} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency10} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency11} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency12} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency13} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency14} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency15} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency16} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency17} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency2} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency3} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency4} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency5} 
{100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency6} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency7} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency8} {100.0} +set_instance_parameter_value pll_0 {gui_output_clock_frequency9} {100.0} +set_instance_parameter_value pll_0 {gui_phase_shift0} {0} +set_instance_parameter_value pll_0 {gui_phase_shift1} {0} +set_instance_parameter_value pll_0 {gui_phase_shift10} {0} +set_instance_parameter_value pll_0 {gui_phase_shift11} {0} +set_instance_parameter_value pll_0 {gui_phase_shift12} {0} +set_instance_parameter_value pll_0 {gui_phase_shift13} {0} +set_instance_parameter_value pll_0 {gui_phase_shift14} {0} +set_instance_parameter_value pll_0 {gui_phase_shift15} {0} +set_instance_parameter_value pll_0 {gui_phase_shift16} {0} +set_instance_parameter_value pll_0 {gui_phase_shift17} {0} +set_instance_parameter_value pll_0 {gui_phase_shift2} {0} +set_instance_parameter_value pll_0 {gui_phase_shift3} {0} +set_instance_parameter_value pll_0 {gui_phase_shift4} {0} +set_instance_parameter_value pll_0 {gui_phase_shift5} {0} +set_instance_parameter_value pll_0 {gui_phase_shift6} {0} +set_instance_parameter_value pll_0 {gui_phase_shift7} {0} +set_instance_parameter_value pll_0 {gui_phase_shift8} {0} +set_instance_parameter_value pll_0 {gui_phase_shift9} {0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg0} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg1} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg10} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg11} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg12} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg13} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg14} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg15} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg16} {0.0} +set_instance_parameter_value 
pll_0 {gui_phase_shift_deg17} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg2} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg3} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg4} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg5} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg6} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg7} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg8} {0.0} +set_instance_parameter_value pll_0 {gui_phase_shift_deg9} {0.0} +set_instance_parameter_value pll_0 {gui_phout_division} {1} +set_instance_parameter_value pll_0 {gui_pll_auto_reset} {Off} +set_instance_parameter_value pll_0 {gui_pll_bandwidth_preset} {Auto} +set_instance_parameter_value pll_0 {gui_pll_cascading_mode} {Create an adjpllin signal to connect with an upstream PLL} +set_instance_parameter_value pll_0 {gui_pll_mode} {Integer-N PLL} +set_instance_parameter_value pll_0 {gui_ps_units0} {ps} +set_instance_parameter_value pll_0 {gui_ps_units1} {ps} +set_instance_parameter_value pll_0 {gui_ps_units10} {ps} +set_instance_parameter_value pll_0 {gui_ps_units11} {ps} +set_instance_parameter_value pll_0 {gui_ps_units12} {ps} +set_instance_parameter_value pll_0 {gui_ps_units13} {ps} +set_instance_parameter_value pll_0 {gui_ps_units14} {ps} +set_instance_parameter_value pll_0 {gui_ps_units15} {ps} +set_instance_parameter_value pll_0 {gui_ps_units16} {ps} +set_instance_parameter_value pll_0 {gui_ps_units17} {ps} +set_instance_parameter_value pll_0 {gui_ps_units2} {ps} +set_instance_parameter_value pll_0 {gui_ps_units3} {ps} +set_instance_parameter_value pll_0 {gui_ps_units4} {ps} +set_instance_parameter_value pll_0 {gui_ps_units5} {ps} +set_instance_parameter_value pll_0 {gui_ps_units6} {ps} +set_instance_parameter_value pll_0 {gui_ps_units7} {ps} +set_instance_parameter_value pll_0 {gui_ps_units8} {ps} +set_instance_parameter_value pll_0 {gui_ps_units9} {ps} 
+set_instance_parameter_value pll_0 {gui_refclk1_frequency} {100.0} +set_instance_parameter_value pll_0 {gui_refclk_switch} {0} +set_instance_parameter_value pll_0 {gui_reference_clock_frequency} {50.0} +set_instance_parameter_value pll_0 {gui_switchover_delay} {0} +set_instance_parameter_value pll_0 {gui_switchover_mode} {Automatic Switchover} +set_instance_parameter_value pll_0 {gui_use_locked} {0} -add_connection vta_0.m_axi_gmem hps_0.f2h_axi_slave avalon -set_connection_parameter_value vta_0.m_axi_gmem/hps_0.f2h_axi_slave arbitrationPriority {1} -set_connection_parameter_value vta_0.m_axi_gmem/hps_0.f2h_axi_slave baseAddress {0x0000} -set_connection_parameter_value vta_0.m_axi_gmem/hps_0.f2h_axi_slave defaultConnection {0} +add_instance vta_0 vta 1.0 # exported interfaces add_interface clk clock sink @@ -127,8 +728,33 @@ set_interface_property memory EXPORT_OF hps_0.memory add_interface reset reset sink set_interface_property reset EXPORT_OF clk_0.clk_in_reset +# connections and connection parameters +add_connection clk_0.clk pll_0.refclk + +add_connection clk_0.clk_reset pll_0.reset + +add_connection clk_0.clk_reset vta_0.reset + +add_connection hps_0.h2f_lw_axi_master vta_0.s_axi_control +set_connection_parameter_value hps_0.h2f_lw_axi_master/vta_0.s_axi_control arbitrationPriority {1} +set_connection_parameter_value hps_0.h2f_lw_axi_master/vta_0.s_axi_control baseAddress {0x00020000} +set_connection_parameter_value hps_0.h2f_lw_axi_master/vta_0.s_axi_control defaultConnection {0} + +add_connection pll_0.outclk0 hps_0.f2h_axi_clock + +add_connection pll_0.outclk0 hps_0.h2f_lw_axi_clock + +add_connection pll_0.outclk0 vta_0.clock + +add_connection vta_0.m_axi_gmem hps_0.f2h_axi_slave +set_connection_parameter_value vta_0.m_axi_gmem/hps_0.f2h_axi_slave arbitrationPriority {1} +set_connection_parameter_value vta_0.m_axi_gmem/hps_0.f2h_axi_slave baseAddress {0x0000} +set_connection_parameter_value vta_0.m_axi_gmem/hps_0.f2h_axi_slave defaultConnection {0} + # 
interconnect requirements set_interconnect_requirement {$system} {qsys_mm.clockCrossingAdapter} {HANDSHAKE} +set_interconnect_requirement {$system} {qsys_mm.enableEccProtection} {FALSE} +set_interconnect_requirement {$system} {qsys_mm.insertDefaultSlave} {FALSE} set_interconnect_requirement {$system} {qsys_mm.maxAdditionalLatency} {1} save_system soc_system.qsys From 8342eeb65cdfda0fc7e3c2c7b401f858ca522b4c Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Mon, 2 Mar 2020 15:02:47 -0800 Subject: [PATCH 02/19] [VTA][TSIM] Add more debug and tracing options. * Modified Makefile to change default config to DefaultDe10Config. * Added option in Makefile to produce more detailed tracing for extra observability in debugging complex scenarios. * Added option in Makefile to produce traces in FST format which are 2 orders of magnitude smaller, although much slower to generate. * Added option in Makefile to build the simulator with GCC address sanitizer. * Modified Makefile to not lint the scala code by default avoiding unintended wrong indentation. Linting should be better performed manually on a per-need basis. 
--- vta/hardware/chisel/Makefile | 38 +++++++++++++++++++++++++++------ vta/hardware/dpi/tsim_device.cc | 10 ++++++++- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/vta/hardware/chisel/Makefile b/vta/hardware/chisel/Makefile index 9804230074ac..1b3afaf7d18a 100644 --- a/vta/hardware/chisel/Makefile +++ b/vta/hardware/chisel/Makefile @@ -32,16 +32,19 @@ ifeq (, $(VERILATOR_INC_DIR)) endif endif -CONFIG = DefaultPynqConfig +CONFIG = DefaultDe10Config TOP = VTA TOP_TEST = Test BUILD_NAME = build USE_TRACE = 0 +USE_TRACE_FST = 0 +USE_TRACE_DETAILED = 0 USE_THREADS = $(shell nproc) VTA_LIBNAME = libvta_hw UNITTEST_NAME = all CXX = g++ DEBUG = 0 +SANITIZE = 0 config_test = $(TOP_TEST)$(CONFIG) vta_dir = $(abspath ../../) @@ -61,11 +64,12 @@ verilator_opt += -Mdir ${verilator_build_dir} verilator_opt += -I$(chisel_build_dir) ifeq ($(DEBUG), 0) - cxx_flags = -O2 -Wall + cxx_flags = -O2 -Wall -fvisibility=hidden else cxx_flags = -O0 -g -Wall endif -cxx_flags += -fvisibility=hidden -std=c++11 + +cxx_flags += -std=c++11 -faligned-new cxx_flags += -DVL_TSIM_NAME=V$(TOP_TEST) cxx_flags += -DVL_PRINTF=printf cxx_flags += -DVL_USER_FINISH @@ -82,13 +86,33 @@ cxx_flags += -I$(tvm_dir)/3rdparty/dlpack/include ld_flags = -fPIC -shared +ifeq ($(SANITIZE), 1) + ifeq ($(DEBUG), 1) + cxx_flags += -fno-omit-frame-pointer -fsanitize=address -fsanitize-recover=address + ld_flags += -fno-omit-frame-pointer -fsanitize=address -fsanitize-recover=address + endif +endif + cxx_objs = $(verilator_build_dir)/verilated.o $(verilator_build_dir)/verilated_dpi.o $(verilator_build_dir)/tsim_device.o ifneq ($(USE_TRACE), 0) - verilator_opt += --trace cxx_flags += -DVM_TRACE=1 - cxx_flags += -DTSIM_TRACE_FILE=$(verilator_build_dir)/$(TOP_TEST).vcd - cxx_objs += $(verilator_build_dir)/verilated_vcd_c.o + ifeq ($(USE_TRACE_FST), 1) + cxx_flags += -DVM_TRACE_FST + verilator_opt += --trace-fst + else + verilator_opt += --trace + endif + ifeq ($(USE_TRACE_DETAILED), 1) + verilator_opt += 
--trace-underscore --trace-structs + endif + ifeq ($(USE_TRACE_FST), 1) + cxx_flags += -DTSIM_TRACE_FILE=$(verilator_build_dir)/$(TOP_TEST).fst + cxx_objs += $(verilator_build_dir)/verilated_fst_c.o + else + cxx_flags += -DTSIM_TRACE_FILE=$(verilator_build_dir)/$(TOP_TEST).vcd + cxx_objs += $(verilator_build_dir)/verilated_vcd_c.o + endif else cxx_flags += -DVM_TRACE=0 endif @@ -109,7 +133,7 @@ else lib_path = $(vta_dir)/$(BUILD_NAME)/$(VTA_LIBNAME).so endif -default: lint lib +default: lib lint: sbt scalastyle diff --git a/vta/hardware/dpi/tsim_device.cc b/vta/hardware/dpi/tsim_device.cc index d197fbd4385e..ffa192b283ea 100644 --- a/vta/hardware/dpi/tsim_device.cc +++ b/vta/hardware/dpi/tsim_device.cc @@ -22,8 +22,12 @@ #include #if VM_TRACE +#ifdef VM_TRACE_FST +#include +#else #include #endif +#endif #if VM_TRACE #define STRINGIZE(x) #x @@ -100,7 +104,11 @@ int VTADPISim() { #if VM_TRACE Verilated::traceEverOn(true); +#ifdef VM_TRACE_FST + VerilatedFstC* tfp = new VerilatedFstC; +#else VerilatedVcdC* tfp = new VerilatedVcdC; +#endif // VM_TRACE_FST top->trace(tfp, 99); tfp->open(STRINGIZE_VALUE_OF(TSIM_TRACE_FILE)); #endif @@ -142,7 +150,7 @@ int VTADPISim() { #endif trace_count++; if ((trace_count % 1000000) == 1) - fprintf(stderr, "[traced %dM cycles]\n", trace_count / 1000000); + fprintf(stderr, "[traced %luM cycles]\n", trace_count / 1000000); while (top->sim_wait) { top->clock = 0; std::this_thread::sleep_for(std::chrono::milliseconds(100)); From 1e865edca80f400b17a3ab15c055d6eaf7480200 Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Mon, 2 Mar 2020 15:19:49 -0800 Subject: [PATCH 03/19] [VTA][de10nano] Enable remote programming of FPGA. Issue: The Cyclone V FPGA on board of the DE10-Nano can only be programmed using the JTAG port, which is a limiting option for users. Solution: Add support for the remote programming of the FPGA implementing the FPGA programming manager protocol published in the Cyclone V user manual. 
* Added file de10nano_mgr.h implementing an FPGA manager class that supports handling of control and status registers as well as a push-button option to program the FPGA. The class can be easily extended to include more registers if needed. * Used an instance of the FPGA manager to implement function VTAProgram also warning users when incompatible bitstream files are used. * Registered VTAProgram as a global function and modified the program_bitstream python class to use it. --- vta/python/vta/program_bitstream.py | 11 +- vta/src/de10nano/cma_api.h | 2 + vta/src/de10nano/de10nano_driver.cc | 31 +- vta/src/de10nano/de10nano_mgr.h | 605 ++++++++++++++++++++++++++++ 4 files changed, 644 insertions(+), 5 deletions(-) create mode 100644 vta/src/de10nano/de10nano_mgr.h diff --git a/vta/python/vta/program_bitstream.py b/vta/python/vta/program_bitstream.py index 7d2c4e38db3e..ccdb45116f5c 100644 --- a/vta/python/vta/program_bitstream.py +++ b/vta/python/vta/program_bitstream.py @@ -19,7 +19,7 @@ import argparse def main(): - """Main funciton""" + """Main function""" parser = argparse.ArgumentParser() parser.add_argument("target", type=str, default="", help="target") @@ -27,7 +27,7 @@ def main(): help="bitstream path") args = parser.parse_args() - if (args.target != 'pynq' and args.target != 'sim'): + if args.target not in ('pynq', 'ultra96', 'de10nano', 'sim', 'tsim'): raise RuntimeError("Unknown target {}".format(args.target)) curr_path = os.path.dirname( @@ -48,9 +48,16 @@ def pynq_bitstream_program(bitstream_path): bitstream = Bitstream(bitstream_path) bitstream.download() +def de10nano_bitstream_program(bitstream_path): + from tvm import get_global_func + program = get_global_func("vta.de10nano.program") + program (bitstream_path) + def bitstream_program(target, bitstream): if target in ['pynq', 'ultra96']: pynq_bitstream_program(bitstream) + elif target in ['de10nano']: + de10nano_bitstream_program(bitstream) elif target in ['sim', 'tsim']: # In simulation, bit stream 
programming is a no-op return diff --git a/vta/src/de10nano/cma_api.h b/vta/src/de10nano/cma_api.h index f20939d52b63..5e1653f172c7 100644 --- a/vta/src/de10nano/cma_api.h +++ b/vta/src/de10nano/cma_api.h @@ -27,6 +27,8 @@ extern "C" { #endif +#include + /** * \brief Initialize CMA api (basically perform open() syscall). * diff --git a/vta/src/de10nano/de10nano_driver.cc b/vta/src/de10nano/de10nano_driver.cc index 97607f536051..b8dba2744595 100644 --- a/vta/src/de10nano/de10nano_driver.cc +++ b/vta/src/de10nano/de10nano_driver.cc @@ -21,9 +21,11 @@ */ #include "de10nano_driver.h" +#include "de10nano_mgr.h" #include #include +#include #include #include #include "cma_api.h" @@ -72,12 +74,16 @@ void *VTAMapRegister(uint32_t addr) { uint32_t virt_offset = addr - virt_base; // Open file and mmap uint32_t mmap_file = open("/dev/mem", O_RDWR|O_SYNC); - return mmap(NULL, + // Note that if virt_offset != 0, i.e. addr is not page aligned + // munmap will not be unmapping all memory. + void *vmem = mmap(NULL, (VTA_IP_REG_MAP_RANGE + virt_offset), PROT_READ|PROT_WRITE, MAP_SHARED, mmap_file, virt_base); + close (mmap_file); + return vmem; } void VTAUnmapRegister(void *vta) { @@ -149,6 +155,25 @@ int VTADeviceRun(VTADeviceHandle handle, insn_phy_addr, insn_count, wait_cycles); } -void VTAProgram(const char* bitstream) { - CHECK(false) << "VTAProgram not implemented for de10nano"; +void VTAProgram (const char *rbf) +{ + de10nano_mgr mgr; + CHECK(mgr.mapped()) << "de10nano: mapping of /dev/mem failed"; + CHECK(mgr.program_rbf (rbf)) << "Programming of the de10nano failed.\n" + "This is usually due to the use of an RBF file that is incompatible " + "with the MSEL switches on the DE10-Nano board. The recommended RBF " + "format is FastPassiveParallel32 with compression enabled, " + "corresponding to MSEL 01010. 
An RBF file in FPP32 mode can be " + "generated in a Quartus session with the command " + "'quartus_cpf -o bitstream_compression=on -c .sof .rbf'."; } + +using tvm::runtime::TVMRetValue; +using tvm::runtime::TVMArgs; + +TVM_REGISTER_GLOBAL("vta.de10nano.program") +.set_body([](TVMArgs args, TVMRetValue* rv) { + std::string bitstream = args[0]; + VTAProgram(bitstream.c_str()); +}); + diff --git a/vta/src/de10nano/de10nano_mgr.h b/vta/src/de10nano/de10nano_mgr.h new file mode 100644 index 000000000000..dae21ed394da --- /dev/null +++ b/vta/src/de10nano/de10nano_mgr.h @@ -0,0 +1,605 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * \file de10nano_mgr.h + * \brief DE10-Nano fpga manager. + */ + +#ifndef DE10NANO_FPGA_MGR_H +#define DE10NANO_FPGA_MGR_H + +extern "C" +{ + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include +} + +// Register definition and address map taken from cv_5v4.pdf, +// Cyclone V Hard Processor System Technical Reference Manual, +// chapter 5: FPGA Manager. 
+struct de10nano_mgr +{ + // reg32 is a static base class interface and implementation + // of a generic 32 bit register that avoids the use of a virtual + // class and ugly bit shift manipulations. + struct reg32 + { + reg32 (uint32_t offset, uint32_t reset = 0) : + m_offset (offset), + m_reset (reset) + {} + void map (uint8_t *base) + { + m_addr = reinterpret_cast(base + m_offset); + m_reg = reinterpret_cast((uint8_t*)this+sizeof(reg32)); + } + uint32_t read () + { + *m_reg = *m_addr; + return *m_reg; + } + void write () { *m_addr = *m_reg; } + void write (uint32_t value) { *m_addr = *m_reg = value; } + void clear () { *m_reg = 0; } + void reset () { *m_reg = m_reset; } + void print (const char *name, bool addr = false) + { + if (addr) + printf ("DE10-Nano-Mgr: %16s: 0x%08x addr: %p\n", name, read(), m_addr); + else + printf ("DE10-Nano-Mgr: %16s: 0x%08x\n", name, read()); + } + + uint32_t m_offset, m_reset, *m_reg; + volatile uint32_t *m_addr; + + private: // Do not use this class on its own. + reg32 (const reg32 &rhs); + }; + + // Register definitions. All registers are of 32 bit size. + // Add one structure for each register, making sure that all + // bit fields come first and pack exactly into 32 bits. 
+ + struct data : public reg32 + { + data () : reg32 (0x0, 0x0) {} + uint32_t value; + } data; + + struct stat : public reg32 + { + stat () : reg32 (0x0, 0x45) {} + enum mode_values + { + FPGA_POWER_OFF = 0x0, + FPGA_RESET_PHASE = 0x1, + FPGA_CONFIG_PHASE = 0x2, + FPGA_INIT_PHASE = 0x3, + FPGA_USER_MODE = 0x4, + FPGA_ZOMBIE_MODE = 0x5 + }; + + enum msel_values + { + FPP16_AESN_ZIPN = 0x0, + FPP32_AESO_ZIPY = 0xA + }; + + const char * mode_str () + { + const char *str = "UNKNOWN"; + switch (mode) + { + case FPGA_POWER_OFF : str = "POWER_OFF" ; break; + case FPGA_RESET_PHASE : str = "RESET_PHASE" ; break; + case FPGA_CONFIG_PHASE : str = "CONFIG_PHASE" ; break; + case FPGA_INIT_PHASE : str = "INIT_PHASE" ; break; + case FPGA_USER_MODE : str = "USER_MODE" ; break; + case FPGA_ZOMBIE_MODE : str = "UNDEF_MODE" ; break; + } + return str; + } + + bool msel_is_invalid () + { + return msel & 0x10 || (msel & 0x3) == 0x3; + } + + void print (bool addr = false, bool fields = true) + { + reg32::print ("stat", addr); + if (fields) + { + printf ("DE10-Nano-Mgr: %16s: %x\n", "msel", msel); + printf ("DE10-Nano-Mgr: %16s: %s\n", "mode", mode_str()); + } + } + + uint32_t mode : 3; // 2:0 RW + uint32_t msel : 5; // 7:3 RO + uint32_t rsvd : 24; // 31:8 + } stat; + + struct ctrl : public reg32 + { + ctrl () : reg32 (0x4, 0x200) {} + + uint32_t en : 1; // 0 RW + uint32_t nce : 1; // 1 RW + uint32_t nconfigpull : 1; // 2 RW + uint32_t nstatuspull : 1; // 3 RW + uint32_t confdonepull : 1; // 4 RW + uint32_t prreq : 1; // 5 RW + uint32_t cdratio : 2; // 7:6 RW + uint32_t axicfgen : 1; // 8 RW + uint32_t cfgwdth : 1; // 9 RW + uint32_t rsvd : 22; // 31:10 + + void print (bool addr = false, bool fields = true) + { + reg32::print ("ctrl", addr); + if (fields) + { + printf ("DE10-Nano-Mgr: %16s: %x\n", "en" , en ); + printf ("DE10-Nano-Mgr: %16s: %x\n", "nce" , nce ); + printf ("DE10-Nano-Mgr: %16s: %x\n", "nconfigpull" , nconfigpull ); + printf ("DE10-Nano-Mgr: %16s: %x\n", "nstatuspull" , 
nstatuspull ); + printf ("DE10-Nano-Mgr: %16s: %x\n", "confdonepull", confdonepull ); + printf ("DE10-Nano-Mgr: %16s: %x\n", "prreq" , prreq ); + printf ("DE10-Nano-Mgr: %16s: %x\n", "cdratio" , cdratio ); + printf ("DE10-Nano-Mgr: %16s: %x\n", "axicfgen" , axicfgen ); + printf ("DE10-Nano-Mgr: %16s: %x\n", "cfgwdth" , cfgwdth ); + } + } + } ctrl; + + struct dclkcnt : public reg32 + { + dclkcnt () : reg32 (0x8, 0x0) {} + void print () { return reg32::print ("dclkcnt"); } + + uint32_t cnt; // RW + } dclkcnt; + + struct dclkstat : public reg32 + { + dclkstat () : reg32 (0xC, 0x0) {} + void print () { return reg32::print ("dclkstat"); } + + uint32_t dcntdone : 1; // RW + uint32_t rsvd : 31; + } dclkstat; + + struct gpio_inten : public reg32 + { + gpio_inten () : reg32 (0x830, 0x0) {} + void print () { return reg32::print ("gpio_inten"); } + + uint32_t value : 32; // RW + } gpio_inten; + + struct gpio_porta_eoi : public reg32 + { + gpio_porta_eoi () : reg32 (0x84C, 0x0) {} + void print () { return reg32::print ("gpio_porta_eoi"); } + + uint32_t ns : 1; // 0 WO + uint32_t cd : 1; // 1 WO + uint32_t id : 1; // 2 WO + uint32_t crc : 1; // 3 WO + uint32_t ccd : 1; // 4 WO + uint32_t prr : 1; // 5 WO + uint32_t pre : 1; // 6 WO + uint32_t prd : 1; // 7 WO + uint32_t ncp : 1; // 8 WO + uint32_t nsp : 1; // 9 WO + uint32_t cdp : 1; // 10 WO + uint32_t fpo : 1; // 11 WO + uint32_t rsvd : 20; // 31:12 + } gpio_porta_eoi; + + struct gpio_ext_porta : public reg32 + { + gpio_ext_porta () : reg32 (0x850, 0x0) {} + void print (bool addr = false, bool fields = true) + { + reg32::print ("gpio_ext_porta", addr); + if (fields) + { + printf ("DE10-Nano-Mgr: %16s: %x\n", "nSTATUS" , ns ); + printf ("DE10-Nano-Mgr: %16s: %x\n", "CONF_DONE" , cd ); + printf ("DE10-Nano-Mgr: %16s: %x\n", "INIT_DONE" , id ); + printf ("DE10-Nano-Mgr: %16s: %x\n", "CRC_ERROR" , crc); + printf ("DE10-Nano-Mgr: %16s: %x\n", "CVP_CONF_DONE" , ccd); + printf ("DE10-Nano-Mgr: %16s: %x\n", "PR_READY" , prr); + 
printf ("DE10-Nano-Mgr: %16s: %x\n", "PR_ERROR" , pre); + printf ("DE10-Nano-Mgr: %16s: %x\n", "PR_DONE" , prd); + printf ("DE10-Nano-Mgr: %16s: %x\n", "nCONFIG_PIN" , ncp); + printf ("DE10-Nano-Mgr: %16s: %x\n", "nSTATUS_PIN" , nsp); + printf ("DE10-Nano-Mgr: %16s: %x\n", "CONF_DONE_PIN" , cdp); + printf ("DE10-Nano-Mgr: %16s: %x\n", "FPGA_POWER_ON" , fpo); + } + } + + uint32_t ns : 1; // 0 RO + uint32_t cd : 1; // 1 RO + uint32_t id : 1; // 2 RO + uint32_t crc : 1; // 3 RO + uint32_t ccd : 1; // 4 RO + uint32_t prr : 1; // 5 RO + uint32_t pre : 1; // 6 RO + uint32_t prd : 1; // 7 RO + uint32_t ncp : 1; // 8 RO + uint32_t nsp : 1; // 9 RO + uint32_t cdp : 1; // 10 RO + uint32_t fpo : 1; // 11 RO + uint32_t rsvd : 20; // 31:12 + } gpio_ext_porta; + + struct monitor + { + // This is used to both break a polling loop if the specified number + // of milliseconds have passed and to relax the polling yielding the + // cpu every millisecond. + monitor () : msg (""), m_status (true), m_ticks (0), m_counter (0) + { + m_epoc_us = time_stamp (); + } + + void init (const char *message, uint32_t ticks_ms = 1000) + { + msg = message; + m_ticks = m_counter = ticks_ms; + m_init_us = time_stamp (); + printf ("DE10-Nano-Mgr: %-32s : ", msg); + } + + bool status () { return m_status; } + + void reset () { m_counter = m_ticks; } + + void done (bool status = true) + { + uint32_t elapsed = time_stamp (m_init_us); + const char *rs = "FAIL"; + if (!m_counter) + { + status = false; + rs = "TOUT"; + } + else if (status) + rs = "PASS"; + printf ("\rDE10-Nano-Mgr: %-32s : %s in %u us\n", msg, rs, elapsed); + if (!status) + { + m_status = false; + throw 1; + } + } + + ~monitor () + { + uint32_t elapsed = time_stamp (m_epoc_us); + const char *rs = m_status ? 
"SUCCESS" : "FAILURE"; + printf ("DE10-Nano-Mgr: EXIT %s in %u us\n", rs, elapsed); + } + + uint64_t time_stamp (uint64_t base_us = 0) + { + struct timeval tv; + gettimeofday (&tv, NULL); + return tv.tv_sec * 1000000L + tv.tv_usec - base_us; + } + + bool operator () (bool cond) + { + if (m_counter) + { + if (!cond) + return false; + m_counter--; + usleep (1000); + } + return m_counter; + } + const char *msg; + private: + bool m_status; + uint32_t m_ticks, m_counter; + uint64_t m_init_us, m_epoc_us; + }; + + enum BaseAddr + { + REGS_BASE_ADDR = 0xFF706000U, + DATA_BASE_ADDR = 0xFFB90000U + }; + + de10nano_mgr () + { + m_page_size = sysconf (_SC_PAGE_SIZE); + #ifdef MOCK_DEVMEM + m_regs_base = (uint8_t*) malloc (m_page_size); + m_data_base = (uint8_t*) malloc (m_page_size); + #else + m_regs_base = map_mem (REGS_BASE_ADDR); + m_data_base = map_mem (DATA_BASE_ADDR); + #endif // MOCK_DEVMEM + data.map(m_data_base); + stat.map(m_regs_base); + ctrl.map(m_regs_base); + dclkcnt.map(m_regs_base); + dclkstat.map(m_regs_base); + gpio_inten.map(m_regs_base); + gpio_porta_eoi.map(m_regs_base); + gpio_ext_porta.map(m_regs_base); + } + + ~de10nano_mgr () + { + #ifdef MOCK_DEVMEM + free (m_regs_base); + free (m_data_base); + #else + unmap_mem (m_regs_base); + unmap_mem (m_data_base); + #endif // MOCK_DEVMEM + } + + bool mapped () const { return m_regs_base && m_data_base; } + + void print (bool addr = false) + { + stat.print(addr, false); + ctrl.print(addr, false); + gpio_inten.print(); + gpio_porta_eoi.print(); + gpio_ext_porta.print(addr, false); + } + + private: + + uint32_t msel_to_cfgwdth (uint32_t msel) + { + return (msel & 0b1000) >> 3; + } + + uint32_t msel_to_cdratio (uint32_t msel) + { + uint32_t cfgwdth = msel_to_cfgwdth (msel); + uint32_t cdratio = msel & 0b11; + if (cfgwdth && cdratio) + cdratio++; + return cdratio; + } + + uint8_t * map_mem (off_t addr, size_t pages = 1) + { + if (m_page_size <= 0) { return NULL; } + + int mem_fd = open("/dev/mem", O_SYNC | O_RDWR); + 
if (mem_fd < 0) { return NULL; } + + void *vbase = mmap(NULL, pages*m_page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, mem_fd, addr & ~(pages*m_page_size-1)); + if (vbase == MAP_FAILED) { return NULL; } + + close (mem_fd); + return (uint8_t*) vbase; + } + + void unmap_mem (void *base, size_t pages = 1) + { + if (base) + munmap (base, pages * m_page_size); + } + + uint8_t *m_regs_base, *m_data_base; + size_t m_page_size; + + public: + + // Configuration sequence documented at page A-34. + bool program_rbf (const char *rbf) + { + monitor mon; + int rbf_fd; + uint32_t count = 0; + printf ("DE10-Nano-Mgr: Programming FPGA from image %s\n", rbf); + + try { + + mon.init ("Open RBF file"); + rbf_fd = open (rbf, (O_RDONLY | O_SYNC)); + mon.done (rbf_fd >= 0); + + // 1. Set the cdratio and cfgwdth bits of the ctrl register in the + // FPGA manager registers (fpgamgrregs) to match the characteristics + // of the configuration image. Tese settings are dependent on the + // MSEL pins input. + // 2. Set the nce bit of the ctrl register to 0 to enable HPS + // configuration. + // 3. Set the en bit of the ctrl register to 1 to give the FPGA + // manager control of the configuration input signals. + // 4. Set the nconfigpull bit of the ctrl register to 1 to pull + // down the nCONFIG pin and put the FPGA portion of the device + // into the reset phase. + mon.init ("Enable FPGA configuration"); + stat.read(); + if (stat.msel_is_invalid()) + printf ("DE10-Nano-Mgr: msel %x is not a valid HPS configuration\n", stat.msel); + else + { + ctrl.read(); + ctrl.cdratio = msel_to_cdratio (stat.msel); + ctrl.cfgwdth = msel_to_cfgwdth (stat.msel); + ctrl.nce = 0; + ctrl.en = 1; + ctrl.nconfigpull = 1; + ctrl.write(); + } + mon.done (!stat.msel_is_invalid()); + + // 5. Poll the mode bit of the stat register and wait until + // the FPGA enters the reset phase. 
+ mon.init ("Wait for FPGA to reset"); + do { + stat.read(); + } while (mon(stat.mode != stat::FPGA_RESET_PHASE)); + mon.done (); + stat.print(); + + // 6. Set the nconfigpull bit of the ctrl register to 0 to + // release the FPGA from reset. + mon.init ("Release FPGA from reset"); + ctrl.nconfigpull = 0; + ctrl.write(); + mon.done (); + + // 7. Read the mode bit of the stat register and wait until + // the FPGA enters the configuration phase. + mon.init ("Wait for configuration phase"); + do { + stat.read(); + } while (mon(stat.mode != stat::FPGA_CONFIG_PHASE)); + mon.done (); + stat.print(); + + // 8. Clear the interrupt bit of nSTATUS (ns) in the gpio interrupt + // register (fpgamgrregs.mon.gpio_porta_eoi). + mon.init ("Clear nSTATUS interrupt bit"); + gpio_porta_eoi.clear(); + gpio_porta_eoi.ns = 1; + gpio_porta_eoi.write(); + mon.done (); + + // 9. Set the axicfgen bit of the ctrl register to 1 to enable + // sending configuration data to the FPGA. + mon.init ("Enable configuration on AXI"); + ctrl.axicfgen = 1; + ctrl.write(); + mon.done (); + + // 10. Write the configuration image to the configuration data register + // (data) in the FPGA manager module configuration data registers + // (fpgamgrdata). You can also choose to use a DMA controller to + // transfer the configuration image from a peripheral device to the + // FPGA manager. + ssize_t bytes; + mon.init ("Write configuration Image"); + do { + data.value = 0; + bytes = read (rbf_fd, &data.value, sizeof(data.value)); + if (bytes > 0) + { + if (!(count % (1<<16))) + { + printf("\rDE10-Nano-Mgr: %-32s : %u B", mon.msg, count); + fflush (stdout); + } + data.write(); + count += bytes; + } + } while (bytes == 4); + mon.done (count > 0); + printf("DE10-Nano-Mgr: %-32s : written %u B\n", mon.msg, count); + close (rbf_fd); + + // 11. Use the fpgamgrregs.mon.gpio_ext_porta registers to monitor + // the CONF_DONE (cd) and nSTATUS (ns) bits. 
+ mon.init ("Wait for CONF_DONE"); + do { + gpio_ext_porta.read(); + } while (mon(gpio_ext_porta.cd != 1 && gpio_ext_porta.ns != 1)); + mon.done (); + stat.print(); + + // 12. Set the axicfgen bit of the ctrl register to 0 to disable + // configuration data on AXI slave. + mon.init ("Disable configuration on AXI"); + ctrl.axicfgen = 0; + ctrl.write(); + mon.done (); + + // 13. Clear any previous DONE status by writing a 1 to the dcntdone + // bit of the DCLK status register (dclkstat) to clear the completed + // status flag. + mon.init ("Clear DCLK DONE status"); + dclkstat.dcntdone = 1; + dclkstat.write(); + mon.done (); + + // 14. Send the DCLKs required by the FPGA to enter the + // initialization phase. + mon.init ("Send DCLK for init phase"); + dclkcnt.cnt = 4; + dclkcnt.write(); + mon.done (); + + // 15. Poll the dcntdone bit of the DCLK status register (dclkstat) + // until it changes to 1, which indicates that all the DCLKs have + // been sent. + mon.init ("Wait for DCLK"); + do { + dclkstat.read(); + } while (mon(dclkstat.dcntdone != 1)); + mon.done (); + + // 16. Write a 1 to the dcntdone bit of the DCLK status register to + // clear the completed status flag. + mon.init ("Clear DCLK status flag"); + dclkstat.dcntdone = 1; + dclkstat.write(); + mon.done (); + + // 17. Read the mode bit of the stat register to wait for the FPGA + // to enter user mode. + mon.init ("Wait for FPGA user mode"); + do { + stat.read(); + } while (mon(stat.mode != stat::FPGA_USER_MODE)); + mon.done(); + + // 18. Set the en bit of the ctrl register to 0 to allow the + // external pins to drive the configuration input signals. 
+ mon.init ("Release control"); + ctrl.en = 0; + ctrl.write(); + mon.done(); + } + catch (int i) + { + close (rbf_fd); + printf("DE10-Nano-Mgr: %-32s : written %u B\n", mon.msg, count); + print (); + } + + return mon.status(); + } +}; + +#endif // DE10NANO_FPGA_MGR_H From f467ced37f3f59414093b923833bb429571b9a16 Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Mon, 2 Mar 2020 15:51:46 -0800 Subject: [PATCH 04/19] [VTA][de10nano] Enhance de10nano runtime support. Issue: The de10nano target has incomplete, non-working support for runtime reconfiguration, bitstream programming, and examples of usage. Solution: Complete runtime support for the de10nano target. * Modified VTA.cmake to comment out a default override for VTA_MAX_XFER to 21 bit wide. * Modified VTA.cmake to add needed de10nano include dirs. * Modified relevant files to support de10nano same way as other targets for VTA runtime reconfiguration and FPGA programming. * Added test_program_rpc.py example as a runtime FPGA programming example. Note that unlike the pynq target no bitstream is either downloaded or programmed when the bitstream argument is set to None. * Cosmetic changes to vta config files. 
--- cmake/modules/VTA.cmake | 4 +- vta/config/de10nano_sample.json | 2 +- vta/config/pynq_sample.json | 2 +- vta/config/ultra96_sample.json | 2 +- vta/python/vta/exec/rpc_server.py | 16 ++++--- vta/python/vta/pkg_config.py | 6 +++ vta/python/vta/rpc_client.py | 3 ++ vta/src/de10nano/de10nano_mgr.h | 26 +++++------ vta/tests/python/de10nano/test_program_rpc.py | 45 +++++++++++++++++++ vta/tutorials/matrix_multiply.py | 2 +- vta/tutorials/vta_get_started.py | 2 +- 11 files changed, 86 insertions(+), 24 deletions(-) create mode 100644 vta/tests/python/de10nano/test_program_rpc.py diff --git a/cmake/modules/VTA.cmake b/cmake/modules/VTA.cmake index 0e58d760be5e..280d340b1632 100644 --- a/cmake/modules/VTA.cmake +++ b/cmake/modules/VTA.cmake @@ -101,7 +101,9 @@ elseif(PYTHON) ${VTA_TARGET} STREQUAL "ultra96") target_link_libraries(vta ${__cma_lib}) elseif(${VTA_TARGET} STREQUAL "de10nano") # DE10-Nano rules - target_compile_definitions(vta PUBLIC VTA_MAX_XFER=2097152) # (1<<21) + #target_compile_definitions(vta PUBLIC VTA_MAX_XFER=2097152) # (1<<21) + target_include_directories(vta PUBLIC vta/src/de10nano) + target_include_directories(vta PUBLIC 3rdparty) target_include_directories(vta PUBLIC "/usr/local/intelFPGA_lite/18.1/embedded/ds-5/sw/gcc/arm-linux-gnueabihf/include") endif() diff --git a/vta/config/de10nano_sample.json b/vta/config/de10nano_sample.json index c7560ccc0f57..e4148c3e8ecf 100644 --- a/vta/config/de10nano_sample.json +++ b/vta/config/de10nano_sample.json @@ -7,7 +7,7 @@ "LOG_BATCH" : 0, "LOG_BLOCK" : 4, "LOG_UOP_BUFF_SIZE" : 15, - "LOG_INP_BUFF_SIZE" :15, + "LOG_INP_BUFF_SIZE" : 15, "LOG_WGT_BUFF_SIZE" : 18, "LOG_ACC_BUFF_SIZE" : 17 } diff --git a/vta/config/pynq_sample.json b/vta/config/pynq_sample.json index 380984a28972..7a2664105f76 100644 --- a/vta/config/pynq_sample.json +++ b/vta/config/pynq_sample.json @@ -7,7 +7,7 @@ "LOG_BATCH" : 0, "LOG_BLOCK" : 4, "LOG_UOP_BUFF_SIZE" : 15, - "LOG_INP_BUFF_SIZE" :15, + "LOG_INP_BUFF_SIZE" : 15, 
"LOG_WGT_BUFF_SIZE" : 18, "LOG_ACC_BUFF_SIZE" : 17 } diff --git a/vta/config/ultra96_sample.json b/vta/config/ultra96_sample.json index 013420cff52e..35b5a7e322f0 100644 --- a/vta/config/ultra96_sample.json +++ b/vta/config/ultra96_sample.json @@ -7,7 +7,7 @@ "LOG_BATCH" : 0, "LOG_BLOCK" : 4, "LOG_UOP_BUFF_SIZE" : 15, - "LOG_INP_BUFF_SIZE" :15, + "LOG_INP_BUFF_SIZE" : 15, "LOG_WGT_BUFF_SIZE" : 18, "LOG_ACC_BUFF_SIZE" : 17 } diff --git a/vta/python/vta/exec/rpc_server.py b/vta/python/vta/exec/rpc_server.py index 558632306111..de019753d23e 100644 --- a/vta/python/vta/exec/rpc_server.py +++ b/vta/python/vta/exec/rpc_server.py @@ -67,11 +67,15 @@ def ext_dev_callback(): @tvm.register_func("tvm.contrib.vta.init", override=True) def program_fpga(file_name): # pylint: disable=import-outside-toplevel - from pynq import xlnk - # Reset xilinx driver - xlnk.Xlnk().xlnk_reset() - path = tvm.get_global_func("tvm.rpc.server.workpath")(file_name) env = get_env() + if env.TARGET == "pynq": + from pynq import xlnk + # Reset xilinx driver + xlnk.Xlnk().xlnk_reset() + elif env.TARGET == "de10nano": + # Load the de10nano program function. + load_vta_dll() + path = tvm.get_global_func("tvm.rpc.server.workpath")(file_name) program_bitstream.bitstream_program(env.TARGET, path) logging.info("Program FPGA with %s ", file_name) @@ -90,9 +94,11 @@ def reconfig_runtime(cfg_json): cfg_json : str JSON string used for configurations. 
""" + env = get_env() if runtime_dll: + if env.TARGET == "de10nano": + print("Please reconfigure the runtime AFTER programming a bitstream.") raise RuntimeError("Can only reconfig in the beginning of session...") - env = get_env() cfg = json.loads(cfg_json) cfg["TARGET"] = env.TARGET pkg = PkgConfig(cfg, proj_root) diff --git a/vta/python/vta/pkg_config.py b/vta/python/vta/pkg_config.py index 0516e839484a..f7afad6bf038 100644 --- a/vta/python/vta/pkg_config.py +++ b/vta/python/vta/pkg_config.py @@ -77,6 +77,12 @@ def __init__(self, cfg, proj_root): if self.TARGET in ["pynq", "ultra96"]: # add pynq drivers for any board that uses pynq driver stack (see pynq.io) self.lib_source += glob.glob("%s/vta/src/pynq/*.cc" % (proj_root)) + elif self.TARGET in ["de10nano"]: + self.lib_source += glob.glob(f"{proj_root}/vta/src/de10nano/*.cc") + self.include_path += [ + f"-I{proj_root}/vta/src/de10nano", + f"-I{proj_root}/3rdparty" + ] # Linker flags if self.TARGET in ["pynq", "ultra96"]: diff --git a/vta/python/vta/rpc_client.py b/vta/python/vta/rpc_client.py index f689ef46ba1c..097ea8e4a5cc 100644 --- a/vta/python/vta/rpc_client.py +++ b/vta/python/vta/rpc_client.py @@ -49,6 +49,9 @@ def program_fpga(remote, bitstream=None): else: bitstream = get_bitstream_path() if not os.path.isfile(bitstream): + env = get_env() + if env.TARGET == 'de10nano': + return download_bitstream() fprogram = remote.get_function("tvm.contrib.vta.init") diff --git a/vta/src/de10nano/de10nano_mgr.h b/vta/src/de10nano/de10nano_mgr.h index dae21ed394da..a87e400e727a 100644 --- a/vta/src/de10nano/de10nano_mgr.h +++ b/vta/src/de10nano/de10nano_mgr.h @@ -37,7 +37,7 @@ extern "C" #include } -// Register definition and address map taken from cv_5v4.pdf, +// Register definition and address map taken from cv_5v4.pdf, // Cyclone V Hard Processor System Technical Reference Manual, // chapter 5: FPGA Manager. struct de10nano_mgr @@ -47,7 +47,7 @@ struct de10nano_mgr // class and ugly bit shift manipulations. 
struct reg32 { - reg32 (uint32_t offset, uint32_t reset = 0) : + reg32 (uint32_t offset, uint32_t reset = 0) : m_offset (offset), m_reset (reset) {} @@ -57,7 +57,7 @@ struct de10nano_mgr m_reg = reinterpret_cast((uint8_t*)this+sizeof(reg32)); } uint32_t read () - { + { *m_reg = *m_addr; return *m_reg; } @@ -72,16 +72,16 @@ struct de10nano_mgr else printf ("DE10-Nano-Mgr: %16s: 0x%08x\n", name, read()); } - + uint32_t m_offset, m_reset, *m_reg; volatile uint32_t *m_addr; - + private: // Do not use this class on its own. reg32 (const reg32 &rhs); }; // Register definitions. All registers are of 32 bit size. - // Add one structure for each register, making sure that all + // Add one structure for each register, making sure that all // bit fields come first and pack exactly into 32 bits. struct data : public reg32 @@ -130,7 +130,7 @@ struct de10nano_mgr } void print (bool addr = false, bool fields = true) - { + { reg32::print ("stat", addr); if (fields) { @@ -160,7 +160,7 @@ struct de10nano_mgr uint32_t rsvd : 22; // 31:10 void print (bool addr = false, bool fields = true) - { + { reg32::print ("ctrl", addr); if (fields) { @@ -226,7 +226,7 @@ struct de10nano_mgr { gpio_ext_porta () : reg32 (0x850, 0x0) {} void print (bool addr = false, bool fields = true) - { + { reg32::print ("gpio_ext_porta", addr); if (fields) { @@ -437,9 +437,9 @@ struct de10nano_mgr rbf_fd = open (rbf, (O_RDONLY | O_SYNC)); mon.done (rbf_fd >= 0); - // 1. Set the cdratio and cfgwdth bits of the ctrl register in the + // 1. Set the cdratio and cfgwdth bits of the ctrl register in the // FPGA manager registers (fpgamgrregs) to match the characteristics - // of the configuration image. Tese settings are dependent on the + // of the configuration image. Tese settings are dependent on the // MSEL pins input. // 2. Set the nce bit of the ctrl register to 0 to enable HPS // configuration. @@ -479,7 +479,7 @@ struct de10nano_mgr ctrl.nconfigpull = 0; ctrl.write(); mon.done (); - + // 7. 
Read the mode bit of the stat register and wait until // the FPGA enters the configuration phase. mon.init ("Wait for configuration phase"); @@ -497,7 +497,7 @@ struct de10nano_mgr gpio_porta_eoi.write(); mon.done (); - // 9. Set the axicfgen bit of the ctrl register to 1 to enable + // 9. Set the axicfgen bit of the ctrl register to 1 to enable // sending configuration data to the FPGA. mon.init ("Enable configuration on AXI"); ctrl.axicfgen = 1; diff --git a/vta/tests/python/de10nano/test_program_rpc.py b/vta/tests/python/de10nano/test_program_rpc.py new file mode 100644 index 000000000000..ce287bc39ac4 --- /dev/null +++ b/vta/tests/python/de10nano/test_program_rpc.py @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+import sys, os +import tvm +from tvm import rpc +from vta import get_bitstream_path, download_bitstream, program_fpga, reconfig_runtime + +host = os.environ.get("VTA_PYNQ_RPC_HOST", "de10nano") +port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091")) + +def program_rpc_bitstream(path=None): + """Program the FPGA on the RPC server + + Parameters + ---------- + path : path to bitstream (optional) + """ + assert tvm.runtime.enabled("rpc") + remote = rpc.connect(host, port) + program_fpga(remote, path) + +def reconfig_rpc_runtime(): + """Reconfig the RPC server runtime + """ + assert tvm.runtime.enabled("rpc") + remote = rpc.connect(host, port) + reconfig_runtime(remote) + +bitstream = sys.argv[1] if len(sys.argv) == 2 else None +program_rpc_bitstream(bitstream) +reconfig_rpc_runtime() diff --git a/vta/tutorials/matrix_multiply.py b/vta/tutorials/matrix_multiply.py index 444762684bb9..1b5b56a3c469 100644 --- a/vta/tutorials/matrix_multiply.py +++ b/vta/tutorials/matrix_multiply.py @@ -52,7 +52,7 @@ # We configure both the bitstream and the runtime system on the Pynq # to match the VTA configuration specified by the vta_config.json file. -if env.TARGET == "pynq": +if env.TARGET == "pynq" or env.TARGET == "de10nano": # Make sure that TVM was compiled with RPC=1 assert tvm.runtime.enabled("rpc") diff --git a/vta/tutorials/vta_get_started.py b/vta/tutorials/vta_get_started.py index 3dd1f8c8753a..bec442a157b7 100644 --- a/vta/tutorials/vta_get_started.py +++ b/vta/tutorials/vta_get_started.py @@ -76,7 +76,7 @@ # We configure both the bitstream and the runtime system on the Pynq # to match the VTA configuration specified by the vta_config.json file. 
-if env.TARGET == "pynq": +if env.TARGET == "pynq" or env.TARGET == "de10nano": # Make sure that TVM was compiled with RPC=1 assert tvm.runtime.enabled("rpc") From c4ad202f56822aaad54f2b55c1cc3d3dcb3113cf Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Mon, 2 Mar 2020 16:22:24 -0800 Subject: [PATCH 05/19] [VTA][Chisel] LoadUop FSM bug fix. Issue: The LoadUop FSM incorrectly advances the address of the next uop to read from DRAM when the DRAM data valid bit is deasserted and asserted at the end of a read. This is caused by a mismatch in the logic of the state and output portions of the FSM. This is one of two issues that were gating the correct operation of VTA on the DE10-Nano target. Solution: Modify the logic of the output section of the FSM to include a check on the DRAM read valid bit or fold the output assignment into the state section. * Folded the assignment of the next uop address in the state section of the FSM. --- .../chisel/src/main/scala/core/LoadUop.scala | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/vta/hardware/chisel/src/main/scala/core/LoadUop.scala b/vta/hardware/chisel/src/main/scala/core/LoadUop.scala index 274581f475b3..87bd50858f2e 100644 --- a/vta/hardware/chisel/src/main/scala/core/LoadUop.scala +++ b/vta/hardware/chisel/src/main/scala/core/LoadUop.scala @@ -112,14 +112,18 @@ class LoadUop(debug: Boolean = false)(implicit p: Parameters) extends Module { when(xcnt === xlen) { when(xrem === 0.U) { state := sIdle - }.elsewhen(xrem < xmax) { - state := sReadCmd - xlen := xrem - xrem := 0.U }.otherwise { - state := sReadCmd - xlen := xmax - 1.U - xrem := xrem - xmax + raddr := raddr + xmax_bytes + when(xrem < xmax) { + state := sReadCmd + xlen := xrem + xrem := 0.U + } + .otherwise { + state := sReadCmd + xlen := xmax - 1.U + xrem := xrem - xmax + } } } } @@ -134,8 +138,6 @@ class LoadUop(debug: Boolean = false)(implicit p: Parameters) extends Module { }.otherwise { raddr := (io.baddr | (maskOffset
& (dec.dram_offset << log2Ceil(uopBytes)))) - uopBytes.U } - }.elsewhen(state === sReadData && xcnt === xlen && xrem =/= 0.U) { - raddr := raddr + xmax_bytes } io.vme_rd.cmd.valid := state === sReadCmd From bcd3d97671545b80746cccf8f51715033d359e95 Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Mon, 2 Mar 2020 16:27:52 -0800 Subject: [PATCH 06/19] [VTA][Chisel] Dynamically adjust DMA transfer size. Issue: In the DE10-Nano target and possibly in others, DMA transfers that cross the boundaries of memory pages result in incorrect reads and writes from and to DRAM. When this happens depending on different input values, VTA loads and stores exhibit incorrect results for DMA pulses at the end of a transfer. This is one of two issues that were gating the DE10-Nano target from functioning correctly, but may affect other Chisel based targets. Solution: Add support for dynamically adjustable DMA transfer sizes in load and store operations. For a more elegant and modular implementation the feature can be enabled at compile time with a static constant that can be passed as a configuration option. * Modified the load and store finite state machines to dynamically adjust the size of initial and stride DMA transfers. The feature is enabled by default by virtue of the static constant ADAPTIVE_DMA_XFER_ENABLE.
--- .../src/main/scala/core/TensorAlu.scala | 1 - .../src/main/scala/core/TensorLoad.scala | 21 +++--- .../src/main/scala/core/TensorStore.scala | 66 +++++++++++------ .../src/main/scala/core/TensorUtil.scala | 72 +++++++++++++------ 4 files changed, 101 insertions(+), 59 deletions(-) diff --git a/vta/hardware/chisel/src/main/scala/core/TensorAlu.scala b/vta/hardware/chisel/src/main/scala/core/TensorAlu.scala index a6feffaa18dc..6af3c834e451 100644 --- a/vta/hardware/chisel/src/main/scala/core/TensorAlu.scala +++ b/vta/hardware/chisel/src/main/scala/core/TensorAlu.scala @@ -72,7 +72,6 @@ class AluReg(implicit p: Parameters) extends Module { /** Vector of pipeline ALUs */ class AluVector(implicit p: Parameters) extends Module { - val aluBits = p(CoreKey).accBits val io = IO(new Bundle { val opcode = Input(UInt(C_ALU_OP_BITS.W)) val acc_a = new TensorMasterData(tensorType = "acc") diff --git a/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala b/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala index f5cc849547a6..f10594329cd7 100644 --- a/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala +++ b/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala @@ -103,8 +103,7 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)( state := sXPad1 }.elsewhen(dec.ypad_1 =/= 0.U) { state := sYPad1 - } - .otherwise { + }.otherwise { state := sIdle } }.elsewhen(dataCtrl.io.stride) { @@ -198,11 +197,9 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)( tag := tag + 1.U } - when( - state === sIdle || dataCtrlDone || (set === (tp.tensorLength - 1).U && tag === (tp.numMemBlock - 1).U)) { + when(state === sIdle || dataCtrlDone || (set === (tp.tensorLength - 1).U && tag === (tp.numMemBlock - 1).U)) { set := 0.U - }.elsewhen( - (io.vme_rd.data.fire() || isZeroPad) && tag === (tp.numMemBlock - 1).U) { + }.elsewhen((io.vme_rd.data.fire() || isZeroPad) && tag === (tp.numMemBlock - 1).U) { set := set + 1.U } @@ -211,10 +208,9 @@ 
class TensorLoad(tensorType: String = "none", debug: Boolean = false)( when(state === sIdle) { waddr_cur := dec.sram_offset waddr_nxt := dec.sram_offset - }.elsewhen((io.vme_rd.data - .fire() || isZeroPad) && set === (tp.tensorLength - 1).U && tag === (tp.numMemBlock - 1).U) { + }.elsewhen((io.vme_rd.data.fire() || isZeroPad) && set === (tp.tensorLength - 1).U && tag === (tp.numMemBlock - 1).U) { waddr_cur := waddr_cur + 1.U - }.elsewhen(dataCtrl.io.stride) { + }.elsewhen(dataCtrl.io.stride && io.vme_rd.data.fire()) { waddr_cur := waddr_nxt + dec.xsize waddr_nxt := waddr_nxt + dec.xsize } @@ -261,9 +257,10 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)( } // done - val done_no_pad = io.vme_rd.data - .fire() & dataCtrl.io.done & dec.xpad_1 === 0.U & dec.ypad_1 === 0.U - val done_x_pad = state === sXPad1 & xPadCtrl1.io.done & dataCtrlDone & dec.ypad_1 === 0.U + val done_no_pad = io.vme_rd.data.fire() & dataCtrl.io.done & + dec.xpad_1 === 0.U & dec.ypad_1 === 0.U + val done_x_pad = state === sXPad1 & xPadCtrl1.io.done & + dataCtrlDone & dec.ypad_1 === 0.U val done_y_pad = state === sYPad1 & dataCtrlDone & yPadCtrl1.io.done io.done := done_no_pad | done_x_pad | done_y_pad diff --git a/vta/hardware/chisel/src/main/scala/core/TensorStore.scala b/vta/hardware/chisel/src/main/scala/core/TensorStore.scala index 439023be0934..030f91172cda 100644 --- a/vta/hardware/chisel/src/main/scala/core/TensorStore.scala +++ b/vta/hardware/chisel/src/main/scala/core/TensorStore.scala @@ -62,20 +62,40 @@ class TensorStore(tensorType: String = "none", debug: Boolean = false)( val tag = Reg(UInt(8.W)) val set = Reg(UInt(8.W)) + // Dynamically adjust the size of DMA transfers to avoid crossing page boundaries. 
+ final val ADAPTIVE_DMA_XFER_ENABLE = true + val xfer_bytes = Reg(chiselTypeOf(io.vme_wr.cmd.bits.addr)) + val xstride_bytes = dec.xstride << log2Ceil(tensorLength * tensorWidth) + val maskOffset = VecInit(Seq.fill(M_DRAM_OFFSET_BITS)(true.B)).asUInt + val elemBytes = (p(CoreKey).batch * p(CoreKey).blockOut * p(CoreKey).outBits) / 8 + val pulse_bytes_bits = log2Ceil(mp.dataBits >> 3) + + val xfer_init_addr = io.baddr | (maskOffset & (dec.dram_offset << log2Ceil(elemBytes))) + val xfer_split_addr = waddr_cur + xfer_bytes + val xfer_stride_addr = waddr_nxt + xstride_bytes + + val xfer_init_bytes = if (ADAPTIVE_DMA_XFER_ENABLE) xmax_bytes - xfer_init_addr % xmax_bytes else xmax_bytes + val xfer_init_pulses = if (ADAPTIVE_DMA_XFER_ENABLE) xfer_init_bytes >> pulse_bytes_bits else xmax + val xfer_split_bytes = if (ADAPTIVE_DMA_XFER_ENABLE) xmax_bytes - xfer_split_addr % xmax_bytes else xmax_bytes + val xfer_split_pulses = if (ADAPTIVE_DMA_XFER_ENABLE) xfer_split_bytes >> pulse_bytes_bits else xmax + val xfer_stride_bytes = if (ADAPTIVE_DMA_XFER_ENABLE) xmax_bytes - xfer_stride_addr % xmax_bytes else xmax_bytes + val xfer_stride_pulses= if (ADAPTIVE_DMA_XFER_ENABLE) xfer_stride_bytes >> pulse_bytes_bits else xmax + val sIdle :: sWriteCmd :: sWriteData :: sReadMem :: sWriteAck :: Nil = Enum(5) val state = RegInit(sIdle) // control switch(state) { is(sIdle) { - when(io.start) { + xfer_bytes := xfer_init_bytes + when (io.start) { state := sWriteCmd - when(xsize < xmax) { + when (xsize < xfer_init_pulses) { xlen := xsize xrem := 0.U }.otherwise { - xlen := xmax - 1.U - xrem := xsize - xmax + xlen := xfer_init_pulses - 1.U + xrem := xsize - xfer_init_pulses } } } @@ -101,24 +121,29 @@ class TensorStore(tensorType: String = "none", debug: Boolean = false)( when(xrem === 0.U) { when(ycnt === ysize - 1.U) { state := sIdle - }.otherwise { + }.otherwise { // stride state := sWriteCmd - when(xsize < xmax) { + xfer_bytes := xfer_stride_bytes + when(xsize < xfer_stride_pulses) { xlen 
:= xsize xrem := 0.U }.otherwise { - xlen := xmax - 1.U - xrem := xsize - xmax + xlen := xfer_stride_pulses - 1.U + xrem := xsize - xfer_stride_pulses } } - }.elsewhen(xrem < xmax) { + } // split + .elsewhen(xrem < xfer_split_pulses) { state := sWriteCmd + xfer_bytes := xfer_split_bytes xlen := xrem xrem := 0.U - }.otherwise { + } + .otherwise { state := sWriteCmd - xlen := xmax - 1.U - xrem := xrem - xmax + xfer_bytes := xfer_split_bytes + xlen := xfer_split_pulses - 1.U + xrem := xrem - xfer_split_pulses } } } @@ -174,8 +199,7 @@ class TensorStore(tensorType: String = "none", debug: Boolean = false)( when(state === sIdle) { raddr_cur := dec.sram_offset raddr_nxt := dec.sram_offset - }.elsewhen(io.vme_wr.data - .fire() && set === (tensorLength - 1).U && tag === (numMemBlock - 1).U) { + }.elsewhen(io.vme_wr.data.fire() && set === (tensorLength - 1).U && tag === (numMemBlock - 1).U) { raddr_cur := raddr_cur + 1.U }.elsewhen(stride) { raddr_cur := raddr_nxt + dec.xsize @@ -189,18 +213,14 @@ class TensorStore(tensorType: String = "none", debug: Boolean = false)( val mdata = MuxLookup(set, 0.U.asTypeOf(chiselTypeOf(wdata_t)), tread) // write-to-dram - val maskOffset = VecInit(Seq.fill(M_DRAM_OFFSET_BITS)(true.B)).asUInt - val elemBytes = (p(CoreKey).batch * p(CoreKey).blockOut * p(CoreKey).outBits) / 8 when(state === sIdle) { - waddr_cur := io.baddr | (maskOffset & (dec.dram_offset << log2Ceil( - elemBytes))) - waddr_nxt := io.baddr | (maskOffset & (dec.dram_offset << log2Ceil( - elemBytes))) + waddr_cur := xfer_init_addr + waddr_nxt := xfer_init_addr }.elsewhen(state === sWriteAck && io.vme_wr.ack && xrem =/= 0.U) { - waddr_cur := waddr_cur + xmax_bytes + waddr_cur := xfer_split_addr }.elsewhen(stride) { - waddr_cur := waddr_nxt + (dec.xstride << log2Ceil(tensorLength * tensorWidth)) - waddr_nxt := waddr_nxt + (dec.xstride << log2Ceil(tensorLength * tensorWidth)) + waddr_cur := xfer_stride_addr + waddr_nxt := xfer_stride_addr } io.vme_wr.cmd.valid := state === 
sWriteCmd diff --git a/vta/hardware/chisel/src/main/scala/core/TensorUtil.scala b/vta/hardware/chisel/src/main/scala/core/TensorUtil.scala index 6e6f7e776c0e..e7e738faee54 100644 --- a/vta/hardware/chisel/src/main/scala/core/TensorUtil.scala +++ b/vta/hardware/chisel/src/main/scala/core/TensorUtil.scala @@ -252,8 +252,16 @@ class TensorDataCtrl(tensorType: String = "none", val caddr = Reg(UInt(mp.addrBits.W)) val baddr = Reg(UInt(mp.addrBits.W)) - val len = Reg(UInt(mp.lenBits.W)) + val maskOffset = VecInit(Seq.fill(M_DRAM_OFFSET_BITS)(true.B)).asUInt + val elemBytes = + if (tensorType == "inp") { + (p(CoreKey).batch * p(CoreKey).blockIn * p(CoreKey).inpBits) / 8 + } else if (tensorType == "wgt") { + (p(CoreKey).blockOut * p(CoreKey).blockIn * p(CoreKey).wgtBits) / 8 + } else { + (p(CoreKey).batch * p(CoreKey).blockOut * p(CoreKey).accBits) / 8 + } val xmax_bytes = ((1 << mp.lenBits) * mp.dataBits / 8).U val xcnt = Reg(UInt(mp.lenBits.W)) @@ -262,27 +270,55 @@ class TensorDataCtrl(tensorType: String = "none", val xmax = (1 << mp.lenBits).U val ycnt = Reg(chiselTypeOf(dec.ysize)) + // Dynamically adjust the size of DMA transfers to avoid crossing page boundaries. 
+ final val ADAPTIVE_DMA_XFER_ENABLE = true + val xfer_bytes = Reg(UInt(mp.addrBits.W)) + val pulse_bytes_bits = log2Ceil(mp.dataBits >> 3) + val xstride_bytes = dec.xstride << log2Ceil(elemBytes) + + val xfer_init_addr = io.baddr | (maskOffset & (dec.dram_offset << log2Ceil(elemBytes))) + val xfer_split_addr = caddr + xfer_bytes + val xfer_stride_addr = baddr + xstride_bytes + + val xfer_init_bytes = if (ADAPTIVE_DMA_XFER_ENABLE) xmax_bytes - xfer_init_addr % xmax_bytes else xmax_bytes + val xfer_init_pulses = if (ADAPTIVE_DMA_XFER_ENABLE) xfer_init_bytes >> pulse_bytes_bits else xmax + val xfer_split_bytes = if (ADAPTIVE_DMA_XFER_ENABLE) xmax_bytes - xfer_split_addr % xmax_bytes else xmax_bytes + val xfer_split_pulses = if (ADAPTIVE_DMA_XFER_ENABLE) xfer_split_bytes >> pulse_bytes_bits else xmax + val xfer_stride_bytes = if (ADAPTIVE_DMA_XFER_ENABLE) xmax_bytes - xfer_stride_addr % xmax_bytes else xmax_bytes + val xfer_stride_pulses= if (ADAPTIVE_DMA_XFER_ENABLE) xfer_stride_bytes >> pulse_bytes_bits else xmax + val stride = xcnt === len & xrem === 0.U & ycnt =/= dec.ysize - 1.U val split = xcnt === len & xrem =/= 0.U - when(io.start || (io.xupdate && stride)) { - when(xsize < xmax) { + when(io.start) { + xfer_bytes := xfer_init_bytes + when(xsize < xfer_init_pulses) { len := xsize xrem := 0.U }.otherwise { - len := xmax - 1.U - xrem := xsize - xmax + len := xfer_init_pulses - 1.U + xrem := xsize - xfer_init_pulses + } + }.elsewhen(io.xupdate && stride) { + xfer_bytes := xfer_stride_bytes + when(xsize < xfer_stride_pulses) { + len := xsize + xrem := 0.U + }.otherwise { + len := xfer_stride_pulses - 1.U + xrem := xsize - xfer_stride_pulses } }.elsewhen(io.xupdate && split) { - when(xrem < xmax) { + xfer_bytes := xfer_split_bytes + when(xrem < xfer_split_pulses) { len := xrem xrem := 0.U }.otherwise { - len := xmax - 1.U - xrem := xrem - xmax + len := xfer_split_pulses - 1.U + xrem := xrem - xfer_split_pulses } } @@ -298,25 +334,15 @@ class 
TensorDataCtrl(tensorType: String = "none", ycnt := ycnt + 1.U } - val maskOffset = VecInit(Seq.fill(M_DRAM_OFFSET_BITS)(true.B)).asUInt - val elemBytes = - if (tensorType == "inp") { - (p(CoreKey).batch * p(CoreKey).blockIn * p(CoreKey).inpBits) / 8 - } else if (tensorType == "wgt") { - (p(CoreKey).blockOut * p(CoreKey).blockIn * p(CoreKey).wgtBits) / 8 - } else { - (p(CoreKey).batch * p(CoreKey).blockOut * p(CoreKey).accBits) / 8 - } - when(io.start) { - caddr := io.baddr | (maskOffset & (dec.dram_offset << log2Ceil(elemBytes))) - baddr := io.baddr | (maskOffset & (dec.dram_offset << log2Ceil(elemBytes))) + caddr := xfer_init_addr + baddr := xfer_init_addr }.elsewhen(io.yupdate) { when(split) { - caddr := caddr + xmax_bytes + caddr := xfer_split_addr }.elsewhen(stride) { - caddr := baddr + (dec.xstride << log2Ceil(elemBytes)) - baddr := baddr + (dec.xstride << log2Ceil(elemBytes)) + caddr := xfer_stride_addr + baddr := xfer_stride_addr } } From a184053b063bef4e877fad9826420e39968d9149 Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Mon, 2 Mar 2020 16:50:51 -0800 Subject: [PATCH 07/19] [VTA][Chisel] Improve FSIM/TSIM/FPGA xref debug. Issue: Cross reference between FSIM, TSIM, and Chisel based FPGA traces is an invaluable instrument that enables fast analysis on FSIM, and analysis/debug on TSIM and FPGA, especially for complex flows like conv2d or full inferences. Currently this cannot be done easily since a suitable reference is missing. The clock cycle event counter cannot be used since it is undefined in FSIM and not reliable between TSIM and FPGA because of different latencies. Solution: Introduce a new event counter that preserves a program order across FSIM, TSIM, FPGA. We propose adding the accumulator write event counter in the Chisel EventCounter class and a simple instrumentation in the FSIM runtime code. Note that this technique enabled finding the Chisel issues reported in the PR, which would have been otherwise far more difficult.
* Added the acc_wr_count event counter and changed interfaces accordingly. --- .../chisel/src/main/scala/core/Compute.scala | 2 ++ vta/hardware/chisel/src/main/scala/core/Core.scala | 2 ++ .../chisel/src/main/scala/core/EventCounters.scala | 11 +++++++++++ vta/hardware/chisel/src/main/scala/shell/VCR.scala | 14 +++++++++++++- 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/vta/hardware/chisel/src/main/scala/core/Compute.scala b/vta/hardware/chisel/src/main/scala/core/Compute.scala index c605a1a1a824..a1e7fadd96cf 100644 --- a/vta/hardware/chisel/src/main/scala/core/Compute.scala +++ b/vta/hardware/chisel/src/main/scala/core/Compute.scala @@ -45,6 +45,7 @@ class Compute(debug: Boolean = false)(implicit p: Parameters) extends Module { val wgt = new TensorMaster(tensorType = "wgt") val out = new TensorMaster(tensorType = "out") val finish = Output(Bool()) + val acc_wr_event = Output(Bool()) }) val sIdle :: sSync :: sExe :: Nil = Enum(3) val state = RegInit(sIdle) @@ -125,6 +126,7 @@ class Compute(debug: Boolean = false)(implicit p: Parameters) extends Module { tensorAcc.io.tensor.rd.idx <> Mux(dec.io.isGemm, tensorGemm.io.acc.rd.idx, tensorAlu.io.acc.rd.idx) tensorAcc.io.tensor.wr <> Mux(dec.io.isGemm, tensorGemm.io.acc.wr, tensorAlu.io.acc.wr) io.vme_rd(1) <> tensorAcc.io.vme_rd + io.acc_wr_event := tensorAcc.io.tensor.wr.valid // gemm tensorGemm.io.start := state === sIdle & start & dec.io.isGemm diff --git a/vta/hardware/chisel/src/main/scala/core/Core.scala b/vta/hardware/chisel/src/main/scala/core/Core.scala index 6bfffdc212b7..e2ac51a55d48 100644 --- a/vta/hardware/chisel/src/main/scala/core/Core.scala +++ b/vta/hardware/chisel/src/main/scala/core/Core.scala @@ -111,6 +111,8 @@ class Core(implicit p: Parameters) extends Module { ecounters.io.launch := io.vcr.launch ecounters.io.finish := compute.io.finish io.vcr.ecnt <> ecounters.io.ecnt + io.vcr.ucnt <> ecounters.io.ucnt + ecounters.io.acc_wr_event := compute.io.acc_wr_event // Finish instruction 
is executed and asserts the VCR finish flag val finish = RegNext(compute.io.finish) diff --git a/vta/hardware/chisel/src/main/scala/core/EventCounters.scala b/vta/hardware/chisel/src/main/scala/core/EventCounters.scala index f9fd7f0be105..5ef358627fec 100644 --- a/vta/hardware/chisel/src/main/scala/core/EventCounters.scala +++ b/vta/hardware/chisel/src/main/scala/core/EventCounters.scala @@ -44,6 +44,8 @@ class EventCounters(debug: Boolean = false)(implicit p: Parameters) extends Modu val launch = Input(Bool()) val finish = Input(Bool()) val ecnt = Vec(vp.nECnt, ValidIO(UInt(vp.regBits.W))) + val ucnt = Vec(vp.nUCnt, ValidIO(UInt(vp.regBits.W))) + val acc_wr_event = Input(Bool()) }) val cycle_cnt = RegInit(0.U(vp.regBits.W)) when(io.launch && !io.finish) { @@ -53,4 +55,13 @@ class EventCounters(debug: Boolean = false)(implicit p: Parameters) extends Modu } io.ecnt(0).valid := io.finish io.ecnt(0).bits := cycle_cnt + + val acc_wr_count = Reg(UInt(vp.regBits.W)) + when (!io.launch || io.finish) { + acc_wr_count := 0.U + }.elsewhen (io.acc_wr_event) { + acc_wr_count := acc_wr_count + 1.U + } + io.ucnt(0).valid := io.finish + io.ucnt(0).bits := acc_wr_count } diff --git a/vta/hardware/chisel/src/main/scala/shell/VCR.scala b/vta/hardware/chisel/src/main/scala/shell/VCR.scala index 3e74a256d537..9a80cd7799a3 100644 --- a/vta/hardware/chisel/src/main/scala/shell/VCR.scala +++ b/vta/hardware/chisel/src/main/scala/shell/VCR.scala @@ -34,6 +34,7 @@ case class VCRParams() { val nECnt = 1 val nVals = 1 val nPtrs = 6 + val nUCnt = 1 val regBits = 32 } @@ -53,6 +54,7 @@ class VCRMaster(implicit p: Parameters) extends VCRBase { val ecnt = Vec(vp.nECnt, Flipped(ValidIO(UInt(vp.regBits.W)))) val vals = Output(Vec(vp.nVals, UInt(vp.regBits.W))) val ptrs = Output(Vec(vp.nPtrs, UInt(mp.addrBits.W))) + val ucnt = Vec(vp.nUCnt, Flipped(ValidIO(UInt(vp.regBits.W)))) } /** VCRClient. 
@@ -68,6 +70,7 @@ class VCRClient(implicit p: Parameters) extends VCRBase { val ecnt = Vec(vp.nECnt, ValidIO(UInt(vp.regBits.W))) val vals = Input(Vec(vp.nVals, UInt(vp.regBits.W))) val ptrs = Input(Vec(vp.nPtrs, UInt(mp.addrBits.W))) + val ucnt = Vec(vp.nUCnt, ValidIO(UInt(vp.regBits.W))) } /** VTA Control Registers (VCR). @@ -100,7 +103,7 @@ class VCR(implicit p: Parameters) extends Module { // registers val nPtrs = if (mp.addrBits == 32) vp.nPtrs else 2 * vp.nPtrs - val nTotal = vp.nCtrl + vp.nECnt + vp.nVals + nPtrs + val nTotal = vp.nCtrl + vp.nECnt + vp.nVals + nPtrs + vp.nUCnt val reg = Seq.fill(nTotal)(RegInit(0.U(vp.regBits.W))) val addr = Seq.tabulate(nTotal)(_ * 4) @@ -108,6 +111,7 @@ class VCR(implicit p: Parameters) extends Module { val eo = vp.nCtrl val vo = eo + vp.nECnt val po = vo + vp.nVals + val uo = po + nPtrs switch(wstate) { is(sWriteAddress) { @@ -191,4 +195,12 @@ class VCR(implicit p: Parameters) extends Module { io.vcr.ptrs(i) := Cat(reg(po + 2 * i + 1), reg(po + 2 * i)) } } + + for (i <- 0 until vp.nUCnt) { + when(io.vcr.ucnt(i).valid) { + reg(uo + i) := io.vcr.ucnt(i).bits + }.elsewhen(io.host.w.fire() && addr(uo + i).U === waddr) { + reg(uo + i) := wdata + } + } } From 7197cb244097230a73ab00c0640a6f003e08e6cd Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Wed, 4 Mar 2020 15:10:58 -0800 Subject: [PATCH 08/19] [VTA][de10nano] Comply with linting rules. 
--- vta/src/de10nano/de10nano_driver.cc | 8 +- vta/src/de10nano/de10nano_mgr.h | 493 +++++++++++++--------------- 2 files changed, 225 insertions(+), 276 deletions(-) diff --git a/vta/src/de10nano/de10nano_driver.cc b/vta/src/de10nano/de10nano_driver.cc index b8dba2744595..33c278e2c5dd 100644 --- a/vta/src/de10nano/de10nano_driver.cc +++ b/vta/src/de10nano/de10nano_driver.cc @@ -28,6 +28,7 @@ #include #include #include +#include #include "cma_api.h" void* VTAMemAlloc(size_t size, int cached) { @@ -82,7 +83,7 @@ void *VTAMapRegister(uint32_t addr) { MAP_SHARED, mmap_file, virt_base); - close (mmap_file); + close(mmap_file); return vmem; } @@ -155,11 +156,10 @@ int VTADeviceRun(VTADeviceHandle handle, insn_phy_addr, insn_count, wait_cycles); } -void VTAProgram (const char *rbf) -{ +void VTAProgram(const char *rbf) { de10nano_mgr mgr; CHECK(mgr.mapped()) << "de10nano: mapping of /dev/mem failed"; - CHECK(mgr.program_rbf (rbf)) << "Programming of the de10nano failed.\n" + CHECK(mgr.program_rbf(rbf)) << "Programming of the de10nano failed.\n" "This is usually due to the use of an RBF file that is incompatible " "with the MSEL switches on the DE10-Nano board. The recommended RBF " "format is FastPassiveParallel32 with compression enabled, " diff --git a/vta/src/de10nano/de10nano_mgr.h b/vta/src/de10nano/de10nano_mgr.h index a87e400e727a..8cdfd74846ab 100644 --- a/vta/src/de10nano/de10nano_mgr.h +++ b/vta/src/de10nano/de10nano_mgr.h @@ -20,18 +20,16 @@ * \brief DE10-Nano fpga manager. */ -#ifndef DE10NANO_FPGA_MGR_H -#define DE10NANO_FPGA_MGR_H +#ifndef VTA_DE10NANO_DE10NANO_MGR_H_ +#define VTA_DE10NANO_DE10NANO_MGR_H_ -extern "C" -{ +extern "C" { #include #include #include #include #include #include - #include #include #include #include @@ -40,61 +38,53 @@ extern "C" // Register definition and address map taken from cv_5v4.pdf, // Cyclone V Hard Processor System Technical Reference Manual, // chapter 5: FPGA Manager. 
-struct de10nano_mgr -{ +struct de10nano_mgr { // reg32 is a static base class interface and implementation // of a generic 32 bit register that avoids the use of a virtual // class and ugly bit shift manipulations. - struct reg32 - { - reg32 (uint32_t offset, uint32_t reset = 0) : - m_offset (offset), - m_reset (reset) + struct reg32 { + explicit reg32(uint32_t offset, uint32_t reset = 0) : + m_offset(offset), + m_reset(reset) {} - void map (uint8_t *base) - { + void map(uint8_t *base) { m_addr = reinterpret_cast(base + m_offset); - m_reg = reinterpret_cast((uint8_t*)this+sizeof(reg32)); + m_reg = reinterpret_cast(reinterpret_cast(this)+sizeof(reg32)); } - uint32_t read () - { + uint32_t read() { *m_reg = *m_addr; return *m_reg; } - void write () { *m_addr = *m_reg; } - void write (uint32_t value) { *m_addr = *m_reg = value; } - void clear () { *m_reg = 0; } - void reset () { *m_reg = m_reset; } - void print (const char *name, bool addr = false) - { + void write() { *m_addr = *m_reg; } + void write(uint32_t value) { *m_addr = *m_reg = value; } + void clear() { *m_reg = 0; } + void reset() { *m_reg = m_reset; } + void print(const char *name, bool addr = false) { if (addr) - printf ("DE10-Nano-Mgr: %16s: 0x%08x addr: %p\n", name, read(), m_addr); + printf("DE10-Nano-Mgr: %16s: 0x%08x addr: %p\n", name, read(), m_addr); else - printf ("DE10-Nano-Mgr: %16s: 0x%08x\n", name, read()); + printf("DE10-Nano-Mgr: %16s: 0x%08x\n", name, read()); } uint32_t m_offset, m_reset, *m_reg; volatile uint32_t *m_addr; - private: // Do not use this class on its own. - reg32 (const reg32 &rhs); + private: // Do not use this class on its own. + reg32(const reg32 &rhs); }; // Register definitions. All registers are of 32 bit size. // Add one structure for each register, making sure that all // bit fields come first and pack exactly into 32 bits. 
- struct data : public reg32 - { - data () : reg32 (0x0, 0x0) {} + struct data : public reg32 { + data() : reg32(0x0, 0x0) {} uint32_t value; } data; - struct stat : public reg32 - { - stat () : reg32 (0x0, 0x45) {} - enum mode_values - { + struct stat : public reg32 { + stat() : reg32(0x0, 0x45) {} + enum mode_values { FPGA_POWER_OFF = 0x0, FPGA_RESET_PHASE = 0x1, FPGA_CONFIG_PHASE = 0x2, @@ -103,17 +93,14 @@ struct de10nano_mgr FPGA_ZOMBIE_MODE = 0x5 }; - enum msel_values - { + enum msel_values { FPP16_AESN_ZIPN = 0x0, FPP32_AESO_ZIPY = 0xA }; - const char * mode_str () - { + const char * mode_str() { const char *str = "UNKNOWN"; - switch (mode) - { + switch (mode) { case FPGA_POWER_OFF : str = "POWER_OFF" ; break; case FPGA_RESET_PHASE : str = "RESET_PHASE" ; break; case FPGA_CONFIG_PHASE : str = "CONFIG_PHASE" ; break; @@ -124,231 +111,207 @@ struct de10nano_mgr return str; } - bool msel_is_invalid () - { + bool msel_is_invalid() { return msel & 0x10 || (msel & 0x3) == 0x3; } - void print (bool addr = false, bool fields = true) - { - reg32::print ("stat", addr); - if (fields) - { - printf ("DE10-Nano-Mgr: %16s: %x\n", "msel", msel); - printf ("DE10-Nano-Mgr: %16s: %s\n", "mode", mode_str()); + void print(bool addr = false, bool fields = true) { + reg32::print("stat", addr); + if (fields) { + printf("DE10-Nano-Mgr: %16s: %x\n", "msel", msel); + printf("DE10-Nano-Mgr: %16s: %s\n", "mode", mode_str()); } } - uint32_t mode : 3; // 2:0 RW - uint32_t msel : 5; // 7:3 RO - uint32_t rsvd : 24; // 31:8 + uint32_t mode : 3; // 2:0 RW + uint32_t msel : 5; // 7:3 RO + uint32_t rsvd : 24; // 31:8 } stat; - struct ctrl : public reg32 - { - ctrl () : reg32 (0x4, 0x200) {} - - uint32_t en : 1; // 0 RW - uint32_t nce : 1; // 1 RW - uint32_t nconfigpull : 1; // 2 RW - uint32_t nstatuspull : 1; // 3 RW - uint32_t confdonepull : 1; // 4 RW - uint32_t prreq : 1; // 5 RW - uint32_t cdratio : 2; // 7:6 RW - uint32_t axicfgen : 1; // 8 RW - uint32_t cfgwdth : 1; // 9 RW - uint32_t 
rsvd : 22; // 31:10 - - void print (bool addr = false, bool fields = true) - { - reg32::print ("ctrl", addr); - if (fields) - { - printf ("DE10-Nano-Mgr: %16s: %x\n", "en" , en ); - printf ("DE10-Nano-Mgr: %16s: %x\n", "nce" , nce ); - printf ("DE10-Nano-Mgr: %16s: %x\n", "nconfigpull" , nconfigpull ); - printf ("DE10-Nano-Mgr: %16s: %x\n", "nstatuspull" , nstatuspull ); - printf ("DE10-Nano-Mgr: %16s: %x\n", "confdonepull", confdonepull ); - printf ("DE10-Nano-Mgr: %16s: %x\n", "prreq" , prreq ); - printf ("DE10-Nano-Mgr: %16s: %x\n", "cdratio" , cdratio ); - printf ("DE10-Nano-Mgr: %16s: %x\n", "axicfgen" , axicfgen ); - printf ("DE10-Nano-Mgr: %16s: %x\n", "cfgwdth" , cfgwdth ); + struct ctrl : public reg32 { + ctrl() : reg32(0x4, 0x200) {} + + uint32_t en : 1; // 0 RW + uint32_t nce : 1; // 1 RW + uint32_t nconfigpull : 1; // 2 RW + uint32_t nstatuspull : 1; // 3 RW + uint32_t confdonepull : 1; // 4 RW + uint32_t prreq : 1; // 5 RW + uint32_t cdratio : 2; // 7:6 RW + uint32_t axicfgen : 1; // 8 RW + uint32_t cfgwdth : 1; // 9 RW + uint32_t rsvd : 22; // 31:10 + + void print(bool addr = false, bool fields = true) { + reg32::print("ctrl", addr); + if (fields) { + printf("DE10-Nano-Mgr: %16s: %x\n", "en" , en); + printf("DE10-Nano-Mgr: %16s: %x\n", "nce" , nce); + printf("DE10-Nano-Mgr: %16s: %x\n", "nconfigpull" , nconfigpull); + printf("DE10-Nano-Mgr: %16s: %x\n", "nstatuspull" , nstatuspull); + printf("DE10-Nano-Mgr: %16s: %x\n", "confdonepull", confdonepull); + printf("DE10-Nano-Mgr: %16s: %x\n", "prreq" , prreq); + printf("DE10-Nano-Mgr: %16s: %x\n", "cdratio" , cdratio); + printf("DE10-Nano-Mgr: %16s: %x\n", "axicfgen" , axicfgen); + printf("DE10-Nano-Mgr: %16s: %x\n", "cfgwdth" , cfgwdth); } } } ctrl; - struct dclkcnt : public reg32 - { - dclkcnt () : reg32 (0x8, 0x0) {} - void print () { return reg32::print ("dclkcnt"); } + struct dclkcnt : public reg32 { + dclkcnt() : reg32(0x8, 0x0) {} + void print() { return reg32::print("dclkcnt"); } - uint32_t cnt; // 
RW + uint32_t cnt; // RW } dclkcnt; - struct dclkstat : public reg32 - { - dclkstat () : reg32 (0xC, 0x0) {} - void print () { return reg32::print ("dclkstat"); } + struct dclkstat : public reg32 { + dclkstat() : reg32(0xC, 0x0) {} + void print() { return reg32::print("dclkstat"); } - uint32_t dcntdone : 1; // RW + uint32_t dcntdone : 1; // RW uint32_t rsvd : 31; } dclkstat; - struct gpio_inten : public reg32 - { - gpio_inten () : reg32 (0x830, 0x0) {} - void print () { return reg32::print ("gpio_inten"); } + struct gpio_inten : public reg32 { + gpio_inten() : reg32(0x830, 0x0) {} + void print() { return reg32::print("gpio_inten"); } - uint32_t value : 32; // RW + uint32_t value : 32; // RW } gpio_inten; - struct gpio_porta_eoi : public reg32 - { - gpio_porta_eoi () : reg32 (0x84C, 0x0) {} - void print () { return reg32::print ("gpio_porta_eoi"); } - - uint32_t ns : 1; // 0 WO - uint32_t cd : 1; // 1 WO - uint32_t id : 1; // 2 WO - uint32_t crc : 1; // 3 WO - uint32_t ccd : 1; // 4 WO - uint32_t prr : 1; // 5 WO - uint32_t pre : 1; // 6 WO - uint32_t prd : 1; // 7 WO - uint32_t ncp : 1; // 8 WO - uint32_t nsp : 1; // 9 WO - uint32_t cdp : 1; // 10 WO - uint32_t fpo : 1; // 11 WO - uint32_t rsvd : 20; // 31:12 + struct gpio_porta_eoi : public reg32 { + gpio_porta_eoi() : reg32(0x84C, 0x0) {} + void print() { return reg32::print("gpio_porta_eoi"); } + + uint32_t ns : 1; // 0 WO + uint32_t cd : 1; // 1 WO + uint32_t id : 1; // 2 WO + uint32_t crc : 1; // 3 WO + uint32_t ccd : 1; // 4 WO + uint32_t prr : 1; // 5 WO + uint32_t pre : 1; // 6 WO + uint32_t prd : 1; // 7 WO + uint32_t ncp : 1; // 8 WO + uint32_t nsp : 1; // 9 WO + uint32_t cdp : 1; // 10 WO + uint32_t fpo : 1; // 11 WO + uint32_t rsvd : 20; // 31:12 } gpio_porta_eoi; - struct gpio_ext_porta : public reg32 - { - gpio_ext_porta () : reg32 (0x850, 0x0) {} - void print (bool addr = false, bool fields = true) - { - reg32::print ("gpio_ext_porta", addr); - if (fields) - { - printf ("DE10-Nano-Mgr: %16s: %x\n", 
"nSTATUS" , ns ); - printf ("DE10-Nano-Mgr: %16s: %x\n", "CONF_DONE" , cd ); - printf ("DE10-Nano-Mgr: %16s: %x\n", "INIT_DONE" , id ); - printf ("DE10-Nano-Mgr: %16s: %x\n", "CRC_ERROR" , crc); - printf ("DE10-Nano-Mgr: %16s: %x\n", "CVP_CONF_DONE" , ccd); - printf ("DE10-Nano-Mgr: %16s: %x\n", "PR_READY" , prr); - printf ("DE10-Nano-Mgr: %16s: %x\n", "PR_ERROR" , pre); - printf ("DE10-Nano-Mgr: %16s: %x\n", "PR_DONE" , prd); - printf ("DE10-Nano-Mgr: %16s: %x\n", "nCONFIG_PIN" , ncp); - printf ("DE10-Nano-Mgr: %16s: %x\n", "nSTATUS_PIN" , nsp); - printf ("DE10-Nano-Mgr: %16s: %x\n", "CONF_DONE_PIN" , cdp); - printf ("DE10-Nano-Mgr: %16s: %x\n", "FPGA_POWER_ON" , fpo); + struct gpio_ext_porta : public reg32 { + gpio_ext_porta() : reg32(0x850, 0x0) {} + void print(bool addr = false, bool fields = true) { + reg32::print("gpio_ext_porta", addr); + if (fields) { + printf("DE10-Nano-Mgr: %16s: %x\n", "nSTATUS" , ns); + printf("DE10-Nano-Mgr: %16s: %x\n", "CONF_DONE" , cd); + printf("DE10-Nano-Mgr: %16s: %x\n", "INIT_DONE" , id); + printf("DE10-Nano-Mgr: %16s: %x\n", "CRC_ERROR" , crc); + printf("DE10-Nano-Mgr: %16s: %x\n", "CVP_CONF_DONE" , ccd); + printf("DE10-Nano-Mgr: %16s: %x\n", "PR_READY" , prr); + printf("DE10-Nano-Mgr: %16s: %x\n", "PR_ERROR" , pre); + printf("DE10-Nano-Mgr: %16s: %x\n", "PR_DONE" , prd); + printf("DE10-Nano-Mgr: %16s: %x\n", "nCONFIG_PIN" , ncp); + printf("DE10-Nano-Mgr: %16s: %x\n", "nSTATUS_PIN" , nsp); + printf("DE10-Nano-Mgr: %16s: %x\n", "CONF_DONE_PIN" , cdp); + printf("DE10-Nano-Mgr: %16s: %x\n", "FPGA_POWER_ON" , fpo); } } - uint32_t ns : 1; // 0 RO - uint32_t cd : 1; // 1 RO - uint32_t id : 1; // 2 RO - uint32_t crc : 1; // 3 RO - uint32_t ccd : 1; // 4 RO - uint32_t prr : 1; // 5 RO - uint32_t pre : 1; // 6 RO - uint32_t prd : 1; // 7 RO - uint32_t ncp : 1; // 8 RO - uint32_t nsp : 1; // 9 RO - uint32_t cdp : 1; // 10 RO - uint32_t fpo : 1; // 11 RO - uint32_t rsvd : 20; // 31:12 + uint32_t ns : 1; // 0 RO + uint32_t cd : 1; // 1 RO 
+ uint32_t id : 1; // 2 RO + uint32_t crc : 1; // 3 RO + uint32_t ccd : 1; // 4 RO + uint32_t prr : 1; // 5 RO + uint32_t pre : 1; // 6 RO + uint32_t prd : 1; // 7 RO + uint32_t ncp : 1; // 8 RO + uint32_t nsp : 1; // 9 RO + uint32_t cdp : 1; // 10 RO + uint32_t fpo : 1; // 11 RO + uint32_t rsvd : 20; // 31:12 } gpio_ext_porta; - struct monitor - { + struct monitor { // This is used to both break a polling loop if the specified number // of milliseconds have passed and to relax the polling yielding the // cpu every millisecond. - monitor () : msg (""), m_status (true), m_ticks (0), m_counter (0) - { - m_epoc_us = time_stamp (); + monitor() : msg(""), m_status(true), m_ticks(0), m_counter(0) { + m_epoc_us = time_stamp(); } - void init (const char *message, uint32_t ticks_ms = 1000) - { + void init(const char *message, uint32_t ticks_ms = 1000) { msg = message; m_ticks = m_counter = ticks_ms; - m_init_us = time_stamp (); - printf ("DE10-Nano-Mgr: %-32s : ", msg); + m_init_us = time_stamp(); + printf("DE10-Nano-Mgr: %-32s : ", msg); } - bool status () { return m_status; } + bool status() { return m_status; } - void reset () { m_counter = m_ticks; } + void reset() { m_counter = m_ticks; } - void done (bool status = true) - { - uint32_t elapsed = time_stamp (m_init_us); + void done(bool status = true) { + uint32_t elapsed = time_stamp(m_init_us); const char *rs = "FAIL"; - if (!m_counter) - { + if (!m_counter) { status = false; rs = "TOUT"; - } - else if (status) + } else if (status) { rs = "PASS"; - printf ("\rDE10-Nano-Mgr: %-32s : %s in %u us\n", msg, rs, elapsed); - if (!status) - { + } + printf("\rDE10-Nano-Mgr: %-32s : %s in %u us\n", msg, rs, elapsed); + if (!status) { m_status = false; throw 1; } } - ~monitor () - { - uint32_t elapsed = time_stamp (m_epoc_us); + ~monitor() { + uint32_t elapsed = time_stamp(m_epoc_us); const char *rs = m_status ? 
"SUCCESS" : "FAILURE"; - printf ("DE10-Nano-Mgr: EXIT %s in %u us\n", rs, elapsed); + printf("DE10-Nano-Mgr: EXIT %s in %u us\n", rs, elapsed); } - uint64_t time_stamp (uint64_t base_us = 0) - { + uint64_t time_stamp(uint64_t base_us = 0) { struct timeval tv; - gettimeofday (&tv, NULL); + gettimeofday(&tv, NULL); return tv.tv_sec * 1000000L + tv.tv_usec - base_us; } - bool operator () (bool cond) - { - if (m_counter) - { + bool operator() (bool cond) { + if (m_counter) { if (!cond) return false; m_counter--; - usleep (1000); + usleep(1000); } return m_counter; } const char *msg; - private: + + private: bool m_status; uint32_t m_ticks, m_counter; uint64_t m_init_us, m_epoc_us; }; - enum BaseAddr - { + enum BaseAddr { REGS_BASE_ADDR = 0xFF706000U, DATA_BASE_ADDR = 0xFFB90000U }; - de10nano_mgr () - { - m_page_size = sysconf (_SC_PAGE_SIZE); + de10nano_mgr() { + m_page_size = sysconf(_SC_PAGE_SIZE); #ifdef MOCK_DEVMEM - m_regs_base = (uint8_t*) malloc (m_page_size); - m_data_base = (uint8_t*) malloc (m_page_size); + m_regs_base = reinterpret_cast(malloc(m_page_size)); + m_data_base = reinterpret_cast(malloc(m_page_size)); #else - m_regs_base = map_mem (REGS_BASE_ADDR); - m_data_base = map_mem (DATA_BASE_ADDR); - #endif // MOCK_DEVMEM + m_regs_base = map_mem(REGS_BASE_ADDR); + m_data_base = map_mem(DATA_BASE_ADDR); + #endif // MOCK_DEVMEM data.map(m_data_base); stat.map(m_regs_base); ctrl.map(m_regs_base); @@ -359,21 +322,19 @@ struct de10nano_mgr gpio_ext_porta.map(m_regs_base); } - ~de10nano_mgr () - { + ~de10nano_mgr() { #ifdef MOCK_DEVMEM - free (m_regs_base); - free (m_data_base); + free(m_regs_base); + free(m_data_base); #else - unmap_mem (m_regs_base); - unmap_mem (m_data_base); - #endif // MOCK_DEVMEM + unmap_mem(m_regs_base); + unmap_mem(m_data_base); + #endif // MOCK_DEVMEM } - bool mapped () const { return m_regs_base && m_data_base; } + bool mapped() const { return m_regs_base && m_data_base; } - void print (bool addr = false) - { + void print(bool addr = 
false) { stat.print(addr, false); ctrl.print(addr, false); gpio_inten.print(); @@ -381,24 +342,20 @@ struct de10nano_mgr gpio_ext_porta.print(addr, false); } - private: - - uint32_t msel_to_cfgwdth (uint32_t msel) - { - return (msel & 0b1000) >> 3; + private: + uint32_t msel_to_cfgwdth(uint32_t msel) { + return(msel & 0b1000) >> 3; } - uint32_t msel_to_cdratio (uint32_t msel) - { - uint32_t cfgwdth = msel_to_cfgwdth (msel); + uint32_t msel_to_cdratio(uint32_t msel) { + uint32_t cfgwdth = msel_to_cfgwdth(msel); uint32_t cdratio = msel & 0b11; if (cfgwdth && cdratio) cdratio++; return cdratio; } - uint8_t * map_mem (off_t addr, size_t pages = 1) - { + uint8_t * map_mem(off_t addr, size_t pages = 1) { if (m_page_size <= 0) { return NULL; } int mem_fd = open("/dev/mem", O_SYNC | O_RDWR); @@ -408,34 +365,30 @@ struct de10nano_mgr MAP_SHARED, mem_fd, addr & ~(pages*m_page_size-1)); if (vbase == MAP_FAILED) { return NULL; } - close (mem_fd); - return (uint8_t*) vbase; + close(mem_fd); + return reinterpret_cast(vbase); } - void unmap_mem (void *base, size_t pages = 1) - { + void unmap_mem(void *base, size_t pages = 1) { if (base) - munmap (base, pages * m_page_size); + munmap(base, pages * m_page_size); } uint8_t *m_regs_base, *m_data_base; size_t m_page_size; - public: - + public: // Configuration sequence documented at page A-34. - bool program_rbf (const char *rbf) - { + bool program_rbf(const char *rbf) { monitor mon; int rbf_fd; uint32_t count = 0; - printf ("DE10-Nano-Mgr: Programming FPGA from image %s\n", rbf); + printf("DE10-Nano-Mgr: Programming FPGA from image %s\n", rbf); try { - - mon.init ("Open RBF file"); - rbf_fd = open (rbf, (O_RDONLY | O_SYNC)); - mon.done (rbf_fd >= 0); + mon.init("Open RBF file"); + rbf_fd = open(rbf, (O_RDONLY | O_SYNC)); + mon.done(rbf_fd >= 0); // 1. Set the cdratio and cfgwdth bits of the ctrl register in the // FPGA manager registers (fpgamgrregs) to match the characteristics @@ -448,61 +401,60 @@ struct de10nano_mgr // 4. 
Set the nconfigpull bit of the ctrl register to 1 to pull // down the nCONFIG pin and put the FPGA portion of the device // into the reset phase. - mon.init ("Enable FPGA configuration"); + mon.init("Enable FPGA configuration"); stat.read(); - if (stat.msel_is_invalid()) - printf ("DE10-Nano-Mgr: msel %x is not a valid HPS configuration\n", stat.msel); - else - { + if (stat.msel_is_invalid()) { + printf("DE10-Nano-Mgr: msel %x is not a valid HPS configuration\n", stat.msel); + } else { ctrl.read(); - ctrl.cdratio = msel_to_cdratio (stat.msel); - ctrl.cfgwdth = msel_to_cfgwdth (stat.msel); + ctrl.cdratio = msel_to_cdratio(stat.msel); + ctrl.cfgwdth = msel_to_cfgwdth(stat.msel); ctrl.nce = 0; ctrl.en = 1; ctrl.nconfigpull = 1; ctrl.write(); } - mon.done (!stat.msel_is_invalid()); + mon.done(!stat.msel_is_invalid()); // 5. Poll the mode bit of the stat register and wait until // the FPGA enters the reset phase. - mon.init ("Wait for FPGA to reset"); + mon.init("Wait for FPGA to reset"); do { stat.read(); } while (mon(stat.mode != stat::FPGA_RESET_PHASE)); - mon.done (); + mon.done(); stat.print(); // 6. Set the nconfigpull bit of the ctrl register to 0 to // release the FPGA from reset. - mon.init ("Release FPGA from reset"); + mon.init("Release FPGA from reset"); ctrl.nconfigpull = 0; ctrl.write(); - mon.done (); + mon.done(); // 7. Read the mode bit of the stat register and wait until // the FPGA enters the configuration phase. - mon.init ("Wait for configuration phase"); + mon.init("Wait for configuration phase"); do { stat.read(); } while (mon(stat.mode != stat::FPGA_CONFIG_PHASE)); - mon.done (); + mon.done(); stat.print(); // 8. Clear the interrupt bit of nSTATUS (ns) in the gpio interrupt // register (fpgamgrregs.mon.gpio_porta_eoi). - mon.init ("Clear nSTATUS interrupt bit"); + mon.init("Clear nSTATUS interrupt bit"); gpio_porta_eoi.clear(); gpio_porta_eoi.ns = 1; gpio_porta_eoi.write(); - mon.done (); + mon.done(); // 9. 
Set the axicfgen bit of the ctrl register to 1 to enable // sending configuration data to the FPGA. - mon.init ("Enable configuration on AXI"); + mon.init("Enable configuration on AXI"); ctrl.axicfgen = 1; ctrl.write(); - mon.done (); + mon.done(); // 10. Write the configuration image to the configuration data register // (data) in the FPGA manager module configuration data registers @@ -510,75 +462,73 @@ struct de10nano_mgr // transfer the configuration image from a peripheral device to the // FPGA manager. ssize_t bytes; - mon.init ("Write configuration Image"); + mon.init("Write configuration Image"); do { data.value = 0; - bytes = read (rbf_fd, &data.value, sizeof(data.value)); - if (bytes > 0) - { - if (!(count % (1<<16))) - { + bytes = read(rbf_fd, &data.value, sizeof(data.value)); + if (bytes > 0) { + if (!(count % (1<<16))) { printf("\rDE10-Nano-Mgr: %-32s : %u B", mon.msg, count); - fflush (stdout); + fflush(stdout); } data.write(); count += bytes; } } while (bytes == 4); - mon.done (count > 0); + mon.done(count > 0); printf("DE10-Nano-Mgr: %-32s : written %u B\n", mon.msg, count); - close (rbf_fd); + close(rbf_fd); // 11. Use the fpgamgrregs.mon.gpio_ext_porta registers to monitor // the CONF_DONE (cd) and nSTATUS (ns) bits. - mon.init ("Wait for CONF_DONE"); + mon.init("Wait for CONF_DONE"); do { gpio_ext_porta.read(); } while (mon(gpio_ext_porta.cd != 1 && gpio_ext_porta.ns != 1)); - mon.done (); + mon.done(); stat.print(); // 12. Set the axicfgen bit of the ctrl register to 0 to disable // configuration data on AXI slave. - mon.init ("Disable configuration on AXI"); + mon.init("Disable configuration on AXI"); ctrl.axicfgen = 0; ctrl.write(); - mon.done (); + mon.done(); // 13. Clear any previous DONE status by writing a 1 to the dcntdone // bit of the DCLK status register (dclkstat) to clear the completed // status flag. 
- mon.init ("Clear DCLK DONE status"); + mon.init("Clear DCLK DONE status"); dclkstat.dcntdone = 1; dclkstat.write(); - mon.done (); + mon.done(); // 14. Send the DCLKs required by the FPGA to enter the // initialization phase. - mon.init ("Send DCLK for init phase"); + mon.init("Send DCLK for init phase"); dclkcnt.cnt = 4; dclkcnt.write(); - mon.done (); + mon.done(); // 15. Poll the dcntdone bit of the DCLK status register (dclkstat) // until it changes to 1, which indicates that all the DCLKs have // been sent. - mon.init ("Wait for DCLK"); + mon.init("Wait for DCLK"); do { dclkstat.read(); } while (mon(dclkstat.dcntdone != 1)); - mon.done (); + mon.done(); // 16. Write a 1 to the dcntdone bit of the DCLK status register to // clear the completed status flag. - mon.init ("Clear DCLK status flag"); + mon.init("Clear DCLK status flag"); dclkstat.dcntdone = 1; dclkstat.write(); - mon.done (); + mon.done(); // 17. Read the mode bit of the stat register to wait for the FPGA // to enter user mode. - mon.init ("Wait for FPGA user mode"); + mon.init("Wait for FPGA user mode"); do { stat.read(); } while (mon(stat.mode != stat::FPGA_USER_MODE)); @@ -586,20 +536,19 @@ struct de10nano_mgr // 18. Set the en bit of the ctrl register to 0 to allow the // external pins to drive the configuration input signals. - mon.init ("Release control"); + mon.init("Release control"); ctrl.en = 0; ctrl.write(); mon.done(); } - catch (int i) - { - close (rbf_fd); + catch(int i) { + close(rbf_fd); printf("DE10-Nano-Mgr: %-32s : written %u B\n", mon.msg, count); - print (); + print(); } return mon.status(); } }; -#endif // DE10NANO_FPGA_MGR_H +#endif // VTA_DE10NANO_DE10NANO_MGR_H_ From f62a296254b394740c0f52af6db5624ea461219a Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Wed, 4 Mar 2020 15:39:46 -0800 Subject: [PATCH 09/19] [VTA] Appease make lint. 
--- vta/python/vta/exec/rpc_server.py | 10 +++++----- vta/python/vta/program_bitstream.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/vta/python/vta/exec/rpc_server.py b/vta/python/vta/exec/rpc_server.py index de019753d23e..81ecc52268eb 100644 --- a/vta/python/vta/exec/rpc_server.py +++ b/vta/python/vta/exec/rpc_server.py @@ -69,12 +69,12 @@ def program_fpga(file_name): # pylint: disable=import-outside-toplevel env = get_env() if env.TARGET == "pynq": - from pynq import xlnk - # Reset xilinx driver - xlnk.Xlnk().xlnk_reset() + from pynq import xlnk + # Reset xilinx driver + xlnk.Xlnk().xlnk_reset() elif env.TARGET == "de10nano": - # Load the de10nano program function. - load_vta_dll() + # Load the de10nano program function. + load_vta_dll() path = tvm.get_global_func("tvm.rpc.server.workpath")(file_name) program_bitstream.bitstream_program(env.TARGET, path) logging.info("Program FPGA with %s ", file_name) diff --git a/vta/python/vta/program_bitstream.py b/vta/python/vta/program_bitstream.py index ccdb45116f5c..d9b4277da42f 100644 --- a/vta/python/vta/program_bitstream.py +++ b/vta/python/vta/program_bitstream.py @@ -51,7 +51,7 @@ def pynq_bitstream_program(bitstream_path): def de10nano_bitstream_program(bitstream_path): from tvm import get_global_func program = get_global_func("vta.de10nano.program") - program (bitstream_path) + program(bitstream_path) def bitstream_program(target, bitstream): if target in ['pynq', 'ultra96']: From dfae2fe366eb15560f843ee6db2a073d2f8dfd05 Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Wed, 4 Mar 2020 15:51:53 -0800 Subject: [PATCH 10/19] [VTA] Disable pylint import not top level error. 
--- vta/python/vta/program_bitstream.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vta/python/vta/program_bitstream.py b/vta/python/vta/program_bitstream.py index d9b4277da42f..62cb5f21d02a 100644 --- a/vta/python/vta/program_bitstream.py +++ b/vta/python/vta/program_bitstream.py @@ -49,6 +49,7 @@ def pynq_bitstream_program(bitstream_path): bitstream.download() def de10nano_bitstream_program(bitstream_path): + # pylint: disable=import-outside-toplevel from tvm import get_global_func program = get_global_func("vta.de10nano.program") program(bitstream_path) From 44d47a81b90cf05ebf7693da90564a9084523f0f Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Thu, 5 Mar 2020 10:23:43 -0800 Subject: [PATCH 11/19] [VTA][Chisel,de10nano] Linting changes. * Use CamelCase class names. * Use C++ style C include header files. * Add comments to Chisel makefile. --- vta/hardware/chisel/Makefile | 14 +++++ vta/src/de10nano/de10nano_driver.cc | 2 +- vta/src/de10nano/de10nano_mgr.h | 81 ++++++++++++++--------------- 3 files changed, 54 insertions(+), 43 deletions(-) diff --git a/vta/hardware/chisel/Makefile b/vta/hardware/chisel/Makefile index 1b3afaf7d18a..84272fea4eac 100644 --- a/vta/hardware/chisel/Makefile +++ b/vta/hardware/chisel/Makefile @@ -36,14 +36,28 @@ CONFIG = DefaultDe10Config TOP = VTA TOP_TEST = Test BUILD_NAME = build +# Set USE_TRACE = 1 to generate a trace during simulation. USE_TRACE = 0 +# With USE_TRACE = 1, default trace format is VCD. +# Set USE_TRACE_FST = 1 to use the FST format. +# Note that although FST is around two orders of magnitude smaller than VCD +# it is also currently much slower to produce (verilator limitation). But if +# you are low on disk space it may be your only option. USE_TRACE_FST = 0 +# With USE_TRACE = 1, USE_TRACE_DETAILED = 1 will generate traces that also +# include non-interface internal signal names starting with an underscore. 
+# This will significantly increase the trace size and should only be used +# on a per need basis for difficult debug problems. USE_TRACE_DETAILED = 0 USE_THREADS = $(shell nproc) VTA_LIBNAME = libvta_hw UNITTEST_NAME = all CXX = g++ +# A debug build with DEBUG = 1 is useful to trace the simulation with a +# debugger. DEBUG = 0 +# With DEBUG = 1, SANITIZE = 1 turns on address sanitizing to verify that +# the verilator build is sane. To be used if you know what you are doing. SANITIZE = 0 config_test = $(TOP_TEST)$(CONFIG) diff --git a/vta/src/de10nano/de10nano_driver.cc b/vta/src/de10nano/de10nano_driver.cc index 33c278e2c5dd..94d000114dfc 100644 --- a/vta/src/de10nano/de10nano_driver.cc +++ b/vta/src/de10nano/de10nano_driver.cc @@ -157,7 +157,7 @@ int VTADeviceRun(VTADeviceHandle handle, } void VTAProgram(const char *rbf) { - de10nano_mgr mgr; + De10NanoMgr mgr; CHECK(mgr.mapped()) << "de10nano: mapping of /dev/mem failed"; CHECK(mgr.program_rbf(rbf)) << "Programming of the de10nano failed.\n" "This is usually due to the use of an RBF file that is incompatible " diff --git a/vta/src/de10nano/de10nano_mgr.h b/vta/src/de10nano/de10nano_mgr.h index 8cdfd74846ab..3ee0d9486807 100644 --- a/vta/src/de10nano/de10nano_mgr.h +++ b/vta/src/de10nano/de10nano_mgr.h @@ -23,33 +23,30 @@ #ifndef VTA_DE10NANO_DE10NANO_MGR_H_ #define VTA_DE10NANO_DE10NANO_MGR_H_ -extern "C" { - #include - #include - #include - #include - #include - #include - #include - #include - #include -} +#include +#include +#include +#include +#include +#include +#include +#include // Register definition and address map taken from cv_5v4.pdf, // Cyclone V Hard Processor System Technical Reference Manual, // chapter 5: FPGA Manager. -struct de10nano_mgr { - // reg32 is a static base class interface and implementation +struct De10NanoMgr { + // Reg32 is a static base class interface and implementation // of a generic 32 bit register that avoids the use of a virtual // class and ugly bit shift manipulations. 
- struct reg32 { - explicit reg32(uint32_t offset, uint32_t reset = 0) : + struct Reg32 { + explicit Reg32(uint32_t offset, uint32_t reset = 0) : m_offset(offset), m_reset(reset) {} void map(uint8_t *base) { m_addr = reinterpret_cast(base + m_offset); - m_reg = reinterpret_cast(reinterpret_cast(this)+sizeof(reg32)); + m_reg = reinterpret_cast(reinterpret_cast(this)+sizeof(Reg32)); } uint32_t read() { *m_reg = *m_addr; @@ -70,20 +67,20 @@ struct de10nano_mgr { volatile uint32_t *m_addr; private: // Do not use this class on its own. - reg32(const reg32 &rhs); + Reg32(const Reg32 &rhs); }; // Register definitions. All registers are of 32 bit size. // Add one structure for each register, making sure that all // bit fields come first and pack exactly into 32 bits. - struct data : public reg32 { - data() : reg32(0x0, 0x0) {} + struct data : public Reg32 { + data() : Reg32(0x0, 0x0) {} uint32_t value; } data; - struct stat : public reg32 { - stat() : reg32(0x0, 0x45) {} + struct stat : public Reg32 { + stat() : Reg32(0x0, 0x45) {} enum mode_values { FPGA_POWER_OFF = 0x0, FPGA_RESET_PHASE = 0x1, @@ -116,7 +113,7 @@ struct de10nano_mgr { } void print(bool addr = false, bool fields = true) { - reg32::print("stat", addr); + Reg32::print("stat", addr); if (fields) { printf("DE10-Nano-Mgr: %16s: %x\n", "msel", msel); printf("DE10-Nano-Mgr: %16s: %s\n", "mode", mode_str()); @@ -128,8 +125,8 @@ struct de10nano_mgr { uint32_t rsvd : 24; // 31:8 } stat; - struct ctrl : public reg32 { - ctrl() : reg32(0x4, 0x200) {} + struct ctrl : public Reg32 { + ctrl() : Reg32(0x4, 0x200) {} uint32_t en : 1; // 0 RW uint32_t nce : 1; // 1 RW @@ -143,7 +140,7 @@ struct de10nano_mgr { uint32_t rsvd : 22; // 31:10 void print(bool addr = false, bool fields = true) { - reg32::print("ctrl", addr); + Reg32::print("ctrl", addr); if (fields) { printf("DE10-Nano-Mgr: %16s: %x\n", "en" , en); printf("DE10-Nano-Mgr: %16s: %x\n", "nce" , nce); @@ -158,31 +155,31 @@ struct de10nano_mgr { } } ctrl; - struct 
dclkcnt : public reg32 { - dclkcnt() : reg32(0x8, 0x0) {} - void print() { return reg32::print("dclkcnt"); } + struct dclkcnt : public Reg32 { + dclkcnt() : Reg32(0x8, 0x0) {} + void print() { return Reg32::print("dclkcnt"); } uint32_t cnt; // RW } dclkcnt; - struct dclkstat : public reg32 { - dclkstat() : reg32(0xC, 0x0) {} - void print() { return reg32::print("dclkstat"); } + struct dclkstat : public Reg32 { + dclkstat() : Reg32(0xC, 0x0) {} + void print() { return Reg32::print("dclkstat"); } uint32_t dcntdone : 1; // RW uint32_t rsvd : 31; } dclkstat; - struct gpio_inten : public reg32 { - gpio_inten() : reg32(0x830, 0x0) {} - void print() { return reg32::print("gpio_inten"); } + struct gpio_inten : public Reg32 { + gpio_inten() : Reg32(0x830, 0x0) {} + void print() { return Reg32::print("gpio_inten"); } uint32_t value : 32; // RW } gpio_inten; - struct gpio_porta_eoi : public reg32 { - gpio_porta_eoi() : reg32(0x84C, 0x0) {} - void print() { return reg32::print("gpio_porta_eoi"); } + struct gpio_porta_eoi : public Reg32 { + gpio_porta_eoi() : Reg32(0x84C, 0x0) {} + void print() { return Reg32::print("gpio_porta_eoi"); } uint32_t ns : 1; // 0 WO uint32_t cd : 1; // 1 WO @@ -199,10 +196,10 @@ struct de10nano_mgr { uint32_t rsvd : 20; // 31:12 } gpio_porta_eoi; - struct gpio_ext_porta : public reg32 { - gpio_ext_porta() : reg32(0x850, 0x0) {} + struct gpio_ext_porta : public Reg32 { + gpio_ext_porta() : Reg32(0x850, 0x0) {} void print(bool addr = false, bool fields = true) { - reg32::print("gpio_ext_porta", addr); + Reg32::print("gpio_ext_porta", addr); if (fields) { printf("DE10-Nano-Mgr: %16s: %x\n", "nSTATUS" , ns); printf("DE10-Nano-Mgr: %16s: %x\n", "CONF_DONE" , cd); @@ -303,7 +300,7 @@ struct de10nano_mgr { DATA_BASE_ADDR = 0xFFB90000U }; - de10nano_mgr() { + De10NanoMgr() { m_page_size = sysconf(_SC_PAGE_SIZE); #ifdef MOCK_DEVMEM m_regs_base = reinterpret_cast(malloc(m_page_size)); @@ -322,7 +319,7 @@ struct de10nano_mgr { gpio_ext_porta.map(m_regs_base); 
} - ~de10nano_mgr() { + ~De10NanoMgr() { #ifdef MOCK_DEVMEM free(m_regs_base); free(m_data_base); From 6e55dd41dc1a96cbc48fc1d3439a313472c58e7a Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Thu, 5 Mar 2020 14:15:06 -0800 Subject: [PATCH 12/19] [VTA][de10nano] * Reorder C and C++ includes in de10nano_mgr.h. * Restore lint as default target in Chisel Makefile. --- vta/hardware/chisel/Makefile | 2 +- vta/src/de10nano/de10nano_mgr.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vta/hardware/chisel/Makefile b/vta/hardware/chisel/Makefile index 84272fea4eac..b55bd2e66a54 100644 --- a/vta/hardware/chisel/Makefile +++ b/vta/hardware/chisel/Makefile @@ -147,7 +147,7 @@ else lib_path = $(vta_dir)/$(BUILD_NAME)/$(VTA_LIBNAME).so endif -default: lib +default: lint lib lint: sbt scalastyle diff --git a/vta/src/de10nano/de10nano_mgr.h b/vta/src/de10nano/de10nano_mgr.h index 3ee0d9486807..a054640b4191 100644 --- a/vta/src/de10nano/de10nano_mgr.h +++ b/vta/src/de10nano/de10nano_mgr.h @@ -24,13 +24,13 @@ #define VTA_DE10NANO_DE10NANO_MGR_H_ #include +#include +#include +#include #include #include #include #include -#include -#include -#include // Register definition and address map taken from cv_5v4.pdf, // Cyclone V Hard Processor System Technical Reference Manual, From 9367fd89fd5378e41746c8c8661201b89fe60ab0 Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Thu, 5 Mar 2020 15:58:52 -0800 Subject: [PATCH 13/19] [VTA][de10nano] Do not use f string in pkg_config.py. 
--- vta/python/vta/pkg_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vta/python/vta/pkg_config.py b/vta/python/vta/pkg_config.py index f7afad6bf038..bf39787161df 100644 --- a/vta/python/vta/pkg_config.py +++ b/vta/python/vta/pkg_config.py @@ -78,7 +78,7 @@ def __init__(self, cfg, proj_root): # add pynq drivers for any board that uses pynq driver stack (see pynq.io) self.lib_source += glob.glob("%s/vta/src/pynq/*.cc" % (proj_root)) elif self.TARGET in ["de10nano"]: - self.lib_source += glob.glob(f"{proj_root}/vta/src/de10nano/*.cc") + self.lib_source += glob.glob("%s/vta/src/de10nano/*.cc" % (proj_root)) self.include_path += [ f"-I{proj_root}/vta/src/de10nano", f"-I{proj_root}/3rdparty" From 2280435405693af1fa40fa900d956774adf7f820 Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Thu, 5 Mar 2020 16:06:19 -0800 Subject: [PATCH 14/19] [VTA][de10nano] Remove overlooked f strings in pkg_config.py. --- vta/python/vta/pkg_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vta/python/vta/pkg_config.py b/vta/python/vta/pkg_config.py index bf39787161df..72872684b274 100644 --- a/vta/python/vta/pkg_config.py +++ b/vta/python/vta/pkg_config.py @@ -80,8 +80,8 @@ def __init__(self, cfg, proj_root): elif self.TARGET in ["de10nano"]: self.lib_source += glob.glob("%s/vta/src/de10nano/*.cc" % (proj_root)) self.include_path += [ - f"-I{proj_root}/vta/src/de10nano", - f"-I{proj_root}/3rdparty" + "-I%s/vta/src/de10nano" % proj_root, + "-I%/3rdparty" % proj_root ] # Linker flags From 8f157eedebcf81b7a48fb11726d8964eb0cc8bb0 Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Thu, 5 Mar 2020 16:20:32 -0800 Subject: [PATCH 15/19] [VTA][de10nano] Fixed typo. 
--- vta/python/vta/pkg_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vta/python/vta/pkg_config.py b/vta/python/vta/pkg_config.py index 72872684b274..c15f6df58740 100644 --- a/vta/python/vta/pkg_config.py +++ b/vta/python/vta/pkg_config.py @@ -81,7 +81,7 @@ def __init__(self, cfg, proj_root): self.lib_source += glob.glob("%s/vta/src/de10nano/*.cc" % (proj_root)) self.include_path += [ "-I%s/vta/src/de10nano" % proj_root, - "-I%/3rdparty" % proj_root + "-I%s/3rdparty" % proj_root ] # Linker flags From eb611848c49aabf5a2ddc41a48a6791f3397923f Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Thu, 5 Mar 2020 17:19:52 -0800 Subject: [PATCH 16/19] [VTA][TSIM] Check if gcc has align-new. --- vta/hardware/chisel/Makefile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/vta/hardware/chisel/Makefile b/vta/hardware/chisel/Makefile index b55bd2e66a54..21149dfb83e3 100644 --- a/vta/hardware/chisel/Makefile +++ b/vta/hardware/chisel/Makefile @@ -60,6 +60,9 @@ DEBUG = 0 # the verilator build is sane. To be used if you know what you are doing. SANITIZE = 0 +CXX_MAJOR := $(shell $(CXX) -dumpversion | sed 's/\..*//') +CXX_HAS_ALIGN_NEW := $(shell [ $(CXX_MAJOR) -ge 7 ] && echo true) + config_test = $(TOP_TEST)$(CONFIG) vta_dir = $(abspath ../../) tvm_dir = $(abspath ../../../) @@ -83,7 +86,10 @@ else cxx_flags = -O0 -g -Wall endif -cxx_flags += -std=c++11 -faligned-new +cxx_flags += -std=c++11 -Wno-maybe-uninitialized +ifeq ($(CXX_HAS_ALIGN_NEW),true) + cxx_flags += -faligned-new +endif cxx_flags += -DVL_TSIM_NAME=V$(TOP_TEST) cxx_flags += -DVL_PRINTF=printf cxx_flags += -DVL_USER_FINISH From 8a28dded8174022e1eefbff6ad68e210fc51edcf Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Fri, 6 Mar 2020 13:08:49 -0800 Subject: [PATCH 17/19] [VTA][Chisel] Make adaptive DMA transfer default. 
--- .../chisel/src/main/scala/core/TensorStore.scala | 14 ++++++-------- .../chisel/src/main/scala/core/TensorUtil.scala | 14 ++++++-------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/vta/hardware/chisel/src/main/scala/core/TensorStore.scala b/vta/hardware/chisel/src/main/scala/core/TensorStore.scala index 030f91172cda..9b4bf748a3a5 100644 --- a/vta/hardware/chisel/src/main/scala/core/TensorStore.scala +++ b/vta/hardware/chisel/src/main/scala/core/TensorStore.scala @@ -62,8 +62,6 @@ class TensorStore(tensorType: String = "none", debug: Boolean = false)( val tag = Reg(UInt(8.W)) val set = Reg(UInt(8.W)) - // Dynamically adjust the size of DMA transfers to avoid crossing page boundaries. - final val ADAPTIVE_DMA_XFER_ENABLE = true val xfer_bytes = Reg(chiselTypeOf(io.vme_wr.cmd.bits.addr)) val xstride_bytes = dec.xstride << log2Ceil(tensorLength * tensorWidth) val maskOffset = VecInit(Seq.fill(M_DRAM_OFFSET_BITS)(true.B)).asUInt @@ -74,12 +72,12 @@ class TensorStore(tensorType: String = "none", debug: Boolean = false)( val xfer_split_addr = waddr_cur + xfer_bytes val xfer_stride_addr = waddr_nxt + xstride_bytes - val xfer_init_bytes = if (ADAPTIVE_DMA_XFER_ENABLE) xmax_bytes - xfer_init_addr % xmax_bytes else xmax_bytes - val xfer_init_pulses = if (ADAPTIVE_DMA_XFER_ENABLE) xfer_init_bytes >> pulse_bytes_bits else xmax - val xfer_split_bytes = if (ADAPTIVE_DMA_XFER_ENABLE) xmax_bytes - xfer_split_addr % xmax_bytes else xmax_bytes - val xfer_split_pulses = if (ADAPTIVE_DMA_XFER_ENABLE) xfer_split_bytes >> pulse_bytes_bits else xmax - val xfer_stride_bytes = if (ADAPTIVE_DMA_XFER_ENABLE) xmax_bytes - xfer_stride_addr % xmax_bytes else xmax_bytes - val xfer_stride_pulses= if (ADAPTIVE_DMA_XFER_ENABLE) xfer_stride_bytes >> pulse_bytes_bits else xmax + val xfer_init_bytes = xmax_bytes - xfer_init_addr % xmax_bytes + val xfer_init_pulses = xfer_init_bytes >> pulse_bytes_bits + val xfer_split_bytes = xmax_bytes - xfer_split_addr % xmax_bytes + val 
xfer_split_pulses = xfer_split_bytes >> pulse_bytes_bits + val xfer_stride_bytes = xmax_bytes - xfer_stride_addr % xmax_bytes + val xfer_stride_pulses= xfer_stride_bytes >> pulse_bytes_bits val sIdle :: sWriteCmd :: sWriteData :: sReadMem :: sWriteAck :: Nil = Enum(5) val state = RegInit(sIdle) diff --git a/vta/hardware/chisel/src/main/scala/core/TensorUtil.scala b/vta/hardware/chisel/src/main/scala/core/TensorUtil.scala index e7e738faee54..d0a8ba7ef647 100644 --- a/vta/hardware/chisel/src/main/scala/core/TensorUtil.scala +++ b/vta/hardware/chisel/src/main/scala/core/TensorUtil.scala @@ -270,8 +270,6 @@ class TensorDataCtrl(tensorType: String = "none", val xmax = (1 << mp.lenBits).U val ycnt = Reg(chiselTypeOf(dec.ysize)) - // Dynamically adjust the size of DMA transfers to avoid crossing page boundaries. - final val ADAPTIVE_DMA_XFER_ENABLE = true val xfer_bytes = Reg(UInt(mp.addrBits.W)) val pulse_bytes_bits = log2Ceil(mp.dataBits >> 3) val xstride_bytes = dec.xstride << log2Ceil(elemBytes) @@ -280,12 +278,12 @@ class TensorDataCtrl(tensorType: String = "none", val xfer_split_addr = caddr + xfer_bytes val xfer_stride_addr = baddr + xstride_bytes - val xfer_init_bytes = if (ADAPTIVE_DMA_XFER_ENABLE) xmax_bytes - xfer_init_addr % xmax_bytes else xmax_bytes - val xfer_init_pulses = if (ADAPTIVE_DMA_XFER_ENABLE) xfer_init_bytes >> pulse_bytes_bits else xmax - val xfer_split_bytes = if (ADAPTIVE_DMA_XFER_ENABLE) xmax_bytes - xfer_split_addr % xmax_bytes else xmax_bytes - val xfer_split_pulses = if (ADAPTIVE_DMA_XFER_ENABLE) xfer_split_bytes >> pulse_bytes_bits else xmax - val xfer_stride_bytes = if (ADAPTIVE_DMA_XFER_ENABLE) xmax_bytes - xfer_stride_addr % xmax_bytes else xmax_bytes - val xfer_stride_pulses= if (ADAPTIVE_DMA_XFER_ENABLE) xfer_stride_bytes >> pulse_bytes_bits else xmax + val xfer_init_bytes = xmax_bytes - xfer_init_addr % xmax_bytes + val xfer_init_pulses = xfer_init_bytes >> pulse_bytes_bits + val xfer_split_bytes = xmax_bytes - xfer_split_addr % 
xmax_bytes + val xfer_split_pulses = xfer_split_bytes >> pulse_bytes_bits + val xfer_stride_bytes = xmax_bytes - xfer_stride_addr % xmax_bytes + val xfer_stride_pulses= xfer_stride_bytes >> pulse_bytes_bits val stride = xcnt === len & xrem === 0.U & From 1554991e4076582853175ea6d2106c15b0c7a8b8 Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Fri, 6 Mar 2020 13:24:29 -0800 Subject: [PATCH 18/19] [VTA][RPC] Renamed VTA_PYNQ_RPC_* to VTA_RPC_*. Issue: With more FPGA targets coming online, the initial method of using individual environment variables to specify target IP and port does not scale well. Solution: Use a single VTA_RPC_HOST, VTA_RPC_PORT pair to be changed every time a different target is used, for instance in a script used to benchmark all targets. * Replaced every instance of VTA_PYNQ_RPC_HOST and VTA_PYNQ_RPC_PORT with VTA_RPC_HOST and VTA_RPC_PORT, respectively. --- docs/vta/install.md | 4 ++-- vta/python/vta/testing/util.py | 6 +++--- vta/tests/python/de10nano/test_program_rpc.py | 4 ++-- vta/tests/python/pynq/test_program_rpc.py | 4 ++-- vta/tutorials/frontend/deploy_classification.py | 4 ++-- vta/tutorials/frontend/deploy_detection.py | 4 ++-- vta/tutorials/matrix_multiply.py | 4 ++-- vta/tutorials/optimize/convolution_opt.py | 4 ++-- vta/tutorials/optimize/matrix_multiply_opt.py | 4 ++-- vta/tutorials/vta_get_started.py | 4 ++-- 10 files changed, 21 insertions(+), 21 deletions(-) diff --git a/docs/vta/install.md b/docs/vta/install.md index dfa0eae71929..0738050c81c7 100644 --- a/docs/vta/install.md +++ b/docs/vta/install.md @@ -146,8 +146,8 @@ Tips regarding the Pynq RPC Server: Before running the examples on your development machine, you'll need to configure your host environment as follows: ```bash # On the Host-side -export VTA_PYNQ_RPC_HOST=192.168.2.99 -export VTA_PYNQ_RPC_PORT=9091 +export VTA_RPC_HOST=192.168.2.99 +export VTA_RPC_PORT=9091 ``` In addition, you'll need to edit the `vta_config.json` file on the host to indicate that we are 
targeting the Pynq platform, by setting the `TARGET` field to `"pynq"`. diff --git a/vta/python/vta/testing/util.py b/vta/python/vta/testing/util.py index 009038b2d5ed..afbf00ddac8c 100644 --- a/vta/python/vta/testing/util.py +++ b/vta/python/vta/testing/util.py @@ -59,8 +59,8 @@ def run(run_func): tracker_port = os.environ.get("TVM_TRACKER_PORT", None) # Otherwise, we can set the variables below to directly # obtain a remote from a test device - pynq_host = os.environ.get("VTA_PYNQ_RPC_HOST", None) - pynq_port = os.environ.get("VTA_PYNQ_RPC_PORT", None) + pynq_host = os.environ.get("VTA_RPC_HOST", None) + pynq_port = os.environ.get("VTA_RPC_PORT", None) # Run device from fleet node if env variables are defined if tracker_host and tracker_port: remote = autotvm.measure.request_remote(env.TARGET, @@ -75,7 +75,7 @@ def run(run_func): run_func(env, remote) else: raise RuntimeError( - "Please set the VTA_PYNQ_RPC_HOST and VTA_PYNQ_RPC_PORT environment variables") + "Please set the VTA_RPC_HOST and VTA_RPC_PORT environment variables") else: raise RuntimeError("Unknown target %s" % env.TARGET) diff --git a/vta/tests/python/de10nano/test_program_rpc.py b/vta/tests/python/de10nano/test_program_rpc.py index ce287bc39ac4..6dbd4571d636 100644 --- a/vta/tests/python/de10nano/test_program_rpc.py +++ b/vta/tests/python/de10nano/test_program_rpc.py @@ -19,8 +19,8 @@ from tvm import rpc from vta import get_bitstream_path, download_bitstream, program_fpga, reconfig_runtime -host = os.environ.get("VTA_PYNQ_RPC_HOST", "de10nano") -port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091")) +host = os.environ.get("VTA_RPC_HOST", "de10nano") +port = int(os.environ.get("VTA_RPC_PORT", "9091")) def program_rpc_bitstream(path=None): """Program the FPGA on the RPC server diff --git a/vta/tests/python/pynq/test_program_rpc.py b/vta/tests/python/pynq/test_program_rpc.py index fb0873586d44..ad6e43e59e02 100644 --- a/vta/tests/python/pynq/test_program_rpc.py +++ 
b/vta/tests/python/pynq/test_program_rpc.py @@ -20,8 +20,8 @@ from tvm import rpc from vta import get_bitstream_path, download_bitstream, program_fpga, reconfig_runtime -host = os.environ.get("VTA_PYNQ_RPC_HOST", "pynq") -port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091")) +host = os.environ.get("VTA_RPC_HOST", "pynq") +port = int(os.environ.get("VTA_RPC_PORT", "9091")) def program_rpc_bitstream(path=None): """Program the FPGA on the RPC server diff --git a/vta/tutorials/frontend/deploy_classification.py b/vta/tutorials/frontend/deploy_classification.py index 15cba4373056..d8c517c6e02d 100644 --- a/vta/tutorials/frontend/deploy_classification.py +++ b/vta/tutorials/frontend/deploy_classification.py @@ -109,8 +109,8 @@ # Otherwise if you have a device you want to program directly from # the host, make sure you've set the variables below to the IP of # your board. - device_host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99") - device_port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091") + device_host = os.environ.get("VTA_RPC_HOST", "192.168.2.99") + device_port = os.environ.get("VTA_RPC_PORT", "9091") if not tracker_host or not tracker_port: remote = rpc.connect(device_host, int(device_port)) else: diff --git a/vta/tutorials/frontend/deploy_detection.py b/vta/tutorials/frontend/deploy_detection.py index 0d1dbddcb1ec..1559d138213f 100644 --- a/vta/tutorials/frontend/deploy_detection.py +++ b/vta/tutorials/frontend/deploy_detection.py @@ -149,8 +149,8 @@ # Otherwise if you have a device you want to program directly from # the host, make sure you've set the variables below to the IP of # your board. 
- device_host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99") - device_port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091") + device_host = os.environ.get("VTA_RPC_HOST", "192.168.2.99") + device_port = os.environ.get("VTA_RPC_PORT", "9091") if not tracker_host or not tracker_port: remote = rpc.connect(device_host, int(device_port)) else: diff --git a/vta/tutorials/matrix_multiply.py b/vta/tutorials/matrix_multiply.py index 1b5b56a3c469..efbebf7ff688 100644 --- a/vta/tutorials/matrix_multiply.py +++ b/vta/tutorials/matrix_multiply.py @@ -47,8 +47,8 @@ env = vta.get_env() # We read the Pynq RPC host IP address and port number from the OS environment -host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99") -port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091")) +host = os.environ.get("VTA_RPC_HOST", "192.168.2.99") +port = int(os.environ.get("VTA_RPC_PORT", "9091")) # We configure both the bitstream and the runtime system on the Pynq # to match the VTA configuration specified by the vta_config.json file. diff --git a/vta/tutorials/optimize/convolution_opt.py b/vta/tutorials/optimize/convolution_opt.py index 2616fb28c89a..9d05d4b922cb 100644 --- a/vta/tutorials/optimize/convolution_opt.py +++ b/vta/tutorials/optimize/convolution_opt.py @@ -51,8 +51,8 @@ env = vta.get_env() # We read the Pynq RPC host IP address and port number from the OS environment -host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99") -port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091")) +host = os.environ.get("VTA_RPC_HOST", "192.168.2.99") +port = int(os.environ.get("VTA_RPC_PORT", "9091")) # We configure both the bitstream and the runtime system on the Pynq # to match the VTA configuration specified by the vta_config.json file. 
diff --git a/vta/tutorials/optimize/matrix_multiply_opt.py b/vta/tutorials/optimize/matrix_multiply_opt.py index 597a7e8ecf7f..e038ac4b4e2d 100644 --- a/vta/tutorials/optimize/matrix_multiply_opt.py +++ b/vta/tutorials/optimize/matrix_multiply_opt.py @@ -50,8 +50,8 @@ env = vta.get_env() # We read the Pynq RPC host IP address and port number from the OS environment -host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99") -port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091")) +host = os.environ.get("VTA_RPC_HOST", "192.168.2.99") +port = int(os.environ.get("VTA_RPC_PORT", "9091")) # We configure both the bitstream and the runtime system on the Pynq # to match the VTA configuration specified by the vta_config.json file. diff --git a/vta/tutorials/vta_get_started.py b/vta/tutorials/vta_get_started.py index bec442a157b7..ab416874b71b 100644 --- a/vta/tutorials/vta_get_started.py +++ b/vta/tutorials/vta_get_started.py @@ -71,8 +71,8 @@ from vta.testing import simulator # We read the Pynq RPC host IP address and port number from the OS environment -host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99") -port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091")) +host = os.environ.get("VTA_RPC_HOST", "192.168.2.99") +port = int(os.environ.get("VTA_RPC_PORT", "9091")) # We configure both the bitstream and the runtime system on the Pynq # to match the VTA configuration specified by the vta_config.json file. From 500c44173cf0c6f48365b790e5ede662633432cc Mon Sep 17 00:00:00 2001 From: Pasquale Cocchini Date: Sun, 8 Mar 2020 20:27:21 -0700 Subject: [PATCH 19/19] [VTA][Chisel] Comply with new linter. 
--- .../chisel/src/main/scala/core/TensorLoad.scala | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala b/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala index f10594329cd7..5ab690d8637c 100644 --- a/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala +++ b/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala @@ -197,7 +197,7 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)( tag := tag + 1.U } - when(state === sIdle || dataCtrlDone || (set === (tp.tensorLength - 1).U && tag === (tp.numMemBlock - 1).U)) { + when(state === sIdle || dataCtrlDone || (set === (tp.tensorLength - 1).U && tag === (tp.numMemBlock - 1).U)) { set := 0.U }.elsewhen((io.vme_rd.data.fire() || isZeroPad) && tag === (tp.numMemBlock - 1).U) { set := set + 1.U @@ -208,7 +208,10 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)( when(state === sIdle) { waddr_cur := dec.sram_offset waddr_nxt := dec.sram_offset - }.elsewhen((io.vme_rd.data.fire() || isZeroPad) && set === (tp.tensorLength - 1).U && tag === (tp.numMemBlock - 1).U) { + }.elsewhen((io.vme_rd.data.fire() || isZeroPad) + && set === (tp.tensorLength - 1).U + && tag === (tp.numMemBlock - 1).U) + { waddr_cur := waddr_cur + 1.U }.elsewhen(dataCtrl.io.stride && io.vme_rd.data.fire()) { waddr_cur := waddr_nxt + dec.xsize @@ -257,10 +260,8 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)( } // done - val done_no_pad = io.vme_rd.data.fire() & dataCtrl.io.done & - dec.xpad_1 === 0.U & dec.ypad_1 === 0.U - val done_x_pad = state === sXPad1 & xPadCtrl1.io.done & - dataCtrlDone & dec.ypad_1 === 0.U + val done_no_pad = io.vme_rd.data.fire() & dataCtrl.io.done & dec.xpad_1 === 0.U & dec.ypad_1 === 0.U + val done_x_pad = state === sXPad1 & xPadCtrl1.io.done & dataCtrlDone & dec.ypad_1 === 0.U val done_y_pad = state === sYPad1 & dataCtrlDone & yPadCtrl1.io.done io.done := done_no_pad | 
done_x_pad | done_y_pad