diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..c4685cb
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,24 @@
+name: CI
+on:
+  pull_request:
+    branches: [master]
+  push:
+    branches: [master]
+  # CI runs every 12 hours
+  schedule: [cron: "0 */12 * * *"]
+
+jobs:
+  ci-check:
+    name: CI Build and Simulate
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+    steps:
+      - uses: actions/checkout@v2
+      - name: build and simulate
+        run : |
+          ./setup.sh
+          ./run.sh
+      - name: Setup tmate session
+        if: ${{ failure() }}
+        uses: mxschmitt/action-tmate@v3
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3245ab7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+**/build/**
+**/verilog/**
+**/*.log
+img/*.drawio
+**/output/**
+**/.Xil/**
\ No newline at end of file
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..a2890b5
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "lib/blue_wrapper"]
+	path = lib/blue_wrapper
+	url = https://github.com/wengwz/blue-wrapper.git
diff --git a/LICENSE b/LICENSE
old mode 100644
new mode 100755
diff --git a/Makefile.base b/Makefile.base
new file mode 100755
index 0000000..c201f8b
--- /dev/null
+++ b/Makefile.base
@@ -0,0 +1,29 @@
+# Shared bsc flag groups; backend/Makefile pulls this file in with "include".
+TRANSFLAGS = -aggressive-conditions -lift # -split-if
+RECOMPILEFLAGS = -u -show-compiles
+SCHEDFLAGS = -show-schedule -sched-dot # -show-rule-rel dMemInit_request_put doExecute
+# -show-elab-progress
+DEBUGFLAGS = -check-assert \
+    -continue-after-errors \
+    -keep-fires \
+    -keep-inlined-boundaries \
+    -show-method-bvi \
+    -show-method-conf \
+    -show-module-use \
+    -show-range-conflict \
+    -show-stats \
+    -warn-action-shadowing \
+    -warn-method-urgency \
+    -promote-warnings ALL
+VERILOGFLAGS = -verilog -remove-dollar -remove-unused-modules # -use-dpi -verilog-filter cmd
+BLUESIMFLAGS = -parallel-sim-link 16 # -systemc
+BUILDDIR = build
+# Every bsc output directory (-bdir, -info-dir, -simdir, -vdir) points at $(BUILDDIR).
+OUTDIR = -bdir $(BUILDDIR) -info-dir $(BUILDDIR) -simdir $(BUILDDIR) -vdir $(BUILDDIR)
+WORKDIR = -fdir $(abspath .)
+# The relative paths below are resolved by abspath against the directory make runs in.
+LIBSRCDIR = $(abspath ../lib/blue_wrapper/src)
+BSVSRCDIR = -p +:$(abspath ../src):$(LIBSRCDIR)
+DIRFLAGS = $(BSVSRCDIR) $(OUTDIR) $(WORKDIR)
+MISCFLAGS = -print-flags -show-timestamps -show-version -steps 6000000 # -D macro
+RUNTIMEFLAGS = +RTS -K4095M -RTS
diff --git a/Makefile.test b/Makefile.test
new file mode 100755
index 0000000..af59283
--- /dev/null
+++ b/Makefile.test
@@ -0,0 +1,33 @@
+# Runs every testbench listed in TESTBENCHS and keeps one log per top module.
+TESTDIR ?= $(abspath ../test)
+LOGDIR ?= $(abspath ../tmp)
+
+# Testbench source files; each name is also a phony target of this Makefile.
+TESTBENCHS = \
+    TestStreamUtils.bsv \
+    TestDmaCore.bsv
+
+# A variable named after each file lists its top modules; the recipe reads it via $($@).
+TestStreamUtils.bsv = mkStreamConcatTb \
+    mkStreamSplitTb
+TestDmaCore.bsv = mkChunkComputerTb
+
+all: $(TESTBENCHS)
+
+# Explicit rule instead of a %.bsv pattern: make skips implicit-rule search for
+# phony targets. The loop runs in the recipe shell (not inside a shell function
+# call), so a failing simulation fails the target instead of being ignored.
+$(TESTBENCHS): | $(LOGDIR)
+	for testcase in $($@); do \
+	    $(MAKE) -C $(TESTDIR) simulate TESTFILE=$@ TOPMODULE=$$testcase \
+	        > $(LOGDIR)/$@-$$testcase.log 2>&1 || exit 1; \
+	done
+
+$(LOGDIR):
+	mkdir -p $@
+
+clean:
+	rm -f $(LOGDIR)/*.log
+
+.PHONY: all clean $(TESTBENCHS)
+.DEFAULT_GOAL := all
diff --git a/README.md b/README.md
old mode 100644
new mode 100755
diff --git a/backend/Makefile b/backend/Makefile
new file mode 100644
index 0000000..ce908c3
--- /dev/null
+++ b/backend/Makefile
@@ -0,0 +1,62 @@
+include ../Makefile.base
+
+TCLDIR ?= xdc
+CLK ?= rdma_clock
+OOC ?= 1
+VLOGDIR ?= verilog
+OUTPUTDIR ?= output
+LOGFILE ?= run_vivado.log
+# RUNTOPHASE: synth place route all (comment on its own line so the value has no trailing space)
+RUNTOPHASE ?= synth
+# vu13p, u200
+TARGETPLATFORM ?= vu13p
+# xcvu13p-fhgb2104-2-i, xcu200-fsgd2104-2-e
+PARTNAME = xcvu13p-fhgb2104-2-i
+TARGETFILE ?= ../src/XilBdmaDmaWrapper.bsv
+#MODULE List: mkRawSimpleDmaController mkRawBypassDmaController mkRawTestDmaController
+TOPMODULE ?= mkRawTestDmaController
+
+BACKENDDIR ?= .
+
+VERILOG_TOPMODULE ?= top
+MAX_NET_PATH_NUM ?= 100000
+
+# Settings exported to the environment of the recipes (vivado runs build_top.tcl there).
+export DIR_RTL = $(VLOGDIR)
+export DIR_BOARD = $(TARGETPLATFORM)
+export DIR_XDC = $(TARGETPLATFORM)/$(TCLDIR)
+export DIR_OOC_SCRIPTS = $(BACKENDDIR)/ooc_tcl_and_xdc
+export DIR_IPS = $(BACKENDDIR)/ips
+export DIR_IP_GENERATED = $(BUILDDIR)/ips
+export DIR_BSV_GENERATED = $(BACKENDDIR)/verilog
+export VERILOG_TOPMODULE
+export TARGET_CLOCKS = $(CLK)
+export DIR_OUTPUT = $(OUTPUTDIR)
+export OOCSYNTH = $(OOC)
+export RUNTO = $(RUNTOPHASE)
+export PART = $(PARTNAME)
+export MAX_NET_PATH_NUM
+
+compile:
+	mkdir -p $(BUILDDIR)
+	bsc -elab -sim -verbose $(BLUESIMFLAGS) $(DEBUGFLAGS) $(DIRFLAGS) $(MISCFLAGS) $(RECOMPILEFLAGS) $(RUNTIMEFLAGS) $(SCHEDFLAGS) $(TRANSFLAGS) -g $(TOPMODULE) $(TARGETFILE)
+
+verilog: compile
+	mkdir -p $(VLOGDIR)
+	bsc $(VERILOGFLAGS) $(DIRFLAGS) $(MISCFLAGS) $(RECOMPILEFLAGS) $(RUNTIMEFLAGS) $(TRANSFLAGS) -g $(TOPMODULE) $(TARGETFILE)
+	bluetcl listVlogFiles.tcl -bdir $(BUILDDIR) -vdir $(BUILDDIR) $(TOPMODULE) $(TOPMODULE) | grep -i '\.v' | xargs -I {} cp {} $(VLOGDIR)
+
+vivado:
+	vivado -mode tcl -nolog -nojournal -source ./build_top.tcl 2>&1 | tee $(LOGFILE)
+
+vivado_synth:
+	vivado -mode tcl -nolog -nojournal -source ./build_top.tcl -tclargs synth 2>&1 | tee $(LOGFILE)
+
+vivado_prw:
+	vivado -mode tcl -nolog -nojournal -source ./build_top.tcl -tclargs prw 2>&1 | tee $(LOGFILE)
+
+clean:
+	rm -rf $(BUILDDIR) $(OUTPUTDIR) $(VLOGDIR) .Xil *.jou *.log
+
+.PHONY: compile verilog vivado vivado_synth vivado_prw clean
+.DEFAULT_GOAL := verilog
\ No newline at end of file
diff --git a/backend/batch_insert_ila.tcl b/backend/batch_insert_ila.tcl
new file mode 100644
index 0000000..589a585
--- /dev/null
+++ b/backend/batch_insert_ila.tcl
@@ -0,0 +1,228 @@
+######################################################################
+# Automatically inserts ILA instances in a batch flow, and calls "implement_debug_core". Can also be used in a GUI flow
+# This should ONLY be invoked after synthesis, and before opt_design.
If opt_design is called first, marked nets may be missing and not found
+# Warning: Currently will skip a net if it has no obvious clock domain on the driver. Nets connected to input buffers will be dropped unless "mark_debug_clock" is attached to the net.
+# Nets attached to VIO cores have the "mark_debug" attribute, and will be filtered out unless the "mark_debug_valid" attribute is attached.
+# Supports the following additional attributes beyond "mark_debug"
+# attribute mark_debug_valid of X : signal is "true"; -- Marks a net for ILA capture, even if net is also attached to a VIO core
+# attribute mark_debug_clock of X : signal is "inst1_bufg/clock"; -- Specifies clock net to use for capturing this net. May create a new ILA core for that clock domain
+# attribute mark_debug_depth of X : signal is "4096"; -- overrides default depth for this ILA core. valid values: 1024, 2048, ... 131072. Last attribute that is scanned will win.
+# attribute mark_debug_adv_trigger of X : signal is "true"; -- specifies that advanced trigger capability will be added to ILA core
+# Engineer: J. McCluskey
+
+# Script version:
+# https://forums.xilinx.com/xlnx/attachments/xlnx/Vivado/4090/1/new_batch_insert_ila.zip
+
+proc batch_insert_ila { depth } {
+    # depth: default ILA capture depth; a MARK_DEBUG_DEPTH attribute on a net overrides it for that clock.
+    # First pass: sequence through debug nets and organize them by clock in the
+    # clock_list array. Also create max and min array for bus indices
+    set dbgs [get_nets -hierarchical -filter {MARK_DEBUG}]
+    if {[llength $dbgs] == 0} {
+        puts "No nets have the MARK_DEBUG attribute. No ILA cores created"
+        return
+    } else {
+        #process list of nets to find and reject nets that are attached to VIO cores. This has a side effect that VIO nets can't be monitored with an ILA
+        # This can be overridden by using the attribute "mark_debug_valid" = "true" on a net like this.
+        set net_list {}
+        foreach net $dbgs {
+            if { [get_property -quiet MARK_DEBUG_VALID $net] != "true" } {
+                set pin_list [get_pins -of_objects [get_nets -segments $net]]
+                set not_vio_net 1
+                foreach pin $pin_list {
+                    if { [get_property IS_DEBUG_CORE [get_cells -of_object $pin]] == 1 } {
+                        # It seems this net is attached to a debug core (i.e. VIO core) already, so we should skip adding it to the netlist
+                        set not_vio_net 0
+                        break
+                    }
+                }
+                if { $not_vio_net == 1 } { lappend net_list $net; }
+            } else {
+                lappend net_list $net
+            }
+        }
+    }
+    # check again to see if we have any nets left now
+    if {[llength $net_list] == 0} {
+        puts "All nets with MARK_DEBUG are already connected to VIO cores. No ILA cores created"
+        return
+    }
+    # Now that the netlist has been filtered, determine bus names and clock domains
+    foreach d $net_list {
+        # name is root name of a bus, index is the bit index in the
+        # bus
+        set name [regsub {\[[[:digit:]]+\]$} $d {}]
+        set index [regsub {^.*\[([[:digit:]]+)\]$} $d {\1}]
+        if {[string is integer -strict $index]} {
+            if {![info exists max($name)]} {
+                set max($name) $index
+                set min($name) $index
+            } elseif {$index > $max($name)} {
+                set max($name) $index
+            } elseif {$index < $min($name)} {
+                set min($name) $index
+            }
+        } else {
+            set max($name) -1
+        }
+        # Now we search for the local clock net associated with the target net. There may be ambiguities or no answer in some cases
+        if {![info exists clocks($name)]} {
+            # does MARK_DEBUG_CLOCK decorate this net? If not, then search backwards to the driver cell
+            set clk_name [get_property -quiet MARK_DEBUG_CLOCK $d]
+            if { [llength $clk_name] == 0 } {
+                # trace to the clock net, tracing backwards via the driver pin.
+                set driver_pin [get_pins -filter {DIRECTION == "OUT" && IS_LEAF == TRUE } -of_objects [ get_nets -segments $d ]]
+                set driver_cell [get_cells -of_objects $driver_pin]
+                if { [get_property IS_SEQUENTIAL $driver_cell] == 1 } {
+                    set timing_arc [get_timing_arcs -to $driver_pin]
+                    set cell_clock_pin [get_pins -filter {IS_CLOCK} [get_property FROM_PIN $timing_arc]]
+                    if { [llength $cell_clock_pin] > 1 } {
+                        puts "Error: in batch_insert_ila. Found more than 1 clock pin in driver cell $driver_cell with timing arc $timing_arc for net $d"
+                        continue
+                    }
+                } else {
+                    # our driver cell is a LUT or LUTMEM in combinatorial mode, we need to trace further.
+                    set paths [get_timing_paths -quiet -through $driver_pin ]
+                    if { [llength $paths] > 0 } {
+                        # note that here we arbitrarily select the start point of the FIRST timing path... there might be multiple clocks with timing paths for this net.
+                        # use MARK_DEBUG_CLOCK to specify another clock in this case.
+                        set cell_clock_pin [get_pins [get_property STARTPOINT_PIN [lindex $paths 0]]]
+                    } else {
+                        # Can't find any timing path, so skip the net, and warn the user.
+                        puts "Critical Warning: from batch_insert_ila.tcl Can't trace any clock domain on driver of net $d"
+                        puts "Please attach the attribute MARK_DEBUG_CLOCK with a string containing the net name of the desired sampling clock, .i.e."
+                        puts "attribute mark_debug_clock of $d : signal is \"inst_bufg/clk\";"
+                        continue
+                    }
+                }
+                # clk_net will usually be a list of net segments, which needs filtering to determine the net connected to the driver pin
+                set clk_net [get_nets -segments -of_objects $cell_clock_pin]
+            } else {
+                set clk_net [get_nets -segments $clk_name]
+                if { [llength $clk_net] == 0 } { puts "MARK_DEBUG_CLOCK attribute on net $d does not match any known net. Please fix."; continue; }
+            }
+            # trace forward to net actually connected to clock buffer output, not any of the lower level segment names
+            set clocks($name) [get_nets -of_objects [get_pins -filter {DIRECTION == "OUT" && IS_LEAF == TRUE } -of_objects $clk_net]]
+            if {![info exists clock_list($clocks($name))]} {
+                # found a new clock
+                puts "New clock found is $clocks($name)"
+                set clock_list($clocks($name)) [list $name]
+                set ila_depth($clocks($name)) $depth
+                set ila_adv_trigger($clocks($name)) false
+            } else {
+                lappend clock_list($clocks($name)) $name
+            }
+            # Does this net have a "MARK_DEBUG_DEPTH" attribute attached?
+            set clk_depth [get_property -quiet MARK_DEBUG_DEPTH $d]
+            if { [llength $clk_depth] != 0 } {
+                set ila_depth($clocks($name)) $clk_depth
+            }
+            # Does this net have a "MARK_DEBUG_ADV_TRIGGER" attribute attached?
+            set trigger [get_property -quiet MARK_DEBUG_ADV_TRIGGER $d]
+            if { $trigger == "true" } {
+                set ila_adv_trigger($clocks($name)) true
+            }
+        }
+    }
+    set ila_count 0
+    set trig_out ""
+    set trig_out_ack ""
+    if { [llength [array names clock_list]] > 1 } {
+        set enable_trigger true
+    } else {
+        set enable_trigger false
+    }
+    foreach c [array names clock_list] {
+        # Now build and connect an ILA core for each clock domain
+        incr ila_count ;# plain statement: a bracketed form would run the new count as a command name
+        set ila_inst "ila_$ila_count"
+        ##################################################################
+        # first verify if depth is a member of the set, 1024, 2048, 4096, 8192, ... 131072
+        if { $ila_depth($c) < 1024 || [expr $ila_depth($c) & ($ila_depth($c) - 1)] || $ila_depth($c) > 131072 } {
+            # Depth is not right... lets fix it, and continue
+            if { $ila_depth($c) < 1024 } {
+                set new_depth 1024
+            } elseif { $ila_depth($c) > 131072 } {
+                set new_depth 131072
+            } else {
+                # round value to next highest power of 2, (in log space)
+                set new_depth [expr 1 << int( log($ila_depth($c))/log(2) + .9999 )]
+            }
+            puts "Can't create ILA core $ila_inst with depth of $ila_depth($c)! Changed capture depth to $new_depth"
+            set ila_depth($c) $new_depth
+        }
+        # create ILA and connect its clock
+        puts "Creating ILA $ila_inst with capture depth $ila_depth($c) and advanced trigger = $ila_adv_trigger($c)"
+        if { [expr [string range [version -short] 0 3] < 2014] } {
+            create_debug_core $ila_inst labtools_ila_v3
+        } else {
+            create_debug_core $ila_inst ila
+        }
+        if { $ila_adv_trigger($c) } { set mu_cnt 4; } else { set mu_cnt 2; }
+        set_property C_DATA_DEPTH $ila_depth($c) [get_debug_cores $ila_inst]
+        set_property C_TRIGIN_EN $enable_trigger [get_debug_cores $ila_inst]
+        set_property C_TRIGOUT_EN $enable_trigger [get_debug_cores $ila_inst]
+        set_property C_ADV_TRIGGER $ila_adv_trigger($c) [get_debug_cores $ila_inst]
+        set_property C_INPUT_PIPE_STAGES 1 [get_debug_cores $ila_inst]
+        set_property C_EN_STRG_QUAL true [get_debug_cores $ila_inst]
+        set_property ALL_PROBE_SAME_MU true [get_debug_cores $ila_inst]
+        set_property ALL_PROBE_SAME_MU_CNT $mu_cnt [get_debug_cores $ila_inst]
+        set_property port_width 1 [get_debug_ports $ila_inst/clk]
+        connect_debug_port $ila_inst/clk $c
+        # hookup trigger ports in a circle if more than one ILA is created
+        if { $enable_trigger == true } {
+            create_debug_port $ila_inst trig_in
+            create_debug_port $ila_inst trig_in_ack
+            create_debug_port $ila_inst trig_out
+            create_debug_port $ila_inst trig_out_ack
+            if { $trig_out != "" } {
+                connect_debug_port $ila_inst/trig_in [get_nets $trig_out]
+            }
+            if { $trig_out_ack != "" } {
+                connect_debug_port $ila_inst/trig_in_ack [get_nets $trig_out_ack]
+            }
+            set trig_out ${ila_inst}_trig_out_$ila_count
+            create_net $trig_out
+            connect_debug_port $ila_inst/trig_out [get_nets $trig_out]
+            set trig_out_ack ${ila_inst}_trig_out_ack_$ila_count
+            create_net $trig_out_ack
+            connect_debug_port $ila_inst/trig_out_ack [get_nets $trig_out_ack]
+        }
+        ##################################################################
+        # add probes
+        set nprobes 0
+        foreach n [lsort $clock_list($c)] {
+            set nets {}
+            if {$max($n) < 0} {
+                lappend nets [get_nets $n]
+            } else {
+                # n is a bus name
+                for {set i $min($n)} {$i <= $max($n)} {incr i} {
+                    lappend nets [get_nets $n[$i]]
+                }
+            }
+            set prb probe$nprobes
+            if {$nprobes > 0} {
+                create_debug_port $ila_inst probe
+            }
+            set_property port_width [llength $nets] [get_debug_ports $ila_inst/$prb]
+            connect_debug_port $ila_inst/$prb $nets
+            incr nprobes
+        }
+    }
+    # at this point, we need to complete the circular connection of trigger outputs and acks
+    if { $enable_trigger == true } {
+        connect_debug_port ila_1/trig_in [get_nets $trig_out]
+        connect_debug_port ila_1/trig_in_ack [get_nets $trig_out_ack]
+    }
+    set project_found [get_projects -quiet]
+    if { $project_found != "New Project" } {
+        puts "Saving constraints now in project [current_project -quiet]"
+        save_constraints_as debug_constraints.xdc
+    }
+    ##################################################################
+    implement_debug_core
+    ##################################################################
+    # write out probe info file
+    write_debug_probes -force debug_nets.ltx
+}
\ No newline at end of file
diff --git a/backend/build_top.tcl b/backend/build_top.tcl
new file mode 100644
index 0000000..db70c97
--- /dev/null
+++ b/backend/build_top.tcl
@@ -0,0 +1,332 @@
+set dir_output $::env(DIR_OUTPUT)
+set dir_rtl $::env(DIR_RTL)
+set dir_xdc $::env(DIR_XDC)
+set dir_ooc_scripts $::env(DIR_OOC_SCRIPTS)
+set dir_ips $::env(DIR_IPS)
+set dir_board $::env(DIR_BOARD)
+set dir_ip_gen $::env(DIR_IP_GENERATED)
+set dir_bsv_gen $::env(DIR_BSV_GENERATED)
+set part $::env(PART)
+set top_module $::env(VERILOG_TOPMODULE)
+set target_clks $::env(TARGET_CLOCKS)
+set max_net_path_num $::env(MAX_NET_PATH_NUM)
+
+set current_time [clock format [clock seconds] -format "%Y-%m-%d-%H-%M-%S"]
+
+set_param general.maxthreads 16
+set device [get_parts $part]; # xcvu13p-fhgb2104-2-i; #xcu200-fsgd2104-2-e
+set_part $device
+
+# create_project -in_memory
+set device [get_parts $part]
+set_part
$device
+# set_property board_part xilinx.com:au200:1.3 [current_project]
+
+set ooc_module_names { \
+    mkRawTestDmaController \
+}
+
+proc runGenerateIP {args} {
+    global dir_output part device dir_ips dir_xdc dir_ip_gen
+
+    file mkdir $dir_output
+    file mkdir $dir_ip_gen
+
+    # read_xdc [ glob $dir_xdc/*.xdc ]
+
+    foreach file [ glob $dir_ips/**/*.tcl ] {
+        source $file
+    }
+
+    report_property $device -file $dir_output/pre_synth_dev_prop.rpt
+    reset_target all [ get_ips * ]
+    generate_target all [ get_ips * ]
+
+}
+
+proc runSynthIP {args} {
+    global dir_output top_module dir_ip_gen dir_xdc
+
+    # read_xdc [ glob $dir_xdc/*.xdc ]
+
+    read_ip [glob $dir_ip_gen/**/*.xci]
+    # The following line will generate a .dcp checkpoint file, so no need to create by ourselves
+    synth_ip [ get_ips * ] -quiet
+}
+
+# Sources the common OOC script once per entry of ooc_module_names; the script sees ooc_top and the globals below.
+proc runSynthOOC {args} {
+    global dir_output part dir_bsv_gen dir_ooc_scripts max_net_path_num
+    global ooc_module_names
+
+    foreach ooc_top $ooc_module_names {
+        source $dir_ooc_scripts/bsv_ooc_module_common.tcl
+    }
+}
+
+# Reads the generated IP (.xci), the Verilog sources and the XDC constraints.
+proc addExtFiles {args} {
+    global dir_output part device dir_rtl dir_xdc dir_board dir_ip_gen dir_bsv_gen
+    global ooc_module_names
+
+    read_ip [glob $dir_ip_gen/**/*.xci]
+    read_verilog [ glob $dir_rtl/*.v ]
+    read_verilog [ glob $dir_bsv_gen/*.v ]
+    read_verilog [ glob $dir_board/*.v ]
+
+    read_xdc [ glob $dir_xdc/*.xdc ]
+}
+
+# Synthesizes $top_module, inserts ILA cores for MARK_DEBUG nets and writes the post-synth checkpoint and XDC.
+proc runSynthDesign {args} {
+    global dir_output top_module max_net_path_num
+
+    synth_design -top $top_module -flatten_hierarchy none
+
+    source batch_insert_ila.tcl
+    batch_insert_ila 256
+
+    write_checkpoint -force $dir_output/post_synth_design.dcp
+    write_xdc -force -exclude_physical $dir_output/post_synth.xdc
+}
+
+# Writes the post-synthesis reports to $dir_output; "-open_checkpoint true" reloads the synth checkpoint first.
+proc runPostSynthReport {args} {
+    global dir_output target_clks max_net_path_num
+
+    if {[dict get $args -open_checkpoint] == true} {
+        open_checkpoint $dir_output/post_synth_design.dcp
+    }
+
+    xilinx::designutils::report_failfast -max_paths 10000 -detailed_reports synth -file $dir_output/post_synth_failfast.rpt
+
+    # Check 1) slack, 2) requirement, 3) src and dst clocks, 4) datapath delay, 5) logic level, 6) skew and uncertainty.
+    report_timing_summary -report_unconstrained -warn_on_violation -file $dir_output/post_synth_timing_summary.rpt
+    # report_timing -of_objects [get_timing_paths -setup -to [get_clocks $target_clks] -max_paths $max_net_path_num -filter { LOGIC_LEVELS >= 4 && LOGIC_LEVELS <= 40 }] -file $dir_output/post_synth_long_paths.rpt
+    # Check 1) endpoints without clock, 2) combo loop and 3) latch.
+    check_timing -override_defaults no_clock -file $dir_output/post_synth_check_timing.rpt
+    report_clock_networks -file $dir_output/post_synth_clock_networks.rpt; # Show unconstrained clocks
+    report_clock_interaction -delay_type min_max -significant_digits 3 -file $dir_output/post_synth_clock_interaction.rpt; # Pay attention to Clock pair Classification, Inter-CLock Constraints, Path Requirement (WNS)
+    report_high_fanout_nets -timing -load_type -max_nets $max_net_path_num -file $dir_output/post_synth_fanout.rpt
+    report_exceptions -ignored -file $dir_output/post_synth_exceptions.rpt; # -ignored -ignored_objects -write_valid_exceptions -write_merged_exceptions
+
+    # 1 LUT + 1 net have delay 0.5ns, if cycle period is Tns, logic level is 2T at most
+    # report_design_analysis -timing -max_paths $max_net_path_num -file $dir_output/post_synth_design_timing.rpt
+    report_design_analysis -setup -max_paths $max_net_path_num -file $dir_output/post_synth_design_setup_timing.rpt
+    # report_design_analysis -logic_level_dist_paths $max_net_path_num -min_level $MIN_LOGIC_LEVEL -max_level $MAX_LOGIC_LEVEL -file $dir_output/post_synth_design_logic_level.rpt
+    report_design_analysis -logic_level_dist_paths $max_net_path_num -logic_level_distribution -file $dir_output/post_synth_design_logic_level_dist.rpt
+
+    report_datasheet -file $dir_output/post_synth_datasheet.rpt
+
+
+    report_drc -file $dir_output/post_synth_drc.rpt
+    report_drc -ruledeck methodology_checks -file $dir_output/post_synth_drc_methodology.rpt
+    report_drc -ruledeck timing_checks -file $dir_output/post_synth_drc_timing.rpt
+
+    # intra-clock skew < 300ps, inter-clock skew < 500ps
+
+    # Check 1) LUT on clock tree (TIMING-14), 2) hold constraints for multicycle path constraints (XDCH-1).
+    report_methodology -file $dir_output/post_synth_methodology.rpt
+    report_timing -max $max_net_path_num -slack_less_than 0 -file $dir_output/post_synth_timing.rpt
+
+    report_compile_order -constraints -file $dir_output/post_synth_constraints.rpt; # Verify IP constraints included
+    report_utilization -file $dir_output/post_synth_util.rpt; # -cells -pblocks
+    report_cdc -file $dir_output/post_synth_cdc.rpt
+    report_clocks -file $dir_output/post_synth_clocks.rpt; # Verify clock settings
+
+    # Use IS_SEQUENTIAL for -from/-to
+    # Instantiate XPM_CDC modules
+    # write_xdc -force -exclude_physical -exclude_timing -constraints INVALID
+
+    report_qor_assessment -report_all_suggestions -csv_output_dir $dir_output -file $dir_output/post_synth_qor_assess.rpt
+}
+
+# Runs opt_design and place_design; an optional -directive value is passed to place_design.
+proc runPlacement {args} {
+    global dir_output top_module current_time max_net_path_num
+
+    if {[dict get $args -open_checkpoint] == true} {
+        open_checkpoint $dir_output/post_synth_design.dcp
+    }
+
+    #source ./pblock.tcl
+
+    opt_design -remap -verbose
+
+    if {[dict exist $args -directive]} {
+        set directive [dict get $args -directive]
+        place_design -verbose -directive ${directive}
+    } else {
+        set directive ""
+        place_design -verbose
+    }
+}
+
+# Reports failfast metrics and timing through inter-SLR nets, with global clock nets filtered out.
+proc runPostPlacementReport {args} {
+    global dir_output target_clks max_net_path_num
+
+    if {[dict get $args -open_checkpoint] == true} {
+        open_checkpoint $dir_output/post_place.dcp
+    }
+
+    xilinx::designutils::report_failfast -by_slr -detailed_reports impl -file $dir_output/post_place_failfast.rpt
+    set slr_nets [xilinx::designutils::get_inter_slr_nets]
+    set slr_nets_exclude_clock [filter $slr_nets "TYPE != GLOBAL_CLOCK"]
+    set slr_net_exclude_clock_num [llength $slr_nets_exclude_clock]
+    if {$slr_net_exclude_clock_num > 0} {
+        report_timing -through $slr_nets_exclude_clock -nworst 1 -max $slr_net_exclude_clock_num -unique_pins -file $dir_output/post_place_slr_nets.rpt
+    }
+}
+
+# Routes the design and writes the post-route checkpoint, XDC and timing-simulation netlist.
+proc runRoute {args} {
+    global dir_output top_module
+
+    if {[dict get $args -open_checkpoint] == true} {
+        open_checkpoint $dir_output/post_place.dcp
+    }
+
+    route_design
+
+    proc runPPO { {num_iters 1} {enable_phys_opt 1} } {
+        for {set idx 0} {$idx < $num_iters} {incr idx} {
+            place_design -post_place_opt; # Better to run after route
+            if {$enable_phys_opt != 0} {
+                phys_opt_design
+            }
+            route_design
+            if {[get_property SLACK [get_timing_paths ]] >= 0} {
+                break; # Stop if timing closure
+            }
+        }
+    }
+
+    # runPPO 4 1; # num_iters=4, enable_phys_opt=1
+
+    write_checkpoint -force $dir_output/post_route.dcp
+    write_xdc -force -exclude_physical $dir_output/post_route.xdc
+
+    write_verilog -force $dir_output/post_impl_netlist.v -mode timesim -sdf_anno true
+}
+
+# Writes the post-route reports to $dir_output; "-open_checkpoint true" reloads the routed checkpoint first.
+proc runPostRouteReport {args} {
+    global dir_output target_clks max_net_path_num
+
+    if {[dict get $args -open_checkpoint] == true} {
+        open_checkpoint $dir_output/post_route.dcp
+    }
+
+    report_timing_summary -report_unconstrained -warn_on_violation -file $dir_output/post_route_timing_summary.rpt
+    # report_timing -of_objects [get_timing_paths -hold -to [get_clocks $target_clks] -max_paths $max_net_path_num -filter { LOGIC_LEVELS >= 4 && LOGIC_LEVELS <= 40 }] -file $dir_output/post_route_long_paths.rpt
+    report_methodology -file $dir_output/post_route_methodology.rpt
+    report_timing -max $max_net_path_num -slack_less_than 0 -file $dir_output/post_route_timing.rpt
+
+    report_route_status -file $dir_output/post_route_status.rpt
+    report_drc -file $dir_output/post_route_drc.rpt
+    report_drc -ruledeck methodology_checks -file $dir_output/post_route_drc_methodology.rpt
+    report_drc -ruledeck timing_checks -file $dir_output/post_route_drc_timing.rpt
+    # Check unique control sets < 7.5% of total slices, at most 15%
+    report_control_sets -verbose -file $dir_output/post_route_control_sets.rpt
+
+    report_power -file $dir_output/post_route_power.rpt
+    report_power_opt -file $dir_output/post_route_power_opt.rpt
+    report_utilization -file $dir_output/post_route_util.rpt
+    report_ram_utilization -detail -file $dir_output/post_route_ram_utils.rpt
+    # Check fanout < 25K
+    report_high_fanout_nets -file $dir_output/post_route_fanout.rpt
+
+    report_design_analysis -hold -max_paths $max_net_path_num -file $dir_output/post_route_design_hold_timing.rpt
+    # Check initial estimated router congestion level no more than 5, type (global, long, short) and top cells
+    report_design_analysis -congestion -file $dir_output/post_route_congestion.rpt
+    # Check difficult modules (>15K cells) with high Rent Exponent (complex logic cone) >= 0.65 and/or Avg. Fanout >= 4
+    report_design_analysis -complexity -file $dir_output/post_route_complexity.rpt; # -hierarchical_depth
+    # If congested, check problematic cells using report_utilization -cells
+    # If congested, try NetDelay* for UltraScale+, or try SpredLogic* for UltraScale in implementation strategy
+
+    xilinx::designutils::report_failfast -detailed_reports impl -file $dir_output/post_route_failfast.rpt
+    # xilinx::ultrafast::report_io_reg -file $dir_output/post_route_io_reg.rpt
+    report_io -file $dir_output/post_route_io.rpt
+    report_pipeline_analysis -file $dir_output/post_route_pipeline.rpt
+    report_qor_assessment -report_all_suggestions -csv_output_dir $dir_output -file $dir_output/post_route_qor_assess.rpt
+    report_qor_suggestions -report_all_suggestions -csv_output_dir $dir_output -file $dir_output/post_route_qor_suggest.rpt
+}
+
+proc runWriteBitStream {args} {
+    global dir_output top_module
+
+    if {[dict get $args -open_checkpoint] == true} {
+        open_checkpoint $dir_output/post_route.dcp
+    }
+
+    set_property CONFIG_MODE SPIx4 [current_design]
+    set_property BITSTREAM.CONFIG.SPI_BUSWIDTH 4 [current_design]
+
+    write_bitstream -force $dir_output/top.bit
+}
+
+proc runProgramDevice {args} {
+    global dir_output top_module
+
+    open_hw_manager
+    connect_hw_server -allow_non_jtag
+    open_hw_target
+    current_hw_device [get_hw_devices xcvu13p_0]
+    refresh_hw_device -update_hw_probes false [lindex [get_hw_devices xcvu13p_0] 0]
+
+    # set_property PROBES.FILE {/home/mingheng/xdma_0_ex/xdma_0_ex.runs/impl_1/top.ltx} [get_hw_devices xcvu13p_0]
+    # set_property FULL_PROBES.FILE {/home/mingheng/xdma_0_ex/xdma_0_ex.runs/impl_1/top.ltx} [get_hw_devices xcvu13p_0]
+
+    set_property PROGRAM.FILE $dir_output/top.bit [get_hw_devices xcvu13p_0]
+    program_hw_devices [get_hw_devices xcvu13p_0]
+}
+
+# Command-line handling (vivado ... -tclargs <phase> ?directive?):
+#   no argument runs synthesis and implementation; "synth" or "prw" runs only that phase.
+# Defaults are set up front so every variable read below exists for any argument list.
+set synth 1
+set prw 1
+set directive ExtraNetDelay_high
+set redirect 0
+if {$argc > 0} {
+    set op [lindex $argv 0]
+    if {$op eq "synth"} {
+        set prw 0
+    } elseif {$op eq "prw"} {
+        set synth 0
+        if {$argc >= 2} {
+            # An explicit directive also redirects the outputs to $dir_output/<directive>.
+            set directive [lindex $argv 1]
+            set redirect 1
+        }
+    } else {
+        error "build_top.tcl: unknown phase \"$op\" (expected synth or prw)"
+    }
+}
+
+if {$synth} {
+    runGenerateIP -open_checkpoint false
+    runSynthIP -open_checkpoint false
+    runSynthOOC
+    addExtFiles -open_checkpoint false
+    runSynthDesign -open_checkpoint false
+    runPostSynthReport -open_checkpoint false
+}
+
+if {$prw} {
+    if {!$synth} {
+        read_xdc $dir_output/post_synth.xdc
+        open_checkpoint $dir_output/post_synth_design.dcp
+    }
+    if {$redirect} {
+        set dir_output "$dir_output/$directive"; file mkdir $dir_output
+    }
+    runPlacement -open_checkpoint false -directive $directive
+
+    # runPostPlacementReport -open_checkpoint false
+    runRoute -open_checkpoint false
+    # runPostRouteReport -open_checkpoint false
+    runWriteBitStream -open_checkpoint false
+    # runProgramDevice -open_checkpoint false
+}
diff --git a/backend/ips/pcie/pcie.tcl b/backend/ips/pcie/pcie.tcl
new file mode 100644
index 0000000..f1051b5
--- /dev/null
+++ b/backend/ips/pcie/pcie.tcl
@@ -0,0 +1,39 @@
+create_ip -name pcie4_uscale_plus
-vendor xilinx.com -library ip -version 1.3 \ + -module_name pcie4_uscale_plus_0 -dir $dir_ip_gen -force + +set_property -dict [list CONFIG.PL_LINK_CAP_MAX_LINK_SPEED {8.0_GT/s} \ + CONFIG.PL_LINK_CAP_MAX_LINK_WIDTH {X16} \ + CONFIG.AXISTEN_IF_EXT_512_RQ_STRADDLE {true} \ + CONFIG.AXISTEN_IF_EXT_512_RC_STRADDLE {true} \ + CONFIG.AXISTEN_IF_EXT_512_RC_4TLP_STRADDLE {false} \ + CONFIG.axisten_if_enable_client_tag {true} \ + CONFIG.PF0_DEVICE_ID {903F} \ + CONFIG.PF2_DEVICE_ID {943F} \ + CONFIG.PF3_DEVICE_ID {963F} \ + CONFIG.pf0_bar0_size {4} \ + CONFIG.pf0_bar1_enabled {true} \ + CONFIG.pf0_bar1_type {Memory} \ + CONFIG.pf0_bar1_scale {Megabytes} \ + CONFIG.pf0_bar1_size {2} \ + CONFIG.pf0_dev_cap_max_payload {512_bytes} \ + CONFIG.extended_tag_field {true} \ + CONFIG.pf1_bar0_size {4} \ + CONFIG.pf1_bar1_enabled {true} \ + CONFIG.pf1_bar1_type {Memory} \ + CONFIG.pf1_bar1_scale {Megabytes} \ + CONFIG.pf1_bar1_size {2} \ + CONFIG.axisten_if_width {512_bit} \ + CONFIG.pf2_bar0_size {4} \ + CONFIG.pf2_bar1_enabled {true} \ + CONFIG.pf2_bar1_type {Memory} \ + CONFIG.pf1_bar1_scale {Megabytes} \ + CONFIG.pf1_bar1_size {2} \ + CONFIG.pf3_bar0_size {4} \ + CONFIG.pf3_bar1_enabled {true} \ + CONFIG.pf3_bar1_type {Memory} \ + CONFIG.pf3_bar1_scale {Megabytes} \ + CONFIG.pf3_bar1_size {2} \ + CONFIG.mode_selection {Advanced} \ + CONFIG.coreclk_freq {500} \ + CONFIG.plltype {QPLL1} \ + CONFIG.axisten_freq {250}] [get_ips pcie4_uscale_plus_0] \ No newline at end of file diff --git a/backend/listVlogFiles.tcl b/backend/listVlogFiles.tcl new file mode 100644 index 0000000..7e30e6f --- /dev/null +++ b/backend/listVlogFiles.tcl @@ -0,0 +1,223 @@ +#!/bin/sh + +# \ +exec $BLUESPECDIR/bin/bluetcl "$0" "$@" + +package require utils + +proc usage {} { + puts "" + puts "usage: $::argv0 top_package_name top_module" + puts "Options:" + puts " -q Do not print section headers" + puts " -p Bluespec search path" + puts " -bdir Bluespec bdir directory" + puts " -vdir Bluespec vdir directory" + 
puts " -generated Print synthesized BSV modules" + puts " -primitives Print Bluespec primitive modules" + puts " -imported Print imported modules" + puts " -no-inline-fns Print modules for no-inline functions" + puts " -all Alias for -generated -primitives -imported -no-inline-fns" + puts "" + puts " e.g: -bdir build -p build:+ -vdir rtl mkTop fpga_a" +} + +set boolOptions [list -- -q -generated -primitives -imported -no-inline-fns -all] +set valOptions [list -p -bdir -vdir] + +if { [catch [list ::utils::scanOptions $boolOptions $valOptions true OPT "$argv"] opts] } { + puts stderr $opts + usage + exit 1 +} + +if {[llength $opts] == 0} { + puts stderr "A package name argument is required" + usage + exit 1 +} + +if {[llength $opts] == 1} { + puts stderr "A top module name is required" + usage + exit 1 +} + +if {[llength $opts] > 2} { + puts stderr "Too many arguments" + usage + exit 1 +} + +if { [info exists OPT(-p)] } { + Bluetcl::flags set -p $OPT(-p) +} +if { [info exists OPT(-bdir)] } { + Bluetcl::flags set -bdir $OPT(-bdir) +} +if { [info exists OPT(-vdir)] } { + Bluetcl::flags set -vdir $OPT(-vdir) +} + +if {![info exists OPT(-all)] && ![info exists OPT(-generated)] && + ![info exists OPT(-no-inline-fns)] && ![info exists OPT(-primitives)] && + ![info exists OPT(-imported)]} { + set OPT(-all) 1 +} + +set top_pkg [lindex $opts 0] +set top_mod [lindex $opts 1] + +# Assume -verilog +Bluetcl::flags set -verilog + +# Load the module information +Bluetcl::module load $top_pkg + +# Walk the hierarchy from the top, recording each module's type in mod_info; noinline_mods lists modules queued via a no-inline list +set mods_to_process [list $top_pkg] +set already_done [list] +set noinline_mods [list] +while {[llength $mods_to_process] > 0} { + set this_mod [utils::head $mods_to_process] + set mods_to_process [utils::tail $mods_to_process] + set res [Bluetcl::module submods $this_mod] + set this_mod_type [lindex $res 0] + if {$this_mod_type == "user" && [lsearch -exact $noinline_mods $this_mod] != -1} { + set this_mod_type "no-inline-fn" + } + array set mod_info [list $this_mod
$this_mod_type] + lappend already_done $this_mod + set sub_mods [lindex $res 1] + set no_inlines [lindex $res 2] + foreach mod $sub_mods { + set this_sub_mod [utils::snd $mod] + if {[lsearch -exact $already_done $this_sub_mod] == -1 && + [lsearch -exact $mods_to_process $this_sub_mod] == -1 } { + lappend mods_to_process $this_sub_mod + } + } + # Remember every module named in the no-inline list so it is typed "no-inline-fn" when it is dequeued later + foreach mod $no_inlines { + set this_sub_mod [utils::snd $mod] + lappend noinline_mods $this_sub_mod + if {[lsearch -exact $already_done $this_sub_mod] == -1 && + [lsearch -exact $mods_to_process $this_sub_mod] == -1 } { + lappend mods_to_process $this_sub_mod + } + } +} + +# Locate the file for a module: the first existing <dir>/<name>.<ext> wins; returns its normalized path, or "" when none exists +proc lookupfile {name path exts} { + foreach dir $path { + foreach ext $exts { + set fname [join [list $name $ext] "."] + set fpath [file join $dir $fname] + if {[file exist $fpath]} { + return [file normalize $fpath] + } + } + } + return "" +} + +# Append name to the caller's list variable flName unless an equal entry is already present +proc addfile {name flName} { + upvar 1 $flName file_list + + set matched 0 + foreach f $file_list { + if {$f == $name} { + set matched 1 + break + } + } + if {$matched == 0} { + lappend file_list $name + } +} + +# Identify the location of each module's file +set user_mods [list] +set noinline_fns [list] +set primitives [list] +set imported [list] + +set vdir [lindex [Bluetcl::flags show vdir] 1] +set bsdir $::env(BLUESPECDIR) + +set libs [list [file join $bsdir "Verilog"] [file join $bsdir "Libraries"]] +set vsearch [split [lindex [Bluetcl::flags show p] 1] ":"] +set vdir_and_libs [concat $vdir $libs] +set vsearch_and_libs [concat $vsearch $libs] + +foreach mod [array names mod_info] { + set mod_type $mod_info($mod) + + # The Probe primitive has no associated Verilog module + if {$mod_type == "primitive" && $mod == "Probe"} { + continue + } + + # Add the module info to the correct list + switch -exact $mod_type { + "user" {addfile [lookupfile $mod $vdir_and_libs {v}] user_mods} + "no-inline-fn" {addfile
[lookupfile $mod $vdir {v}] noinline_fns} + "primitive" {addfile [lookupfile $mod $libs {v}] primitives} + "import" {addfile [lookupfile $mod $vsearch_and_libs {v vhd vhdl}] imported} + } + + # Some primitives instantiate other primitives; each label must equal the module name exactly (switch -exact) + if {$mod_type == "primitive"} { + switch -exact $mod { + "MakeReset" {addfile [lookupfile "SyncReset" $libs {v}] primitives} + "MakeResetA" {addfile [lookupfile "SyncResetA" $libs {v}] primitives} + "SyncFIFOLevel" {addfile [lookupfile "ClockGen" $libs {v}] primitives + addfile [lookupfile "SyncHandshake" $libs {v}] primitives + } + "SyncFIFO" {addfile [lookupfile "ClockGen" $libs {v}] primitives} + "SyncRegister" {addfile [lookupfile "ClockGen" $libs {v}] primitives + addfile [lookupfile "SyncHandshake" $libs {v}] primitives + } + } + } +} + +if {[llength $user_mods] > 0 && ([info exists OPT(-generated)] || [info exists OPT(-all)])} { + if {![info exists OPT(-q)]} { + puts "# Synthesized user modules:" + } + foreach file $user_mods { + puts $file + } +} + +if {[llength $noinline_fns] > 0 && ([info exists OPT(-no-inline-fns)] || [info exists OPT(-all)])} { + if {![info exists OPT(-q)]} { + puts "# No-inlined functions:" + } + foreach file $noinline_fns { + puts $file + } +} + +if {[llength $imported] > 0 && ([info exists OPT(-imported)] || [info exists OPT(-all)])} { + if {![info exists OPT(-q)]} { + puts "# Imported modules:" + } + foreach file $imported { + puts $file + } +} + +if {[llength $primitives] > 0 && ([info exists OPT(-primitives)] || [info exists OPT(-all)])} { + if {![info exists OPT(-q)]} { + puts "# Bluespec library primitives:" + } + foreach file $primitives { + puts $file + } +} + +exit \ No newline at end of file diff --git a/backend/ooc_tcl_and_xdc/bsv_ooc_module_common.tcl b/backend/ooc_tcl_and_xdc/bsv_ooc_module_common.tcl new file mode 100644 index 0000000..2143d31 --- /dev/null +++ b/backend/ooc_tcl_and_xdc/bsv_ooc_module_common.tcl @@ -0,0 +1,18 @@ +set dir_ooc_out ${dir_output}/ooc/${ooc_top} +file mkdir
$dir_ooc_out + +read_verilog [ glob $dir_bsv_gen/*.v ] +read_xdc ${dir_ooc_scripts}/bsv_ooc_module_common.xdc -mode out_of_context +synth_design -top $ooc_top -mode out_of_context -flatten_hierarchy none +write_checkpoint -force ${dir_ooc_out}/${ooc_top}.dcp + + + +report_timing_summary -report_unconstrained -warn_on_violation -file $dir_ooc_out/post_synth_timing_summary.rpt +check_timing -override_defaults no_clock -file $dir_ooc_out/post_synth_check_timing.rpt +report_design_analysis -logic_level_dist_paths $max_net_path_num -logic_level_distribution -file $dir_ooc_out/post_synth_design_logic_level_dist.rpt +xilinx::designutils::report_failfast -max_paths $max_net_path_num -detailed_reports synth -file $dir_ooc_out/post_synth_failfast.rpt +report_drc -file $dir_ooc_out/post_synth_drc.rpt +report_methodology -file $dir_ooc_out/post_synth_methodology.rpt +report_timing -max $max_net_path_num -slack_less_than 0 -file $dir_ooc_out/post_synth_timing.rpt +report_utilization -file $dir_ooc_out/post_synth_util.rpt; # -cells -pblocks \ No newline at end of file diff --git a/backend/ooc_tcl_and_xdc/bsv_ooc_module_common.xdc b/backend/ooc_tcl_and_xdc/bsv_ooc_module_common.xdc new file mode 100644 index 0000000..a8e37d7 --- /dev/null +++ b/backend/ooc_tcl_and_xdc/bsv_ooc_module_common.xdc @@ -0,0 +1 @@ +create_clock -name bsv_clk -period 4 [get_ports CLK] \ No newline at end of file diff --git a/backend/u200/top.v b/backend/u200/top.v new file mode 100644 index 0000000..42d39fa --- /dev/null +++ b/backend/u200/top.v @@ -0,0 +1,705 @@ +//----------------------------------------------------------------------------- +// +// (c) Copyright 2012-2012 Xilinx, Inc. All rights reserved. +// +// This file contains confidential and proprietary information +// of Xilinx, Inc. and is protected under U.S. and +// international copyright and other intellectual property +// laws. 
+// +// DISCLAIMER +// This disclaimer is not a license and does not grant any +// rights to the materials distributed herewith. Except as +// otherwise provided in a valid license issued to you by +// Xilinx, and to the maximum extent permitted by applicable +// law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND +// WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES +// AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING +// BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- +// INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and +// (2) Xilinx shall not be liable (whether in contract or tort, +// including negligence, or under any other theory of +// liability) for any loss or damage of any kind or nature +// related to, arising under or in connection with these +// materials, including for any direct, or any indirect, +// special, incidental, or consequential loss or damage +// (including loss of data, profits, goodwill, or any type of +// loss or damage suffered as a result of any action brought +// by a third party) even if such damage or loss was +// reasonably foreseeable or Xilinx had been advised of the +// possibility of the same. +// +// CRITICAL APPLICATIONS +// Xilinx products are not designed or intended to be fail- +// safe, or for use in any application requiring fail-safe +// performance, such as life-support or safety devices or +// systems, Class III medical devices, nuclear facilities, +// applications related to the deployment of airbags, or any +// other applications that could lead to death, personal +// injury, or severe property or environmental damage +// (individually and collectively, "Critical +// Applications"). Customer assumes the sole risk and +// liability of any use of Xilinx products in Critical +// Applications, subject only to applicable laws and +// regulations governing limitations on product liability. 
+// +// THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS +// PART OF THIS FILE AT ALL TIMES. +// +//----------------------------------------------------------------------------- +// +// Project : UltraScale+ FPGA PCI Express v4.0 Integrated Block +// File : xilinx_pcie4_uscale_ep.v +// Version : 1.3 +//----------------------------------------------------------------------------- +//-- +//-- Description: PCI Express Endpoint example FPGA design +//-- +//------------------------------------------------------------------------------ +`define PCIE4_NEW_PINS 1 +`timescale 1ps / 1ps + +(* DowngradeIPIdentifiedWarnings = "yes" *) +module top # ( +/* +*/ + parameter [4:0] PL_LINK_CAP_MAX_LINK_WIDTH = 16, // 1- X1, 2 - X2, 4 - X4, 8 - X8, 16 - X16 + parameter C_DATA_WIDTH = 512, // RX/TX interface data width + parameter AXISTEN_IF_MC_RX_STRADDLE = 1, + parameter PL_LINK_CAP_MAX_LINK_SPEED = 4, // 1- GEN1, 2 - GEN2, 4 - GEN3, 8 - GEN4 + parameter KEEP_WIDTH = C_DATA_WIDTH / 32, + parameter EXT_PIPE_SIM = "FALSE", // This Parameter has effect on selecting Enable External PIPE Interface in GUI. 
+ parameter AXISTEN_IF_CC_ALIGNMENT_MODE = "FALSE", + parameter AXISTEN_IF_CQ_ALIGNMENT_MODE = "FALSE", + parameter AXISTEN_IF_RQ_ALIGNMENT_MODE = "FALSE", + parameter AXISTEN_IF_RC_ALIGNMENT_MODE = "FALSE", + parameter AXI4_CQ_TUSER_WIDTH = 183, + parameter AXI4_CC_TUSER_WIDTH = 81, + parameter AXI4_RQ_TUSER_WIDTH = 137, + parameter AXI4_RC_TUSER_WIDTH = 161, + parameter AXISTEN_IF_ENABLE_CLIENT_TAG = 1, + parameter RQ_AVAIL_TAG_IDX = 8, + parameter RQ_AVAIL_TAG = 256, + parameter AXISTEN_IF_RQ_PARITY_CHECK = 0, + parameter AXISTEN_IF_CC_PARITY_CHECK = 0, + parameter AXISTEN_IF_RC_PARITY_CHECK = 0, + parameter AXISTEN_IF_CQ_PARITY_CHECK = 0, + parameter AXISTEN_IF_ENABLE_RX_MSG_INTFC = "FALSE", + parameter [17:0] AXISTEN_IF_ENABLE_MSG_ROUTE = 18'h2FFFF +) ( + output [(PL_LINK_CAP_MAX_LINK_WIDTH - 1) : 0] pci_exp_txp, + output [(PL_LINK_CAP_MAX_LINK_WIDTH - 1) : 0] pci_exp_txn, + input [(PL_LINK_CAP_MAX_LINK_WIDTH - 1) : 0] pci_exp_rxp, + input [(PL_LINK_CAP_MAX_LINK_WIDTH - 1) : 0] pci_exp_rxn, + + + + + output led_0, + output led_1, + output led_2, + + // Board LED Logic IO + // 300 MHz clock for the board + input wire clk_300MHz_p, + input wire clk_300MHz_n, + + input sys_clk_p, + input sys_clk_n, + + input sys_rst_n +); + + // Local Parameters derived from user selection + localparam TCQ = 1; + + wire user_lnk_up; + wire phy_rdy_out; + + + //----------------------------------------------------------------------------------------------------------------// + // AXI Interface // + //----------------------------------------------------------------------------------------------------------------// + + wire user_clk; + wire user_reset; + + wire s_axis_rq_tlast; + wire [C_DATA_WIDTH-1:0] s_axis_rq_tdata; + wire [AXI4_RQ_TUSER_WIDTH-1:0] s_axis_rq_tuser; + wire [KEEP_WIDTH-1:0] s_axis_rq_tkeep; + wire [3:0] s_axis_rq_tready; + wire s_axis_rq_tvalid; + + wire [C_DATA_WIDTH-1:0] m_axis_rc_tdata; + wire [AXI4_RC_TUSER_WIDTH-1:0] m_axis_rc_tuser; + wire m_axis_rc_tlast; + 
wire [KEEP_WIDTH-1:0] m_axis_rc_tkeep; + wire m_axis_rc_tvalid; + wire m_axis_rc_tready; + + wire [C_DATA_WIDTH-1:0] m_axis_cq_tdata; + wire [AXI4_CQ_TUSER_WIDTH-1:0] m_axis_cq_tuser; + wire m_axis_cq_tlast; + wire [KEEP_WIDTH-1:0] m_axis_cq_tkeep; + wire m_axis_cq_tvalid; + wire m_axis_cq_tready; + + wire [C_DATA_WIDTH-1:0] s_axis_cc_tdata; + wire [AXI4_CC_TUSER_WIDTH-1:0] s_axis_cc_tuser; + wire s_axis_cc_tlast; + wire [KEEP_WIDTH-1:0] s_axis_cc_tkeep; + wire s_axis_cc_tvalid; + wire [3:0] s_axis_cc_tready; + + wire [3:0] pcie_tfc_nph_av; + wire [3:0] pcie_tfc_npd_av; + //----------------------------------------------------------------------------------------------------------------// + // Configuration (CFG) Interface // + //----------------------------------------------------------------------------------------------------------------// + + wire pcie_cq_np_req; + wire [5:0] pcie_cq_np_req_count; + wire [5:0] pcie_rq_seq_num0; + wire pcie_rq_seq_num_vld0; + wire [5:0] pcie_rq_seq_num1; + wire pcie_rq_seq_num_vld1; + + //----------------------------------------------------------------------------------------------------------------// + // EP and RP // + //----------------------------------------------------------------------------------------------------------------// + + wire cfg_phy_link_down; + wire [2:0] cfg_negotiated_width; + wire [1:0] cfg_current_speed; + wire [1:0] cfg_max_payload; + wire [2:0] cfg_max_read_req; + wire [15:0] cfg_function_status; + wire [11:0] cfg_function_power_state; + wire [503:0] cfg_vf_status; + wire [1:0] cfg_link_power_state; + + // Error Reporting Interface + wire cfg_err_cor_out; + wire cfg_err_nonfatal_out; + wire cfg_err_fatal_out; + + wire [5:0] cfg_ltssm_state; + wire [3:0] cfg_rcb_status; + wire [1:0] cfg_obff_enable; + wire cfg_pl_status_change; + + // Management Interface + wire [9:0] cfg_mgmt_addr; + wire cfg_mgmt_write; + wire [31:0] cfg_mgmt_write_data; + wire [3:0] cfg_mgmt_byte_enable; + wire cfg_mgmt_read; + wire 
[31:0] cfg_mgmt_read_data; + wire cfg_mgmt_read_write_done; + wire cfg_mgmt_type1_cfg_reg_access; + wire cfg_msg_received; + wire [7:0] cfg_msg_received_data; + wire [4:0] cfg_msg_received_type; + wire cfg_msg_transmit; + wire [2:0] cfg_msg_transmit_type; + wire [31:0] cfg_msg_transmit_data; + wire cfg_msg_transmit_done; + wire [7:0] cfg_fc_ph; + wire [11:0] cfg_fc_pd; + wire [7:0] cfg_fc_nph; + wire [11:0] cfg_fc_npd; + wire [7:0] cfg_fc_cplh; + wire [11:0] cfg_fc_cpld; + wire [2:0] cfg_fc_sel; + wire [2:0] cfg_per_func_status_control; + wire [3:0] cfg_per_function_number; + wire cfg_per_function_output_request; + + wire [63:0] cfg_dsn; + wire cfg_power_state_change_interrupt; + wire cfg_power_state_change_ack; + wire cfg_err_cor_in; + wire cfg_err_uncor_in; + + wire [3:0] cfg_flr_in_process; + wire [1:0] cfg_flr_done; + wire [251:0] cfg_vf_flr_in_process; + wire cfg_vf_flr_done; + wire [7:0] cfg_vf_flr_func_num; + + wire cfg_link_training_enable; + + //----------------------------------------------------------------------------------------------------------------// + // EP Only // + //----------------------------------------------------------------------------------------------------------------// + + // Interrupt Interface Signals + wire [3:0] cfg_interrupt_int; + wire [1:0] cfg_interrupt_pending; + wire cfg_interrupt_sent; + + wire [3:0] cfg_interrupt_msi_enable; + wire [11:0] cfg_interrupt_msi_mmenable; + wire cfg_interrupt_msi_mask_update; + wire [31:0] cfg_interrupt_msi_data; + wire [1:0] cfg_interrupt_msi_select; + wire [31:0] cfg_interrupt_msi_int; + wire [63:0] cfg_interrupt_msi_pending_status; + wire cfg_interrupt_msi_sent; + wire cfg_interrupt_msi_fail; + wire [2:0] cfg_interrupt_msi_attr; + wire cfg_interrupt_msi_tph_present; + wire [1:0] cfg_interrupt_msi_tph_type; + wire [7:0] cfg_interrupt_msi_tph_st_tag; + wire [7:0] cfg_interrupt_msi_function_number; + +// EP only + wire cfg_hot_reset_out; + wire cfg_config_space_enable; + wire 
cfg_req_pm_transition_l23_ready; + +// RP only + wire cfg_hot_reset_in; + + wire [7:0] cfg_ds_port_number; + wire [7:0] cfg_ds_bus_number; + wire [4:0] cfg_ds_device_number; + + //----------------------------------------------------------------------------------------------------------------// + // System(SYS) Interface // + //----------------------------------------------------------------------------------------------------------------// + + wire sys_clk; + wire sys_clk_gt; + wire sys_rst_n_c; + //----------------------------------------------------------------------------------------------------------------------- + + IBUF sys_reset_n_ibuf (.O(sys_rst_n_c), .I(sys_rst_n)); + + IBUFDS_GTE4 refclk_ibuf (.O(sys_clk_gt), .ODIV2(sys_clk), .I(sys_clk_p), .CEB(1'b0), .IB(sys_clk_n)); + + + + // assign LED outputs + assign led_0 = 0; + assign led_1 = 0; + assign led_2 = 0; + +//------------------------------------------------------------------------------------------------------------------// +// PCIe Core Top Level Wrapper // +//------------------------------------------------------------------------------------------------------------------// +// Core Top Level Wrapper + pcie4_uscale_plus_0 pcie4_uscale_plus_0_i ( + //---------------------------------------------------------------------------------------// + // PCI Express (pci_exp) Interface // + //---------------------------------------------------------------------------------------// + + // Tx + .pci_exp_txn ( pci_exp_txn ), + .pci_exp_txp ( pci_exp_txp ), + + // Rx + .pci_exp_rxn ( pci_exp_rxn ), + .pci_exp_rxp ( pci_exp_rxp ), + + //---------------------------------------------------------------------------------------// + // AXI Interface // + //---------------------------------------------------------------------------------------// + + .user_clk ( user_clk ), + .user_reset ( user_reset ), + .user_lnk_up ( user_lnk_up ), + .phy_rdy_out ( phy_rdy_out ), + + .s_axis_rq_tlast ( s_axis_rq_tlast ), + 
.s_axis_rq_tdata ( s_axis_rq_tdata ), + .s_axis_rq_tuser ( s_axis_rq_tuser ), + .s_axis_rq_tkeep ( s_axis_rq_tkeep ), + .s_axis_rq_tready ( s_axis_rq_tready ), + .s_axis_rq_tvalid ( s_axis_rq_tvalid ), + + .m_axis_rc_tdata ( m_axis_rc_tdata ), + .m_axis_rc_tuser ( m_axis_rc_tuser ), + .m_axis_rc_tlast ( m_axis_rc_tlast ), + .m_axis_rc_tkeep ( m_axis_rc_tkeep ), + .m_axis_rc_tvalid ( m_axis_rc_tvalid ), + .m_axis_rc_tready ( m_axis_rc_tready ), + + .m_axis_cq_tdata ( m_axis_cq_tdata ), + .m_axis_cq_tuser ( m_axis_cq_tuser ), + .m_axis_cq_tlast ( m_axis_cq_tlast ), + .m_axis_cq_tkeep ( m_axis_cq_tkeep ), + .m_axis_cq_tvalid ( m_axis_cq_tvalid ), + .m_axis_cq_tready ( m_axis_cq_tready ), + + .s_axis_cc_tdata ( s_axis_cc_tdata ), + .s_axis_cc_tuser ( s_axis_cc_tuser ), + .s_axis_cc_tlast ( s_axis_cc_tlast ), + .s_axis_cc_tkeep ( s_axis_cc_tkeep ), + .s_axis_cc_tvalid ( s_axis_cc_tvalid ), + .s_axis_cc_tready ( s_axis_cc_tready ), + + + + //---------------------------------------------------------------------------------------// + // Configuration (CFG) Interface // + //---------------------------------------------------------------------------------------// + .pcie_tfc_nph_av ( pcie_tfc_nph_av ), + .pcie_tfc_npd_av ( pcie_tfc_npd_av ), + + .pcie_rq_seq_num0 ( pcie_rq_seq_num0 ) , + .pcie_rq_seq_num_vld0 ( pcie_rq_seq_num_vld0 ) , + .pcie_rq_seq_num1 ( pcie_rq_seq_num1 ) , + .pcie_rq_seq_num_vld1 ( pcie_rq_seq_num_vld1 ) , + .pcie_rq_tag0 ( ) , + .pcie_rq_tag1 ( ) , + .pcie_rq_tag_av ( ) , + .pcie_rq_tag_vld0 ( ) , + .pcie_rq_tag_vld1 ( ) , + .pcie_cq_np_req ( {1'b1,pcie_cq_np_req} ), + .pcie_cq_np_req_count ( pcie_cq_np_req_count ), + .cfg_phy_link_down ( cfg_phy_link_down ), + .cfg_phy_link_status ( ), + .cfg_negotiated_width ( cfg_negotiated_width ), + .cfg_current_speed ( cfg_current_speed ), + .cfg_max_payload ( cfg_max_payload ), + .cfg_max_read_req ( cfg_max_read_req ), + .cfg_function_status ( cfg_function_status ), + .cfg_function_power_state ( 
cfg_function_power_state ), + .cfg_vf_status ( cfg_vf_status ), + .cfg_vf_power_state ( ), + .cfg_link_power_state ( cfg_link_power_state ), + // Error Reporting Interface + .cfg_err_cor_out ( cfg_err_cor_out ), + .cfg_err_nonfatal_out ( cfg_err_nonfatal_out ), + .cfg_err_fatal_out ( cfg_err_fatal_out ), + + .cfg_local_error_out ( ), + .cfg_local_error_valid ( ), + + .cfg_ltssm_state ( cfg_ltssm_state ), + .cfg_rx_pm_state ( ), + .cfg_tx_pm_state ( ), + .cfg_rcb_status ( cfg_rcb_status ), + + .cfg_obff_enable ( cfg_obff_enable ), + .cfg_pl_status_change ( cfg_pl_status_change ), + + .cfg_tph_requester_enable ( ), + .cfg_tph_st_mode ( ), + .cfg_vf_tph_requester_enable ( ), + .cfg_vf_tph_st_mode ( ), + // Management Interface + .cfg_mgmt_addr ( cfg_mgmt_addr ), + .cfg_mgmt_write ( cfg_mgmt_write ), + .cfg_mgmt_write_data ( cfg_mgmt_write_data ), + .cfg_mgmt_byte_enable ( cfg_mgmt_byte_enable ), + .cfg_mgmt_read ( cfg_mgmt_read ), + .cfg_mgmt_read_data ( cfg_mgmt_read_data ), + .cfg_mgmt_read_write_done ( cfg_mgmt_read_write_done ), + .cfg_mgmt_debug_access (1'b0), + .cfg_mgmt_function_number (8'b0), + .cfg_pm_aspm_l1_entry_reject (1'b0), + .cfg_pm_aspm_tx_l0s_entry_disable (1'b1), + + .cfg_msg_received ( cfg_msg_received ), + .cfg_msg_received_data ( cfg_msg_received_data ), + .cfg_msg_received_type ( cfg_msg_received_type ), + + .cfg_msg_transmit ( cfg_msg_transmit ), + .cfg_msg_transmit_type ( cfg_msg_transmit_type ), + .cfg_msg_transmit_data ( cfg_msg_transmit_data ), + .cfg_msg_transmit_done ( cfg_msg_transmit_done ), + + .cfg_fc_ph ( cfg_fc_ph ), + .cfg_fc_pd ( cfg_fc_pd ), + .cfg_fc_nph ( cfg_fc_nph ), + .cfg_fc_npd ( cfg_fc_npd ), + .cfg_fc_cplh ( cfg_fc_cplh ), + .cfg_fc_cpld ( cfg_fc_cpld ), + .cfg_fc_sel ( cfg_fc_sel ), + + //-------------------------------------------------------------------------------// + // EP and RP // + //-------------------------------------------------------------------------------// + .cfg_bus_number ( ), + .cfg_dsn ( cfg_dsn ), + 
.cfg_power_state_change_ack ( cfg_power_state_change_ack ), + .cfg_power_state_change_interrupt ( cfg_power_state_change_interrupt ), + .cfg_err_cor_in ( cfg_err_cor_in ), + .cfg_err_uncor_in ( cfg_err_uncor_in ), + + .cfg_flr_in_process ( cfg_flr_in_process ), + .cfg_flr_done ( {2'b0,cfg_flr_done} ), + .cfg_vf_flr_in_process ( cfg_vf_flr_in_process ), + .cfg_vf_flr_done ( cfg_vf_flr_done ), + .cfg_link_training_enable ( cfg_link_training_enable ), + // EP only + .cfg_hot_reset_out ( cfg_hot_reset_out ), + .cfg_config_space_enable ( cfg_config_space_enable ), + .cfg_req_pm_transition_l23_ready ( cfg_req_pm_transition_l23_ready ), + + // RP only + .cfg_hot_reset_in ( cfg_hot_reset_in ), + + .cfg_ds_bus_number ( cfg_ds_bus_number ), + .cfg_ds_device_number ( cfg_ds_device_number ), + .cfg_ds_port_number ( cfg_ds_port_number ), + .cfg_vf_flr_func_num (cfg_vf_flr_func_num), + + //-------------------------------------------------------------------------------// + // EP Only // + //-------------------------------------------------------------------------------// + + // Interrupt Interface Signals + .cfg_interrupt_int ( cfg_interrupt_int ), + .cfg_interrupt_pending ( {2'b0,cfg_interrupt_pending} ), + .cfg_interrupt_sent ( cfg_interrupt_sent ), + + + + // MSI Interface + .cfg_interrupt_msi_enable ( cfg_interrupt_msi_enable ), + .cfg_interrupt_msi_mmenable ( cfg_interrupt_msi_mmenable ), + .cfg_interrupt_msi_mask_update ( cfg_interrupt_msi_mask_update ), + .cfg_interrupt_msi_data ( cfg_interrupt_msi_data ), + .cfg_interrupt_msi_select ( cfg_interrupt_msi_select ), + .cfg_interrupt_msi_int ( cfg_interrupt_msi_int ), + .cfg_interrupt_msi_pending_status ( cfg_interrupt_msi_pending_status [31:0]), + .cfg_interrupt_msi_sent ( cfg_interrupt_msi_sent ), + .cfg_interrupt_msi_fail ( cfg_interrupt_msi_fail ), + .cfg_interrupt_msi_attr ( cfg_interrupt_msi_attr ), + .cfg_interrupt_msi_tph_present ( cfg_interrupt_msi_tph_present ), + .cfg_interrupt_msi_tph_type ( 
cfg_interrupt_msi_tph_type ), + .cfg_interrupt_msi_tph_st_tag ( cfg_interrupt_msi_tph_st_tag ), + .cfg_interrupt_msi_pending_status_function_num ( 2'b0), + .cfg_interrupt_msi_pending_status_data_enable ( 1'b0), + + .cfg_interrupt_msi_function_number ( cfg_interrupt_msi_function_number ), + + + //--------------------------------------------------------------------------------------// + // System(SYS) Interface // + //--------------------------------------------------------------------------------------// + + .sys_clk ( sys_clk ), + .sys_clk_gt ( sys_clk_gt ), + .sys_reset ( sys_rst_n_c ) + ); + +//------------------------------------------------------------------------------------------------------------------// +// PIO Example Design Top Level // +//------------------------------------------------------------------------------------------------------------------// + mkRawTestDmaController dmac_i ( + .CLK ( user_clk ), + .RST_N ( ~user_reset ), + .user_lnk_up ( user_lnk_up ), + // .sys_rst ( sys_rst_n_c ), + + //-------------------------------------------------------------------------------------// + // AXI Interface // + //-------------------------------------------------------------------------------------// + + .m_axis_rq_tlast ( s_axis_rq_tlast ), + .m_axis_rq_tdata ( s_axis_rq_tdata ), + .m_axis_rq_tuser ( s_axis_rq_tuser ), + .m_axis_rq_tkeep ( s_axis_rq_tkeep ), + .m_axis_rq_tready ( s_axis_rq_tready[0] ), + .m_axis_rq_tvalid ( s_axis_rq_tvalid ), + + .s_axis_rc_tdata ( m_axis_rc_tdata ), + .s_axis_rc_tuser ( m_axis_rc_tuser ), + .s_axis_rc_tlast ( m_axis_rc_tlast ), + .s_axis_rc_tkeep ( m_axis_rc_tkeep ), + .s_axis_rc_tvalid ( m_axis_rc_tvalid ), + .s_axis_rc_tready ( m_axis_rc_tready ), + + .s_axis_cq_tdata ( m_axis_cq_tdata ), + .s_axis_cq_tuser ( m_axis_cq_tuser ), + .s_axis_cq_tlast ( m_axis_cq_tlast ), + .s_axis_cq_tkeep ( m_axis_cq_tkeep ), + .s_axis_cq_tvalid ( m_axis_cq_tvalid ), + .s_axis_cq_tready ( m_axis_cq_tready ), + + .m_axis_cc_tdata ( 
s_axis_cc_tdata ), + .m_axis_cc_tuser ( s_axis_cc_tuser ), + .m_axis_cc_tlast ( s_axis_cc_tlast ), + .m_axis_cc_tkeep ( s_axis_cc_tkeep ), + .m_axis_cc_tvalid ( s_axis_cc_tvalid ), + .m_axis_cc_tready ( s_axis_cc_tready[0] ), + + + // .pcie_rq_seq_num ( 'h0), + // .pcie_rq_seq_num_vld ( 'h0), + // .pcie_rq_tag ( 'h0), + // .pcie_rq_tag_vld ( 'h0), + .pcie_tfc_nph_av ( pcie_tfc_nph_av[1:0]), + .pcie_tfc_npd_av ( pcie_tfc_npd_av[1:0]), + .pcie_cq_np_req ( pcie_cq_np_req ), + .pcie_cq_np_req_count ( pcie_cq_np_req_count ), + + //--------------------------------------------------------------------------------// + // Configuration (CFG) Interface // + //--------------------------------------------------------------------------------// + + //--------------------------------------------------------------------------------// + // EP and RP // + //--------------------------------------------------------------------------------// + .cfg_phy_link_down ( cfg_phy_link_down ), + .cfg_negotiated_width ( cfg_negotiated_width ), + .cfg_current_speed ( cfg_current_speed ), + .cfg_max_payload ( cfg_max_payload ), + .cfg_max_read_req ( cfg_max_read_req ), + .cfg_function_status ( cfg_function_status [7:0] ), + .cfg_function_power_state ( cfg_function_power_state [5:0] ), + .cfg_vf_status ( cfg_vf_status ), + .cfg_link_power_state ( cfg_link_power_state ), + + // Error Reporting Interface + .cfg_err_cor_out ( cfg_err_cor_out ), + .cfg_err_nonfatal_out ( cfg_err_nonfatal_out ), + .cfg_err_fatal_out ( cfg_err_fatal_out ), +// .cfg_ltr_enable ( 1'b0 ), + .cfg_ltssm_state ( cfg_ltssm_state ), + .cfg_rcb_status ( cfg_rcb_status [1:0]), + .cfg_obff_enable ( cfg_obff_enable ), +// .cfg_pl_status_change ( cfg_pl_status_change ), + + // Management Interface + .cfg_mgmt_addr ( cfg_mgmt_addr ), + .cfg_mgmt_write ( cfg_mgmt_write ), + .cfg_mgmt_write_data ( cfg_mgmt_write_data ), + .cfg_mgmt_byte_enable ( cfg_mgmt_byte_enable ), + .cfg_mgmt_read ( cfg_mgmt_read ), + .cfg_mgmt_read_data ( 
cfg_mgmt_read_data ), + .cfg_mgmt_read_write_done ( cfg_mgmt_read_write_done ), +// .cfg_mgmt_type1_cfg_reg_access ( cfg_mgmt_type1_cfg_reg_access ), + .cfg_msg_received ( cfg_msg_received ), + .cfg_msg_received_data ( cfg_msg_received_data ), + .cfg_msg_received_type ( cfg_msg_received_type ), + .cfg_msg_transmit ( cfg_msg_transmit ), + .cfg_msg_transmit_type ( cfg_msg_transmit_type ), + .cfg_msg_transmit_data ( cfg_msg_transmit_data ), + .cfg_msg_transmit_done ( cfg_msg_transmit_done ), + + .cfg_fc_ph ( cfg_fc_ph ), + .cfg_fc_pd ( cfg_fc_pd ), + .cfg_fc_nph ( cfg_fc_nph ), + .cfg_fc_npd ( cfg_fc_npd ), + .cfg_fc_cplh ( cfg_fc_cplh ), + .cfg_fc_cpld ( cfg_fc_cpld ), + .cfg_fc_sel ( cfg_fc_sel ), + +// .cfg_per_func_status_control ( cfg_per_func_status_control ), +// .cfg_per_function_number ( cfg_per_function_number ), +// .cfg_per_function_output_request ( cfg_per_function_output_request ), + + .cfg_dsn ( cfg_dsn ), + .cfg_power_state_change_ack ( cfg_power_state_change_ack ), + .cfg_power_state_change_interrupt ( cfg_power_state_change_interrupt ), + .cfg_err_cor_in ( cfg_err_cor_in ), + .cfg_err_uncor_in ( cfg_err_uncor_in ), + + .cfg_flr_in_process ( cfg_flr_in_process [1:0] ), + .cfg_flr_done ( cfg_flr_done ), + .cfg_vf_flr_in_process ( cfg_vf_flr_in_process ), + .cfg_vf_flr_done ( cfg_vf_flr_done ), + .cfg_vf_flr_func_num ( cfg_vf_flr_func_num ), + + .cfg_link_training_enable ( cfg_link_training_enable ), + + .cfg_ds_port_number ( cfg_ds_port_number ), + .cfg_hot_reset_in ( cfg_hot_reset_out ), + .cfg_config_space_enable ( cfg_config_space_enable ), + .cfg_req_pm_transition_l23_ready ( cfg_req_pm_transition_l23_ready ), + + // RP only + .cfg_hot_reset_out ( cfg_hot_reset_in ), + + .cfg_ds_bus_number ( cfg_ds_bus_number ), + .cfg_ds_device_number ( cfg_ds_device_number ), + .cfg_ds_function_number ( ), + + //-------------------------------------------------------------------------------------// + // EP Only // + 
//-------------------------------------------------------------------------------------// + + .cfg_interrupt_msi_enable ( cfg_interrupt_msi_enable[0] ), + .cfg_interrupt_msi_mmenable ( cfg_interrupt_msi_mmenable[5:0] ), + .cfg_interrupt_msi_mask_update ( cfg_interrupt_msi_mask_update ), + .cfg_interrupt_msi_data ( cfg_interrupt_msi_data ), + .cfg_interrupt_msi_select ( cfg_interrupt_msi_select ), + .cfg_interrupt_msi_int ( cfg_interrupt_msi_int ), + .cfg_interrupt_msi_pending_status ( cfg_interrupt_msi_pending_status ), + .cfg_interrupt_msi_sent ( cfg_interrupt_msi_sent ), + .cfg_interrupt_msi_fail ( cfg_interrupt_msi_fail ), + .cfg_interrupt_msi_attr ( cfg_interrupt_msi_attr ), + .cfg_interrupt_msi_tph_present ( cfg_interrupt_msi_tph_present ), + .cfg_interrupt_msi_tph_type ( cfg_interrupt_msi_tph_type ), + .cfg_interrupt_msi_tph_st_tag ( cfg_interrupt_msi_tph_st_tag ), + .cfg_interrupt_msi_function_number ( cfg_interrupt_msi_function_number ), + + // Interrupt Interface Signals + .cfg_interrupt_int ( cfg_interrupt_int ), + .cfg_interrupt_pending ( cfg_interrupt_pending ), + .cfg_interrupt_sent ( cfg_interrupt_sent ) + + //------------------------------------------------------------------------------------// + // DMA IFC + //------------------------------------------------------------------------------------// +// .s_axis_c2h_0_tvalid (0), +// .s_axis_c2h_0_tdata (0), +// .s_axis_c2h_0_tkeep (0), +// .s_axis_c2h_0_tlast (0), +// .s_axis_c2h_0_tuser (0), +// .s_axis_c2h_0_tready ( ), + +// .s_desc_c2h_0_valid (0), +// .s_desc_c2h_0_start_addr (0), +// .s_desc_c2h_0_byte_cnt (0), +// .s_desc_c2h_0_is_write (0), +// .s_desc_c2h_0_ready ( ), + +// .m_axis_c2h_0_tvalid ( ), +// .m_axis_c2h_0_tdata ( ), +// .m_axis_c2h_0_tkeep ( ), +// .m_axis_c2h_0_tlast ( ), +// .m_axis_c2h_0_tuser ( ), +// .m_axis_c2h_0_tready (0), + +// .s_axis_c2h_1_tvalid (0), +// .s_axis_c2h_1_tdata (0), +// .s_axis_c2h_1_tkeep (0), +// .s_axis_c2h_1_tlast (0), +// .s_axis_c2h_1_tuser (0), +// 
.s_axis_c2h_1_tready ( ), + +// .s_desc_c2h_1_valid (0), +// .s_desc_c2h_1_start_addr (0), +// .s_desc_c2h_1_byte_cnt (0), +// .s_desc_c2h_1_is_write (0), +// .s_desc_c2h_1_ready ( ), + +// .m_axis_c2h_1_tvalid ( ), +// .m_axis_c2h_1_tdata ( ), +// .m_axis_c2h_1_tkeep ( ), +// .m_axis_c2h_1_tlast ( ), +// .m_axis_c2h_1_tuser ( ), +// .m_axis_c2h_1_tready (0), + +// .s_h2c_value_valid (0), +// .s_h2c_value_data (0), +// .s_h2c_value_ready ( ), + +// .m_h2c_value_address ( ), +// .m_h2c_value_is_write ( ), +// .m_h2c_value_valid ( ), +// .m_h2c_value_ready (0), + +// .m_h2c_desc_data ( ), +// .m_h2c_desc_valid ( ), +// .m_h2c_desc_ready (0) + ); + +endmodule diff --git a/backend/u200/xdc/u200_pcie.xdc b/backend/u200/xdc/u200_pcie.xdc new file mode 100644 index 0000000..cbe0c05 --- /dev/null +++ b/backend/u200/xdc/u200_pcie.xdc @@ -0,0 +1,162 @@ +##----------------------------------------------------------------------------- +## +## (c) Copyright 2012-2012 Xilinx, Inc. All rights reserved. +## +## This file contains confidential and proprietary information +## of Xilinx, Inc. and is protected under U.S. and +## international copyright and other intellectual property +## laws. +## +## DISCLAIMER +## This disclaimer is not a license and does not grant any +## rights to the materials distributed herewith. 
Except as +## otherwise provided in a valid license issued to you by +## Xilinx, and to the maximum extent permitted by applicable +## law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND +## WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES +## AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING +## BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON- +## INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and +## (2) Xilinx shall not be liable (whether in contract or tort, +## including negligence, or under any other theory of +## liability) for any loss or damage of any kind or nature +## related to, arising under or in connection with these +## materials, including for any direct, or any indirect, +## special, incidental, or consequential loss or damage +## (including loss of data, profits, goodwill, or any type of +## loss or damage suffered as a result of any action brought +## by a third party) even if such damage or loss was +## reasonably foreseeable or Xilinx had been advised of the +## possibility of the same. +## +## CRITICAL APPLICATIONS +## Xilinx products are not designed or intended to be fail- +## safe, or for use in any application requiring fail-safe +## performance, such as life-support or safety devices or +## systems, Class III medical devices, nuclear facilities, +## applications related to the deployment of airbags, or any +## other applications that could lead to death, personal +## injury, or severe property or environmental damage +## (individually and collectively, "Critical +## Applications"). Customer assumes the sole risk and +## liability of any use of Xilinx products in Critical +## Applications, subject only to applicable laws and +## regulations governing limitations on product liability. +## +## THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS +## PART OF THIS FILE AT ALL TIMES. 
+## +##----------------------------------------------------------------------------- +## +## Project : UltraScale+ FPGA PCI Express v4.0 Integrated Block +## File : xilinx_pcie4_uscale_plus_x1y2.xdc +## Version : 1.3 +##----------------------------------------------------------------------------- +# +############################################################################### +# Vivado - PCIe GUI / User Configuration +############################################################################### +# +# Family # virtexuplus +# Part # xcu200 +# Package # fsgd2104 +# Speed grade # -2 +# PCIe Block # X1Y2 +# Xilinx BNo # 15 +# +# Link Speed # Gen3 - 8.0 Gb/s +# Link Width # X16 +# AXIST Width # 512-bit +# AXIST Frequ # 250 MHz = User Clock +# Core Clock # 500 MHz +# Pipe Clock # 125 MHz (Gen1) : 250 MHz (Gen2/Gen3/Gen4) +# PLL TYPE # QPLL1 +# MSI-X TYPE # HARD +# +# master_gt_quad_inx # 3 +# master_gt_container # 32 +# gt_type # gtye4 +# +# Xilinx Reference Board is AU200 +# +############################################################################### +# User Time Names / User Time Groups / Time Specs +############################################################################### +create_clock -name sys_clk -period 10 [get_ports sys_clk_p] +# +#set_false_path -from [get_ports sys_rst_n] +#set_property PULLUP true [get_ports sys_rst_n] + +set_property IOSTANDARD POD12 [get_ports sys_rst_n] + +set_property PACKAGE_PIN BD21 [get_ports sys_rst_n] + +# +set_property PACKAGE_PIN AM10 [get_ports sys_clk_n] +set_property PACKAGE_PIN AM11 [get_ports sys_clk_p] +# + +# LEDs for ZCU117 +set_property PACKAGE_PIN BC21 [get_ports led_0] +# sys_reset +set_property PACKAGE_PIN BB21 [get_ports led_1] +# user_link_up +set_property PACKAGE_PIN BA20 [get_ports led_2] +# + +set_property IOSTANDARD LVCMOS12 [get_ports led_0] +set_property IOSTANDARD LVCMOS12 [get_ports led_1] +set_property IOSTANDARD LVCMOS12 [get_ports led_2] +# +set_property DRIVE 8 [get_ports led_0] +set_property 
DRIVE 8 [get_ports led_1] +set_property DRIVE 8 [get_ports led_2] + +# +# +# Clock for the 300 MHz clock is already created in the Clock Wizard IP. +# 300 MHz clock pin constraints. +set_property IOSTANDARD DIFF_SSTL12 [get_ports clk_300MHz_p] +set_property IOSTANDARD DIFF_SSTL12 [get_ports clk_300MHz_n] +set_property PACKAGE_PIN AY37 [get_ports clk_300MHz_p] +set_property PACKAGE_PIN AY38 [get_ports clk_300MHz_n] +# +# +# CLOCK_ROOT LOCKing to Reduce CLOCK SKEW +# Add/Edit Clock Routing Option to improve clock path skew +# +# BITFILE/BITSTREAM compress options +# ############################################################################## +# Flash Programming Example Settings: These should be modified to match the target board. +# ############################################################################## +# +# +# sys_clk vs TXOUTCLK +set_clock_groups -name async18 -asynchronous -group [get_clocks {sys_clk}] -group [get_clocks -of_objects [get_pins -hierarchical -filter {NAME =~ *gen_channel_container[32].*gen_gtye4_channel_inst[3].GTYE4_CHANNEL_PRIM_INST/TXOUTCLK}]] +set_clock_groups -name async19 -asynchronous -group [get_clocks -of_objects [get_pins -hierarchical -filter {NAME =~ *gen_channel_container[32].*gen_gtye4_channel_inst[3].GTYE4_CHANNEL_PRIM_INST/TXOUTCLK}]] -group [get_clocks {sys_clk}] +# +# +# +# +# +# +# ASYNC CLOCK GROUPINGS +# sys_clk vs user_clk +set_clock_groups -name async5 -asynchronous -group [get_clocks {sys_clk}] -group [get_clocks -of_objects [get_pins pcie4_uscale_plus_0_i/inst/pcie4_uscale_plus_0_gt_top_i/diablo_gt.diablo_gt_phy_wrapper/phy_clk_i/bufg_gt_userclk/O]] +set_clock_groups -name async6 -asynchronous -group [get_clocks -of_objects [get_pins pcie4_uscale_plus_0_i/inst/pcie4_uscale_plus_0_gt_top_i/diablo_gt.diablo_gt_phy_wrapper/phy_clk_i/bufg_gt_userclk/O]] -group [get_clocks {sys_clk}] +# sys_clk vs pclk +set_clock_groups -name async1 -asynchronous -group [get_clocks {sys_clk}] -group [get_clocks -of_objects [get_pins 
pcie4_uscale_plus_0_i/inst/pcie4_uscale_plus_0_gt_top_i/diablo_gt.diablo_gt_phy_wrapper/phy_clk_i/bufg_gt_pclk/O]] +set_clock_groups -name async2 -asynchronous -group [get_clocks -of_objects [get_pins pcie4_uscale_plus_0_i/inst/pcie4_uscale_plus_0_gt_top_i/diablo_gt.diablo_gt_phy_wrapper/phy_clk_i/bufg_gt_pclk/O]] -group [get_clocks {sys_clk}] +# +# +# +# Add/Edit Pblock slice constraints for 512b soft logic to improve timing +#create_pblock soft_512b; add_cells_to_pblock [get_pblocks soft_512b] [get_cells {pcie4_uscale_plus_0_i/inst/pcie4_uscale_plus_0_pcie_4_0_pipe_inst/pcie_4_0_init_ctrl_inst pcie4_uscale_plus_0_i/inst/pcie4_uscale_plus_0_pcie_4_0_pipe_inst/pcie4_0_512b_intfc_mod}] +# Keep This Logic Left/Right Side Of The PCIe Block (Whichever is near to the FPGA Boundary) +#resize_pblock [get_pblocks soft_512b] -add {SLICE_X157Y300:SLICE_X168Y370} +#set_property EXCLUDE_PLACEMENT 1 [get_pblocks soft_512b] +# +set_clock_groups -name async24 -asynchronous -group [get_clocks -of_objects [get_pins pcie4_uscale_plus_0_i/inst/pcie4_uscale_plus_0_gt_top_i/diablo_gt.diablo_gt_phy_wrapper/phy_clk_i/bufg_gt_intclk/O]] -group [get_clocks {sys_clk}] +# +#create_waiver -type METHODOLOGY -id {LUTAR-1} -user "pcie4_uscale_plus" -desc "user link up is synchronized in the user clk so it is safe to ignore" -internal -scoped -tags 1024539 -objects [get_cells { pcie_app_uscale_i/PIO_i/len_i[5]_i_4 }] -objects [get_pins { pcie4_uscale_plus_0_i/inst/user_lnk_up_cdc/arststages_ff_reg[0]/CLR pcie4_uscale_plus_0_i/inst/user_lnk_up_cdc/arststages_ff_reg[1]/CLR }] + diff --git a/backend/vu13p/top.v b/backend/vu13p/top.v new file mode 100644 index 0000000..aae71ea --- /dev/null +++ b/backend/vu13p/top.v @@ -0,0 +1,643 @@ +`timescale 1ps / 1ps +`define ENABLE_CMAC_RS_FEC + +module top#( + parameter [4:0] PL_LINK_CAP_MAX_LINK_WIDTH = 16, // 1- X1, 2 - X2, 4 - X4, 8 - X8, 16 - X16 + parameter C_DATA_WIDTH = 512, // RX/TX interface data width + parameter AXISTEN_IF_MC_RX_STRADDLE = 1, +
parameter PL_LINK_CAP_MAX_LINK_SPEED = 4, // 1- GEN1, 2 - GEN2, 4 - GEN3, 8 - GEN4 + parameter KEEP_WIDTH = C_DATA_WIDTH / 32, + parameter EXT_PIPE_SIM = "FALSE", // This Parameter has effect on selecting Enable External PIPE Interface in GUI. + parameter AXISTEN_IF_CC_ALIGNMENT_MODE = "FALSE", + parameter AXISTEN_IF_CQ_ALIGNMENT_MODE = "FALSE", + parameter AXISTEN_IF_RQ_ALIGNMENT_MODE = "FALSE", + parameter AXISTEN_IF_RC_ALIGNMENT_MODE = "FALSE", + parameter AXI4_CQ_TUSER_WIDTH = 183, + parameter AXI4_CC_TUSER_WIDTH = 81, + parameter AXI4_RQ_TUSER_WIDTH = 137, + parameter AXI4_RC_TUSER_WIDTH = 161, + parameter AXISTEN_IF_ENABLE_CLIENT_TAG = 0, + parameter RQ_AVAIL_TAG_IDX = 8, + parameter RQ_AVAIL_TAG = 256, + parameter AXISTEN_IF_RQ_PARITY_CHECK = 0, + parameter AXISTEN_IF_CC_PARITY_CHECK = 0, + parameter AXISTEN_IF_RC_PARITY_CHECK = 0, + parameter AXISTEN_IF_CQ_PARITY_CHECK = 0, + parameter AXISTEN_IF_ENABLE_RX_MSG_INTFC = "FALSE", + parameter [17:0] AXISTEN_IF_ENABLE_MSG_ROUTE = 18'h2FFFF + +)( + // PCIe and XDMA + output [(PL_LINK_CAP_MAX_LINK_WIDTH - 1) : 0] pci_exp_txp, + output [(PL_LINK_CAP_MAX_LINK_WIDTH - 1) : 0] pci_exp_txn, + input [(PL_LINK_CAP_MAX_LINK_WIDTH - 1) : 0] pci_exp_rxp, + input [(PL_LINK_CAP_MAX_LINK_WIDTH - 1) : 0] pci_exp_rxn, + + input sys_clk_p, + input sys_clk_n, + input sys_rst_n, + + input board_sys_clk_n, + input board_sys_clk_p +); + + + wire user_lnk_up; + + //----------------------------------------------------------------------------------------------------------------// + // AXI Interface // + //----------------------------------------------------------------------------------------------------------------// + + wire user_clk; + wire user_reset; + + (*mark_debug, mark_debug_clock="user_clk" *)wire s_axis_rq_tlast; + (*mark_debug, mark_debug_clock="user_clk" *)wire [C_DATA_WIDTH-1:0] s_axis_rq_tdata; + (*mark_debug, mark_debug_clock="user_clk" *)wire [AXI4_RQ_TUSER_WIDTH-1:0] s_axis_rq_tuser; + wire [KEEP_WIDTH-1:0] 
s_axis_rq_tkeep; + (*mark_debug, mark_debug_clock="user_clk" *)wire [3:0] s_axis_rq_tready; + (*mark_debug, mark_debug_clock="user_clk" *)wire s_axis_rq_tvalid; + + (*mark_debug, mark_debug_clock="user_clk" *)wire [C_DATA_WIDTH-1:0] m_axis_rc_tdata; + (*mark_debug, mark_debug_clock="user_clk" *)wire [AXI4_RC_TUSER_WIDTH-1:0] m_axis_rc_tuser; + (*mark_debug, mark_debug_clock="user_clk" *)wire m_axis_rc_tlast; + wire [KEEP_WIDTH-1:0] m_axis_rc_tkeep; + (*mark_debug, mark_debug_clock="user_clk" *)wire m_axis_rc_tvalid; + (*mark_debug, mark_debug_clock="user_clk" *)wire m_axis_rc_tready; + + (*mark_debug, mark_debug_clock="user_clk" *)wire [C_DATA_WIDTH-1:0] m_axis_cq_tdata; + (*mark_debug, mark_debug_clock="user_clk" *)wire [AXI4_CQ_TUSER_WIDTH-1:0] m_axis_cq_tuser; + (*mark_debug, mark_debug_clock="user_clk" *)wire m_axis_cq_tlast; + wire [KEEP_WIDTH-1:0] m_axis_cq_tkeep; + (*mark_debug, mark_debug_clock="user_clk" *)wire m_axis_cq_tvalid; + (*mark_debug, mark_debug_clock="user_clk" *)wire m_axis_cq_tready; + + (*mark_debug, mark_debug_clock="user_clk" *)wire [C_DATA_WIDTH-1:0] s_axis_cc_tdata; + (*mark_debug, mark_debug_clock="user_clk" *)wire [AXI4_CC_TUSER_WIDTH-1:0] s_axis_cc_tuser; + (*mark_debug, mark_debug_clock="user_clk" *)wire s_axis_cc_tlast; + wire [KEEP_WIDTH-1:0] s_axis_cc_tkeep; + (*mark_debug, mark_debug_clock="user_clk" *)wire s_axis_cc_tvalid; + (*mark_debug, mark_debug_clock="user_clk" *)wire [3:0] s_axis_cc_tready; + + (*mark_debug, mark_debug_clock="user_clk" *)wire [3:0] pcie_tfc_nph_av; + (*mark_debug, mark_debug_clock="user_clk" *)wire [3:0] pcie_tfc_npd_av; + //----------------------------------------------------------------------------------------------------------------// + // Configuration (CFG) Interface // + //----------------------------------------------------------------------------------------------------------------// + + wire pcie_cq_np_req; + wire [5:0] pcie_cq_np_req_count; + wire [5:0] pcie_rq_seq_num0; + wire 
pcie_rq_seq_num_vld0; + wire [5:0] pcie_rq_seq_num1; + wire pcie_rq_seq_num_vld1; + + //----------------------------------------------------------------------------------------------------------------// + // EP and RP // + //----------------------------------------------------------------------------------------------------------------// + + wire cfg_phy_link_down; + wire [2:0] cfg_negotiated_width; + wire [1:0] cfg_current_speed; + wire [1:0] cfg_max_payload; + wire [2:0] cfg_max_read_req; + wire [15:0] cfg_function_status; + wire [11:0] cfg_function_power_state; + wire [503:0] cfg_vf_status; + wire [1:0] cfg_link_power_state; + + // Error Reporting Interface + wire cfg_err_cor_out; + wire cfg_err_nonfatal_out; + wire cfg_err_fatal_out; + (*mark_debug, mark_debug_clock="user_clk" *)wire [4:0] cfg_local_error_out; + (*mark_debug, mark_debug_clock="user_clk" *)wire cfg_local_error_valid; + + wire [5:0] cfg_ltssm_state; + wire [3:0] cfg_rcb_status; + wire [1:0] cfg_obff_enable; + wire cfg_pl_status_change; + + // Management Interface + wire [9:0] cfg_mgmt_addr; + wire cfg_mgmt_write; + wire [31:0] cfg_mgmt_write_data; + wire [3:0] cfg_mgmt_byte_enable; + wire cfg_mgmt_read; + wire [31:0] cfg_mgmt_read_data; + wire cfg_mgmt_read_write_done; + wire cfg_mgmt_type1_cfg_reg_access; + wire cfg_msg_received; + wire [7:0] cfg_msg_received_data; + wire [4:0] cfg_msg_received_type; + wire cfg_msg_transmit; + wire [2:0] cfg_msg_transmit_type; + wire [31:0] cfg_msg_transmit_data; + wire cfg_msg_transmit_done; + (*mark_debug, mark_debug_clock="user_clk" *)wire [7:0] cfg_fc_ph; + (*mark_debug, mark_debug_clock="user_clk" *)wire [11:0] cfg_fc_pd; + (*mark_debug, mark_debug_clock="user_clk" *)wire [7:0] cfg_fc_nph; + (*mark_debug, mark_debug_clock="user_clk" *)wire [11:0] cfg_fc_npd; + (*mark_debug, mark_debug_clock="user_clk" *)wire [7:0] cfg_fc_cplh; + (*mark_debug, mark_debug_clock="user_clk" *)wire [11:0] cfg_fc_cpld; + (*mark_debug, mark_debug_clock="user_clk" *)wire [2:0] 
cfg_fc_sel; + wire [2:0] cfg_per_func_status_control; + wire [3:0] cfg_per_function_number; + wire cfg_per_function_output_request; + + wire [63:0] cfg_dsn; + wire cfg_power_state_change_interrupt; + wire cfg_power_state_change_ack; + wire cfg_err_cor_in; + wire cfg_err_uncor_in; + + wire [3:0] cfg_flr_in_process; + wire [1:0] cfg_flr_done; + wire [251:0] cfg_vf_flr_in_process; + wire cfg_vf_flr_done; + wire [7:0] cfg_vf_flr_func_num; + + wire cfg_link_training_enable; + + //----------------------------------------------------------------------------------------------------------------// + // EP Only // + //----------------------------------------------------------------------------------------------------------------// + + // Interrupt Interface Signals + wire [3:0] cfg_interrupt_int; + wire [1:0] cfg_interrupt_pending; + wire cfg_interrupt_sent; + + wire [3:0] cfg_interrupt_msi_enable; + wire [11:0] cfg_interrupt_msi_mmenable; + wire cfg_interrupt_msi_mask_update; + wire [31:0] cfg_interrupt_msi_data; + wire [1:0] cfg_interrupt_msi_select; + wire [31:0] cfg_interrupt_msi_int; + wire [63:0] cfg_interrupt_msi_pending_status; + wire cfg_interrupt_msi_sent; + wire cfg_interrupt_msi_fail; + wire [2:0] cfg_interrupt_msi_attr; + wire cfg_interrupt_msi_tph_present; + wire [1:0] cfg_interrupt_msi_tph_type; + wire [7:0] cfg_interrupt_msi_tph_st_tag; + wire [7:0] cfg_interrupt_msi_function_number; + +// EP only + wire cfg_hot_reset_out; + wire cfg_config_space_enable; + wire cfg_req_pm_transition_l23_ready; + +// RP only + wire cfg_hot_reset_in; + + wire [7:0] cfg_ds_port_number; + wire [7:0] cfg_ds_bus_number; + wire [4:0] cfg_ds_device_number; + + //----------------------------------------------------------------------------------------------------------------// + // System(SYS) Interface // + //----------------------------------------------------------------------------------------------------------------// + + wire sys_clk; + wire sys_clk_gt; + wire 
global_reset_100mhz_clk; + wire sys_rst_n_c; + + + wire [33 : 0] tlpSizeDebugPort; + wire RDY_tlpSizeDebugPort; + + + + // Ref clock buffer + IBUFDS_GTE4 # (.REFCLK_HROW_CK_SEL(2'b00)) refclk_ibuf (.O(sys_clk_gt), .ODIV2(sys_clk), .I(sys_clk_p), .CEB(1'b0), .IB(sys_clk_n)); + // Reset buffer + IBUF sys_reset_n_ibuf (.O(sys_rst_n_c), .I(sys_rst_n)); + + + IBUFDS IBUFDS_inst ( + .O(global_reset_100mhz_clk), // 1-bit output: Buffer output + .I(board_sys_clk_p), // 1-bit input: Diff_p buffer input (connect directly to top-level port) + .IB(board_sys_clk_n) // 1-bit input: Diff_n buffer input (connect directly to top-level port) + ); + + pcie4_uscale_plus_0 pcie4_uscale_plus_0_i ( + //---------------------------------------------------------------------------------------// + // PCI Express (pci_exp) Interface // + //---------------------------------------------------------------------------------------// + + // Tx + .pci_exp_txn ( pci_exp_txn ), + .pci_exp_txp ( pci_exp_txp ), + + // Rx + .pci_exp_rxn ( pci_exp_rxn ), + .pci_exp_rxp ( pci_exp_rxp ), + + //---------------------------------------------------------------------------------------// + // AXI Interface // + //---------------------------------------------------------------------------------------// + + .user_clk ( user_clk ), + .user_reset ( user_reset ), + .user_lnk_up ( user_lnk_up ), + // .phy_rdy_out ( phy_rdy_out ), + + .s_axis_rq_tlast ( s_axis_rq_tlast ), + .s_axis_rq_tdata ( s_axis_rq_tdata ), + .s_axis_rq_tuser ( s_axis_rq_tuser ), + .s_axis_rq_tkeep ( s_axis_rq_tkeep ), + .s_axis_rq_tready ( s_axis_rq_tready ), + .s_axis_rq_tvalid ( s_axis_rq_tvalid ), + + .m_axis_rc_tdata ( m_axis_rc_tdata ), + .m_axis_rc_tuser ( m_axis_rc_tuser ), + .m_axis_rc_tlast ( m_axis_rc_tlast ), + .m_axis_rc_tkeep ( m_axis_rc_tkeep ), + .m_axis_rc_tvalid ( m_axis_rc_tvalid ), + .m_axis_rc_tready ( m_axis_rc_tready ), + + .m_axis_cq_tdata ( m_axis_cq_tdata ), + .m_axis_cq_tuser ( m_axis_cq_tuser ), + .m_axis_cq_tlast ( 
m_axis_cq_tlast ), + .m_axis_cq_tkeep ( m_axis_cq_tkeep ), + .m_axis_cq_tvalid ( m_axis_cq_tvalid ), + .m_axis_cq_tready ( m_axis_cq_tready ), + + .s_axis_cc_tdata ( s_axis_cc_tdata ), + .s_axis_cc_tuser ( s_axis_cc_tuser ), + .s_axis_cc_tlast ( s_axis_cc_tlast ), + .s_axis_cc_tkeep ( s_axis_cc_tkeep ), + .s_axis_cc_tvalid ( s_axis_cc_tvalid ), + .s_axis_cc_tready ( s_axis_cc_tready ), + + + + //---------------------------------------------------------------------------------------// + // Configuration (CFG) Interface // + //---------------------------------------------------------------------------------------// + .pcie_tfc_nph_av ( pcie_tfc_nph_av ), + .pcie_tfc_npd_av ( pcie_tfc_npd_av ), + + .pcie_rq_seq_num0 ( pcie_rq_seq_num0 ) , + .pcie_rq_seq_num_vld0 ( pcie_rq_seq_num_vld0 ) , + .pcie_rq_seq_num1 ( pcie_rq_seq_num1 ) , + .pcie_rq_seq_num_vld1 ( pcie_rq_seq_num_vld1 ) , + .pcie_rq_tag0 ( ) , + .pcie_rq_tag1 ( ) , + .pcie_rq_tag_av ( ) , + .pcie_rq_tag_vld0 ( ) , + .pcie_rq_tag_vld1 ( ) , + .pcie_cq_np_req ( {1'b1,pcie_cq_np_req} ), + .pcie_cq_np_req_count ( pcie_cq_np_req_count ), + .cfg_phy_link_down ( cfg_phy_link_down ), + .cfg_phy_link_status ( ), + .cfg_negotiated_width ( cfg_negotiated_width ), + .cfg_current_speed ( cfg_current_speed ), + .cfg_max_payload ( cfg_max_payload ), + .cfg_max_read_req ( cfg_max_read_req ), + .cfg_function_status ( cfg_function_status ), + .cfg_function_power_state ( cfg_function_power_state ), + .cfg_vf_status ( cfg_vf_status ), + .cfg_vf_power_state ( ), + .cfg_link_power_state ( cfg_link_power_state ), + // Error Reporting Interface + .cfg_err_cor_out ( cfg_err_cor_out ), + .cfg_err_nonfatal_out ( cfg_err_nonfatal_out ), + .cfg_err_fatal_out ( cfg_err_fatal_out ), + + .cfg_local_error_out (cfg_local_error_out ), + .cfg_local_error_valid (cfg_local_error_valid ), + + .cfg_ltssm_state ( cfg_ltssm_state ), + .cfg_rx_pm_state ( ), + .cfg_tx_pm_state ( ), + .cfg_rcb_status ( cfg_rcb_status ), + + .cfg_obff_enable ( 
cfg_obff_enable ), + .cfg_pl_status_change ( cfg_pl_status_change ), + + .cfg_tph_requester_enable ( ), + .cfg_tph_st_mode ( ), + .cfg_vf_tph_requester_enable ( ), + .cfg_vf_tph_st_mode ( ), + // Management Interface + .cfg_mgmt_addr ( cfg_mgmt_addr ), + .cfg_mgmt_write ( cfg_mgmt_write ), + .cfg_mgmt_write_data ( cfg_mgmt_write_data ), + .cfg_mgmt_byte_enable ( cfg_mgmt_byte_enable ), + .cfg_mgmt_read ( cfg_mgmt_read ), + .cfg_mgmt_read_data ( cfg_mgmt_read_data ), + .cfg_mgmt_read_write_done ( cfg_mgmt_read_write_done ), + .cfg_mgmt_debug_access (1'b0), + .cfg_mgmt_function_number (8'b0), + .cfg_pm_aspm_l1_entry_reject (1'b0), + .cfg_pm_aspm_tx_l0s_entry_disable (1'b1), + + .cfg_msg_received ( cfg_msg_received ), + .cfg_msg_received_data ( cfg_msg_received_data ), + .cfg_msg_received_type ( cfg_msg_received_type ), + + .cfg_msg_transmit ( cfg_msg_transmit ), + .cfg_msg_transmit_type ( cfg_msg_transmit_type ), + .cfg_msg_transmit_data ( cfg_msg_transmit_data ), + .cfg_msg_transmit_done ( cfg_msg_transmit_done ), + + .cfg_fc_ph ( cfg_fc_ph ), + .cfg_fc_pd ( cfg_fc_pd ), + .cfg_fc_nph ( cfg_fc_nph ), + .cfg_fc_npd ( cfg_fc_npd ), + .cfg_fc_cplh ( cfg_fc_cplh ), + .cfg_fc_cpld ( cfg_fc_cpld ), + .cfg_fc_sel ( cfg_fc_sel ), + + //-------------------------------------------------------------------------------// + // EP and RP // + //-------------------------------------------------------------------------------// + .cfg_bus_number ( ), + .cfg_dsn ( cfg_dsn ), + .cfg_power_state_change_ack ( cfg_power_state_change_ack ), + .cfg_power_state_change_interrupt ( cfg_power_state_change_interrupt ), + .cfg_err_cor_in ( cfg_err_cor_in ), + .cfg_err_uncor_in ( cfg_err_uncor_in ), + + .cfg_flr_in_process ( cfg_flr_in_process ), + .cfg_flr_done ( {2'b0,cfg_flr_done} ), + .cfg_vf_flr_in_process ( cfg_vf_flr_in_process ), + .cfg_vf_flr_done ( cfg_vf_flr_done ), + .cfg_link_training_enable ( cfg_link_training_enable ), + // EP only + .cfg_hot_reset_out ( cfg_hot_reset_out ), + 
.cfg_config_space_enable ( cfg_config_space_enable ), + .cfg_req_pm_transition_l23_ready ( cfg_req_pm_transition_l23_ready ), + + // RP only + .cfg_hot_reset_in ( cfg_hot_reset_in ), + + .cfg_ds_bus_number ( cfg_ds_bus_number ), + .cfg_ds_device_number ( cfg_ds_device_number ), + .cfg_ds_port_number ( cfg_ds_port_number ), + .cfg_vf_flr_func_num (cfg_vf_flr_func_num), + + //-------------------------------------------------------------------------------// + // EP Only // + //-------------------------------------------------------------------------------// + + // Interrupt Interface Signals + .cfg_interrupt_int ( cfg_interrupt_int ), + .cfg_interrupt_pending ( {2'b0,cfg_interrupt_pending} ), + .cfg_interrupt_sent ( cfg_interrupt_sent ), + + + + // MSI Interface + .cfg_interrupt_msi_enable ( cfg_interrupt_msi_enable ), + .cfg_interrupt_msi_mmenable ( cfg_interrupt_msi_mmenable ), + .cfg_interrupt_msi_mask_update ( cfg_interrupt_msi_mask_update ), + .cfg_interrupt_msi_data ( cfg_interrupt_msi_data ), + .cfg_interrupt_msi_select ( cfg_interrupt_msi_select ), + .cfg_interrupt_msi_int ( cfg_interrupt_msi_int ), + .cfg_interrupt_msi_pending_status ( cfg_interrupt_msi_pending_status [31:0]), + .cfg_interrupt_msi_sent ( cfg_interrupt_msi_sent ), + .cfg_interrupt_msi_fail ( cfg_interrupt_msi_fail ), + .cfg_interrupt_msi_attr ( cfg_interrupt_msi_attr ), + .cfg_interrupt_msi_tph_present ( cfg_interrupt_msi_tph_present ), + .cfg_interrupt_msi_tph_type ( cfg_interrupt_msi_tph_type ), + .cfg_interrupt_msi_tph_st_tag ( cfg_interrupt_msi_tph_st_tag ), + .cfg_interrupt_msi_pending_status_function_num ( 2'b0), + .cfg_interrupt_msi_pending_status_data_enable ( 1'b0), + + .cfg_interrupt_msi_function_number ( cfg_interrupt_msi_function_number ), + + + //--------------------------------------------------------------------------------------// + // System(SYS) Interface // + //--------------------------------------------------------------------------------------// + + .sys_clk ( sys_clk ), 
+ .sys_clk_gt ( sys_clk_gt ), + .sys_reset ( sys_rst_n_c ) + ); + +//------------------------------------------------------------------------------------------------------------------// +// PIO Example Design Top Level // +//------------------------------------------------------------------------------------------------------------------// + mkRawTestDmaController dmac_i ( + .CLK ( user_clk ), + .RST_N ( ~user_reset ), + .user_lnk_up ( user_lnk_up ), + // .sys_rst ( sys_rst_n_c ), + + //-------------------------------------------------------------------------------------// + // AXI Interface // + //-------------------------------------------------------------------------------------// + + .m_axis_rq_tlast ( s_axis_rq_tlast ), + .m_axis_rq_tdata ( s_axis_rq_tdata ), + .m_axis_rq_tuser ( s_axis_rq_tuser ), + .m_axis_rq_tkeep ( s_axis_rq_tkeep ), + .m_axis_rq_tready ( s_axis_rq_tready[0] ), + .m_axis_rq_tvalid ( s_axis_rq_tvalid ), + + .s_axis_rc_tdata ( m_axis_rc_tdata ), + .s_axis_rc_tuser ( m_axis_rc_tuser ), + .s_axis_rc_tlast ( m_axis_rc_tlast ), + .s_axis_rc_tkeep ( m_axis_rc_tkeep ), + .s_axis_rc_tvalid ( m_axis_rc_tvalid ), + .s_axis_rc_tready ( m_axis_rc_tready ), + + .s_axis_cq_tdata ( m_axis_cq_tdata ), + .s_axis_cq_tuser ( m_axis_cq_tuser ), + .s_axis_cq_tlast ( m_axis_cq_tlast ), + .s_axis_cq_tkeep ( m_axis_cq_tkeep ), + .s_axis_cq_tvalid ( m_axis_cq_tvalid ), + .s_axis_cq_tready ( m_axis_cq_tready ), + + .m_axis_cc_tdata ( s_axis_cc_tdata ), + .m_axis_cc_tuser ( s_axis_cc_tuser ), + .m_axis_cc_tlast ( s_axis_cc_tlast ), + .m_axis_cc_tkeep ( s_axis_cc_tkeep ), + .m_axis_cc_tvalid ( s_axis_cc_tvalid ), + .m_axis_cc_tready ( s_axis_cc_tready[0] ), + + + // .pcie_rq_seq_num ( 'h0), + // .pcie_rq_seq_num_vld ( 'h0), + // .pcie_rq_tag ( 'h0), + // .pcie_rq_tag_vld ( 'h0), + .pcie_tfc_nph_av ( pcie_tfc_nph_av[1:0]), + .pcie_tfc_npd_av ( pcie_tfc_npd_av[1:0]), + .pcie_cq_np_req ( pcie_cq_np_req ), + .pcie_cq_np_req_count ( pcie_cq_np_req_count ), + + 
//--------------------------------------------------------------------------------// + // Configuration (CFG) Interface // + //--------------------------------------------------------------------------------// + + //--------------------------------------------------------------------------------// + // EP and RP // + //--------------------------------------------------------------------------------// + .cfg_phy_link_down ( cfg_phy_link_down ), + .cfg_negotiated_width ( cfg_negotiated_width ), + .cfg_current_speed ( cfg_current_speed ), + .cfg_max_payload ( cfg_max_payload ), + .cfg_max_read_req ( cfg_max_read_req ), + .cfg_function_status ( cfg_function_status [7:0] ), + .cfg_function_power_state ( cfg_function_power_state [5:0] ), + .cfg_vf_status ( cfg_vf_status ), + .cfg_link_power_state ( cfg_link_power_state ), + + // Error Reporting Interface + .cfg_err_cor_out ( cfg_err_cor_out ), + .cfg_err_nonfatal_out ( cfg_err_nonfatal_out ), + .cfg_err_fatal_out ( cfg_err_fatal_out ), +// .cfg_ltr_enable ( 1'b0 ), + .cfg_ltssm_state ( cfg_ltssm_state ), + .cfg_rcb_status ( cfg_rcb_status [1:0]), + .cfg_obff_enable ( cfg_obff_enable ), +// .cfg_pl_status_change ( cfg_pl_status_change ), + + // Management Interface + .cfg_mgmt_addr ( cfg_mgmt_addr ), + .cfg_mgmt_write ( cfg_mgmt_write ), + .cfg_mgmt_write_data ( cfg_mgmt_write_data ), + .cfg_mgmt_byte_enable ( cfg_mgmt_byte_enable ), + .cfg_mgmt_read ( cfg_mgmt_read ), + .cfg_mgmt_read_data ( cfg_mgmt_read_data ), + .cfg_mgmt_read_write_done ( cfg_mgmt_read_write_done ), +// .cfg_mgmt_type1_cfg_reg_access ( cfg_mgmt_type1_cfg_reg_access ), + .cfg_msg_received ( cfg_msg_received ), + .cfg_msg_received_data ( cfg_msg_received_data ), + .cfg_msg_received_type ( cfg_msg_received_type ), + .cfg_msg_transmit ( cfg_msg_transmit ), + .cfg_msg_transmit_type ( cfg_msg_transmit_type ), + .cfg_msg_transmit_data ( cfg_msg_transmit_data ), + .cfg_msg_transmit_done ( cfg_msg_transmit_done ), + + .cfg_fc_ph ( cfg_fc_ph ), + .cfg_fc_pd ( 
cfg_fc_pd ), + .cfg_fc_nph ( cfg_fc_nph ), + .cfg_fc_npd ( cfg_fc_npd ), + .cfg_fc_cplh ( cfg_fc_cplh ), + .cfg_fc_cpld ( cfg_fc_cpld ), + .cfg_fc_sel ( cfg_fc_sel ), + +// .cfg_per_func_status_control ( cfg_per_func_status_control ), +// .cfg_per_function_number ( cfg_per_function_number ), +// .cfg_per_function_output_request ( cfg_per_function_output_request ), + + .cfg_dsn ( cfg_dsn ), + .cfg_power_state_change_ack ( cfg_power_state_change_ack ), + .cfg_power_state_change_interrupt ( cfg_power_state_change_interrupt ), + .cfg_err_cor_in ( cfg_err_cor_in ), + .cfg_err_uncor_in ( cfg_err_uncor_in ), + + .cfg_flr_in_process ( cfg_flr_in_process [1:0] ), + .cfg_flr_done ( cfg_flr_done ), + .cfg_vf_flr_in_process ( cfg_vf_flr_in_process ), + .cfg_vf_flr_done ( cfg_vf_flr_done ), + .cfg_vf_flr_func_num ( cfg_vf_flr_func_num ), + + .cfg_link_training_enable ( cfg_link_training_enable ), + + .cfg_ds_port_number ( cfg_ds_port_number ), + .cfg_hot_reset_in ( cfg_hot_reset_out ), + .cfg_config_space_enable ( cfg_config_space_enable ), + .cfg_req_pm_transition_l23_ready ( cfg_req_pm_transition_l23_ready ), + + // RP only + .cfg_hot_reset_out ( cfg_hot_reset_in ), + + .cfg_ds_bus_number ( cfg_ds_bus_number ), + .cfg_ds_device_number ( cfg_ds_device_number ), + .cfg_ds_function_number ( ), + + //-------------------------------------------------------------------------------------// + // EP Only // + //-------------------------------------------------------------------------------------// + + .cfg_interrupt_msi_enable ( cfg_interrupt_msi_enable[0] ), + .cfg_interrupt_msi_mmenable ( cfg_interrupt_msi_mmenable[5:0] ), + .cfg_interrupt_msi_mask_update ( cfg_interrupt_msi_mask_update ), + .cfg_interrupt_msi_data ( cfg_interrupt_msi_data ), + .cfg_interrupt_msi_select ( cfg_interrupt_msi_select ), + .cfg_interrupt_msi_int ( cfg_interrupt_msi_int ), + .cfg_interrupt_msi_pending_status ( cfg_interrupt_msi_pending_status ), + .cfg_interrupt_msi_sent ( cfg_interrupt_msi_sent ), + 
.cfg_interrupt_msi_fail ( cfg_interrupt_msi_fail ), + .cfg_interrupt_msi_attr ( cfg_interrupt_msi_attr ), + .cfg_interrupt_msi_tph_present ( cfg_interrupt_msi_tph_present ), + .cfg_interrupt_msi_tph_type ( cfg_interrupt_msi_tph_type ), + .cfg_interrupt_msi_tph_st_tag ( cfg_interrupt_msi_tph_st_tag ), + .cfg_interrupt_msi_function_number ( cfg_interrupt_msi_function_number ), + + // Interrupt Interface Signals + .cfg_interrupt_int ( cfg_interrupt_int ), + .cfg_interrupt_pending ( cfg_interrupt_pending ), + .cfg_interrupt_sent ( cfg_interrupt_sent ), + + // debug + .tlpSizeDebugPort(tlpSizeDebugPort), + .RDY_tlpSizeDebugPort(RDY_tlpSizeDebugPort) + + + //------------------------------------------------------------------------------------// + // DMA IFC + //------------------------------------------------------------------------------------// +// .s_axis_c2h_0_tvalid (0), +// .s_axis_c2h_0_tdata (0), +// .s_axis_c2h_0_tkeep (0), +// .s_axis_c2h_0_tlast (0), +// .s_axis_c2h_0_tuser (0), +// .s_axis_c2h_0_tready ( ), + +// .s_desc_c2h_0_valid (0), +// .s_desc_c2h_0_start_addr (0), +// .s_desc_c2h_0_byte_cnt (0), +// .s_desc_c2h_0_is_write (0), +// .s_desc_c2h_0_ready ( ), + +// .m_axis_c2h_0_tvalid ( ), +// .m_axis_c2h_0_tdata ( ), +// .m_axis_c2h_0_tkeep ( ), +// .m_axis_c2h_0_tlast ( ), +// .m_axis_c2h_0_tuser ( ), +// .m_axis_c2h_0_tready (0), + +// .s_axis_c2h_1_tvalid (0), +// .s_axis_c2h_1_tdata (0), +// .s_axis_c2h_1_tkeep (0), +// .s_axis_c2h_1_tlast (0), +// .s_axis_c2h_1_tuser (0), +// .s_axis_c2h_1_tready ( ), + +// .s_desc_c2h_1_valid (0), +// .s_desc_c2h_1_start_addr (0), +// .s_desc_c2h_1_byte_cnt (0), +// .s_desc_c2h_1_is_write (0), +// .s_desc_c2h_1_ready ( ), + +// .m_axis_c2h_1_tvalid ( ), +// .m_axis_c2h_1_tdata ( ), +// .m_axis_c2h_1_tkeep ( ), +// .m_axis_c2h_1_tlast ( ), +// .m_axis_c2h_1_tuser ( ), +// .m_axis_c2h_1_tready (0), + +// .s_h2c_value_valid (0), +// .s_h2c_value_data (0), +// .s_h2c_value_ready ( ), + +// .m_h2c_value_address ( ), +// 
.m_h2c_value_is_write ( ), +// .m_h2c_value_valid ( ), +// .m_h2c_value_ready (0), + +// .m_h2c_desc_data ( ), +// .m_h2c_desc_valid ( ), +// .m_h2c_desc_ready (0) + ); + + +endmodule \ No newline at end of file diff --git a/backend/vu13p/xdc/board_system_clock.xdc b/backend/vu13p/xdc/board_system_clock.xdc new file mode 100644 index 0000000..29ff30f --- /dev/null +++ b/backend/vu13p/xdc/board_system_clock.xdc @@ -0,0 +1,5 @@ +set_property PACKAGE_PIN BA23 [get_ports {board_sys_clk_n}] +set_property IOSTANDARD DIFF_SSTL12 [get_ports {board_sys_clk_n}] + +set_property PACKAGE_PIN AY23 [get_ports {board_sys_clk_p}] +set_property IOSTANDARD DIFF_SSTL12 [get_ports {board_sys_clk_p}] \ No newline at end of file diff --git a/backend/vu13p/xdc/constraint.xdc b/backend/vu13p/xdc/constraint.xdc new file mode 100644 index 0000000..18d5249 --- /dev/null +++ b/backend/vu13p/xdc/constraint.xdc @@ -0,0 +1,24 @@ +# reset and clock +set_property LOC [get_package_pins -filter {PIN_FUNC =~ *_PERSTN0_65}] [get_ports sys_rst_n] +set_property LOC [get_package_pins -of_objects [get_bels [get_sites -filter {NAME =~ *COMMON*} -of_objects [get_iobanks -of_objects [get_sites GTYE4_CHANNEL_X1Y23]]]/REFCLK0P]] [get_ports sys_clk_p] +set_property LOC [get_package_pins -of_objects [get_bels [get_sites -filter {NAME =~ *COMMON*} -of_objects [get_iobanks -of_objects [get_sites GTYE4_CHANNEL_X1Y23]]]/REFCLK0N]] [get_ports sys_clk_n] + +# bitstream +set_property BITSTREAM.CONFIG.EXTMASTERCCLK_EN div-1 [current_design] +set_property BITSTREAM.CONFIG.BPI_SYNC_MODE Type1 [current_design] +set_property BITSTREAM.GENERAL.COMPRESS TRUE [current_design] +set_property BITSTREAM.CONFIG.UNUSEDPIN Pulldown [current_design] + +# global voltage level +set_property PULLUP true [get_ports sys_rst_n] +set_property CONFIG_MODE BPI16 [current_design] +set_property CONFIG_VOLTAGE 1.8 [current_design] +set_property IOSTANDARD LVCMOS18 [get_ports sys_rst_n] + +# clock +create_clock -name sys_clk -period 10 [get_ports 
sys_clk_p] +create_clock -name board_sys_clk -period 10 [get_ports board_sys_clk_p] + + +# # very tricky one +# set_property USER_SLR_ASSIGNMENT SLR2 [get_cells xdma_0_i/inst/pcie4_ip_i/inst/user_reset_reg] \ No newline at end of file diff --git a/backend/vu13p/xdc/pcie.xdc b/backend/vu13p/xdc/pcie.xdc new file mode 100644 index 0000000..d7a9211 --- /dev/null +++ b/backend/vu13p/xdc/pcie.xdc @@ -0,0 +1,67 @@ +set_property PACKAGE_PIN AR26 [get_ports sys_rst_n] +set_property PACKAGE_PIN AK10 [get_ports {sys_clk_n}] +set_property PACKAGE_PIN AK11 [get_ports {sys_clk_p}] +set_property PACKAGE_PIN AF1 [get_ports {pci_exp_rxn[0]}] +set_property PACKAGE_PIN AF2 [get_ports {pci_exp_rxp[0]}] +set_property PACKAGE_PIN AF6 [get_ports {pci_exp_txn[0]}] +set_property PACKAGE_PIN AF7 [get_ports {pci_exp_txp[0]}] +set_property PACKAGE_PIN AG3 [get_ports {pci_exp_rxn[1]}] +set_property PACKAGE_PIN AG4 [get_ports {pci_exp_rxp[1]}] +set_property PACKAGE_PIN AG8 [get_ports {pci_exp_txn[1]}] +set_property PACKAGE_PIN AG9 [get_ports {pci_exp_txp[1]}] +set_property PACKAGE_PIN AH1 [get_ports {pci_exp_rxn[2]}] +set_property PACKAGE_PIN AH2 [get_ports {pci_exp_rxp[2]}] +set_property PACKAGE_PIN AH6 [get_ports {pci_exp_txn[2]}] +set_property PACKAGE_PIN AH7 [get_ports {pci_exp_txp[2]}] +set_property PACKAGE_PIN AJ3 [get_ports {pci_exp_rxn[3]}] +set_property PACKAGE_PIN AJ4 [get_ports {pci_exp_rxp[3]}] +set_property PACKAGE_PIN AJ8 [get_ports {pci_exp_txn[3]}] +set_property PACKAGE_PIN AJ9 [get_ports {pci_exp_txp[3]}] +set_property PACKAGE_PIN AK1 [get_ports {pci_exp_rxn[4]}] +set_property PACKAGE_PIN AK2 [get_ports {pci_exp_rxp[4]}] +set_property PACKAGE_PIN AK6 [get_ports {pci_exp_txn[4]}] +set_property PACKAGE_PIN AK7 [get_ports {pci_exp_txp[4]}] +set_property PACKAGE_PIN AL3 [get_ports {pci_exp_rxn[5]}] +set_property PACKAGE_PIN AL4 [get_ports {pci_exp_rxp[5]}] +set_property PACKAGE_PIN AL8 [get_ports {pci_exp_txn[5]}] +set_property PACKAGE_PIN AL9 [get_ports {pci_exp_txp[5]}] 
+set_property PACKAGE_PIN AM1 [get_ports {pci_exp_rxn[6]}] +set_property PACKAGE_PIN AM2 [get_ports {pci_exp_rxp[6]}] +set_property PACKAGE_PIN AM6 [get_ports {pci_exp_txn[6]}] +set_property PACKAGE_PIN AM7 [get_ports {pci_exp_txp[6]}] +set_property PACKAGE_PIN AN3 [get_ports {pci_exp_rxn[7]}] +set_property PACKAGE_PIN AN4 [get_ports {pci_exp_rxp[7]}] +set_property PACKAGE_PIN AN8 [get_ports {pci_exp_txn[7]}] +set_property PACKAGE_PIN AN9 [get_ports {pci_exp_txp[7]}] +set_property PACKAGE_PIN AP1 [get_ports {pci_exp_rxn[8]}] +set_property PACKAGE_PIN AP2 [get_ports {pci_exp_rxp[8]}] +set_property PACKAGE_PIN AP6 [get_ports {pci_exp_txn[8]}] +set_property PACKAGE_PIN AP7 [get_ports {pci_exp_txp[8]}] +set_property PACKAGE_PIN AR3 [get_ports {pci_exp_rxn[9]}] +set_property PACKAGE_PIN AR4 [get_ports {pci_exp_rxp[9]}] +set_property PACKAGE_PIN AR8 [get_ports {pci_exp_txn[9]}] +set_property PACKAGE_PIN AR9 [get_ports {pci_exp_txp[9]}] +set_property PACKAGE_PIN AT1 [get_ports {pci_exp_rxn[10]}] +set_property PACKAGE_PIN AT2 [get_ports {pci_exp_rxp[10]}] +set_property PACKAGE_PIN AT6 [get_ports {pci_exp_txn[10]}] +set_property PACKAGE_PIN AT7 [get_ports {pci_exp_txp[10]}] +set_property PACKAGE_PIN AU3 [get_ports {pci_exp_rxn[11]}] +set_property PACKAGE_PIN AU4 [get_ports {pci_exp_rxp[11]}] +set_property PACKAGE_PIN AU8 [get_ports {pci_exp_txn[11]}] +set_property PACKAGE_PIN AU9 [get_ports {pci_exp_txp[11]}] +set_property PACKAGE_PIN AV1 [get_ports {pci_exp_rxn[12]}] +set_property PACKAGE_PIN AV2 [get_ports {pci_exp_rxp[12]}] +set_property PACKAGE_PIN AV6 [get_ports {pci_exp_txn[12]}] +set_property PACKAGE_PIN AV7 [get_ports {pci_exp_txp[12]}] +set_property PACKAGE_PIN AW3 [get_ports {pci_exp_rxn[13]}] +set_property PACKAGE_PIN AW4 [get_ports {pci_exp_rxp[13]}] +set_property PACKAGE_PIN BB4 [get_ports {pci_exp_txn[13]}] +set_property PACKAGE_PIN BB5 [get_ports {pci_exp_txp[13]}] +set_property PACKAGE_PIN BA1 [get_ports {pci_exp_rxn[14]}] +set_property PACKAGE_PIN BA2 
[get_ports {pci_exp_rxp[14]}] +set_property PACKAGE_PIN BD4 [get_ports {pci_exp_txn[14]}] +set_property PACKAGE_PIN BD5 [get_ports {pci_exp_txp[14]}] +set_property PACKAGE_PIN BC1 [get_ports {pci_exp_rxn[15]}] +set_property PACKAGE_PIN BC2 [get_ports {pci_exp_rxp[15]}] +set_property PACKAGE_PIN BF4 [get_ports {pci_exp_txn[15]}] +set_property PACKAGE_PIN BF5 [get_ports {pci_exp_txp[15]}] \ No newline at end of file diff --git a/cocotb/.gitignore b/cocotb/.gitignore new file mode 100644 index 0000000..dc75fb2 --- /dev/null +++ b/cocotb/.gitignore @@ -0,0 +1,4 @@ +__pycache__ +sim_build +*.v +log \ No newline at end of file diff --git a/cocotb/bdmatb.py b/cocotb/bdmatb.py new file mode 100644 index 0000000..6bebaf2 --- /dev/null +++ b/cocotb/bdmatb.py @@ -0,0 +1,503 @@ +import logging +import os +import random + +import cocotb +from cocotb.triggers import RisingEdge, FallingEdge, Timer +from cocotb.regression import TestFactory +from cocotb.clock import Clock + +from cocotbext.pcie.core import RootComplex +from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice +from cocotbext.axi.stream import define_stream +from cocotbext.axi import ( + AxiStreamBus, AxiStreamSource, AxiStreamSink, AxiStreamMonitor, AxiStreamFrame) + +# class TB architecture +# -------------- ------------- ----------- +# | Root Complex | <-> | End Pointer | <-> | Dut(DMAC) | +# -------------- ------------- ----------- + +DescBus, DescTransaction, DescSource, DescSink, DescMonitor = define_stream("Desc", + signals=[ + "start_addr", "byte_cnt", "is_write", "valid", "ready"] + ) + + +class BdmaTb(object): + def __init__(self, dut, msix=False): + self._pcie_init(dut, msix) + + def _pcie_init(self, dut, msix=False): + self.dut = dut + + self.log = logging.getLogger("cocotb.tb") + self.log.setLevel(logging.DEBUG) + + self.clock = dut.CLK + self.resetn = dut.RST_N + + self._bus_width = 512 + self._bus_bytes = 64 + + # PCIe + self.rc = RootComplex() + self.rc.max_payload_size = 1 + 
self.rc.max_read_request_size = 2 + + self.rc.log.setLevel(logging.INFO) + + cq_straddle = False + cc_straddle = False + rq_straddle = True + rc_straddle = True + rc_4tlp_straddle = False + + self.client_tag = bool(int(os.getenv("CLIENT_TAG", "1"))) + + self.dev = UltraScalePlusPcieDevice( + # configuration options + pcie_generation=3, + # pcie_link_width=2, + # user_clk_frequency=250e6, + alignment="dword", + cq_straddle=cq_straddle, + cc_straddle=cc_straddle, + rq_straddle=rq_straddle, + rc_straddle=rc_straddle, + rc_4tlp_straddle=rc_4tlp_straddle, + pf_count=1, + max_payload_size=256, + enable_client_tag=self.client_tag, + enable_extended_tag=True, + enable_parity=False, + enable_rx_msg_interface=False, + enable_sriov=False, + enable_extended_configuration=False, + + pf0_msi_enable=True, + pf0_msi_count=32, + pf1_msi_enable=False, + pf1_msi_count=1, + pf2_msi_enable=False, + pf2_msi_count=1, + pf3_msi_enable=False, + pf3_msi_count=1, + pf0_msix_enable=msix, + pf0_msix_table_size=63, + pf0_msix_table_bir=4, + pf0_msix_table_offset=0x00000000, + pf0_msix_pba_bir=4, + pf0_msix_pba_offset=0x00008000, + pf1_msix_enable=False, + pf1_msix_table_size=0, + pf1_msix_table_bir=0, + pf1_msix_table_offset=0x00000000, + pf1_msix_pba_bir=0, + pf1_msix_pba_offset=0x00000000, + pf2_msix_enable=False, + pf2_msix_table_size=0, + pf2_msix_table_bir=0, + pf2_msix_table_offset=0x00000000, + pf2_msix_pba_bir=0, + pf2_msix_pba_offset=0x00000000, + pf3_msix_enable=False, + pf3_msix_table_size=0, + pf3_msix_table_bir=0, + pf3_msix_table_offset=0x00000000, + pf3_msix_pba_bir=0, + pf3_msix_pba_offset=0x00000000, + + # signals + user_clk=self.clock, + # user_reset=~self.resetn, + user_lnk_up=dut.user_lnk_up, + # sys_clk=dut.sys_clk, + # sys_clk_gt=dut.sys_clk_gt, + sys_reset=dut.sys_reset, + # phy_rdy_out=dut.phy_rdy_out, + + rq_bus=AxiStreamBus.from_prefix(dut, "m_axis_rq"), + pcie_rq_seq_num0=dut.pcie_rq_seq_num0, + pcie_rq_seq_num_vld0=dut.pcie_rq_seq_num_vld0, + 
pcie_rq_seq_num1=dut.pcie_rq_seq_num1, + pcie_rq_seq_num_vld1=dut.pcie_rq_seq_num_vld1, + pcie_rq_tag0=dut.pcie_rq_tag0, + pcie_rq_tag1=dut.pcie_rq_tag1, + # pcie_rq_tag_av=dut.pcie_rq_tag_av, + pcie_rq_tag_vld0=dut.pcie_rq_tag_vld0, + pcie_rq_tag_vld1=dut.pcie_rq_tag_vld1, + + rc_bus=AxiStreamBus.from_prefix(dut, "s_axis_rc"), + + cq_bus=AxiStreamBus.from_prefix(dut, "s_axis_cq"), + pcie_cq_np_req=dut.pcie_cq_np_req, + pcie_cq_np_req_count=dut.pcie_cq_np_req_count, + + cc_bus=AxiStreamBus.from_prefix(dut, "m_axis_cc"), + + pcie_tfc_nph_av=dut.pcie_tfc_nph_av, + pcie_tfc_npd_av=dut.pcie_tfc_npd_av, + cfg_phy_link_down=dut.cfg_phy_link_down, + cfg_phy_link_status=dut.cfg_phy_link_status, + cfg_negotiated_width=dut.cfg_negotiated_width, + cfg_current_speed=dut.cfg_current_speed, + cfg_max_payload=dut.cfg_max_payload, + cfg_max_read_req=dut.cfg_max_read_req, + cfg_function_status=dut.cfg_function_status, + cfg_function_power_state=dut.cfg_function_power_state, + cfg_vf_status=dut.cfg_vf_status, + cfg_vf_power_state=dut.cfg_vf_power_state, + cfg_link_power_state=dut.cfg_link_power_state, + cfg_mgmt_addr=dut.cfg_mgmt_addr, + cfg_mgmt_function_number=dut.cfg_mgmt_function_number, + cfg_mgmt_write=dut.cfg_mgmt_write, + cfg_mgmt_write_data=dut.cfg_mgmt_write_data, + cfg_mgmt_byte_enable=dut.cfg_mgmt_byte_enable, + cfg_mgmt_read=dut.cfg_mgmt_read, + cfg_mgmt_read_data=dut.cfg_mgmt_read_data, + cfg_mgmt_read_write_done=dut.cfg_mgmt_read_write_done, + cfg_mgmt_debug_access=dut.cfg_mgmt_debug_access, + cfg_err_cor_out=dut.cfg_err_cor_out, + cfg_err_nonfatal_out=dut.cfg_err_nonfatal_out, + cfg_err_fatal_out=dut.cfg_err_fatal_out, + cfg_local_error_valid=dut.cfg_local_error_valid, + cfg_local_error_out=dut.cfg_local_error_out, + cfg_ltssm_state=dut.cfg_ltssm_state, + cfg_rx_pm_state=dut.cfg_rx_pm_state, + cfg_tx_pm_state=dut.cfg_tx_pm_state, + cfg_rcb_status=dut.cfg_rcb_status, + cfg_obff_enable=dut.cfg_obff_enable, + # cfg_pl_status_change=dut.cfg_pl_status_change, + # 
cfg_tph_requester_enable=dut.cfg_tph_requester_enable, + # cfg_tph_st_mode=dut.cfg_tph_st_mode, + # cfg_vf_tph_requester_enable=dut.cfg_vf_tph_requester_enable, + # cfg_vf_tph_st_mode=dut.cfg_vf_tph_st_mode, + cfg_msg_received=dut.cfg_msg_received, + cfg_msg_received_data=dut.cfg_msg_received_data, + cfg_msg_received_type=dut.cfg_msg_received_type, + cfg_msg_transmit=dut.cfg_msg_transmit, + cfg_msg_transmit_type=dut.cfg_msg_transmit_type, + cfg_msg_transmit_data=dut.cfg_msg_transmit_data, + cfg_msg_transmit_done=dut.cfg_msg_transmit_done, + cfg_fc_ph=dut.cfg_fc_ph, + cfg_fc_pd=dut.cfg_fc_pd, + cfg_fc_nph=dut.cfg_fc_nph, + cfg_fc_npd=dut.cfg_fc_npd, + cfg_fc_cplh=dut.cfg_fc_cplh, + cfg_fc_cpld=dut.cfg_fc_cpld, + cfg_fc_sel=dut.cfg_fc_sel, + cfg_dsn=dut.cfg_dsn, + cfg_bus_number=dut.cfg_bus_number, + cfg_power_state_change_ack=dut.cfg_power_state_change_ack, + cfg_power_state_change_interrupt=dut.cfg_power_state_change_interrupt, + cfg_err_cor_in=dut.cfg_err_cor_in, + cfg_err_uncor_in=dut.cfg_err_uncor_in, + cfg_flr_in_process=dut.cfg_flr_in_process, + cfg_flr_done=dut.cfg_flr_done, + cfg_vf_flr_in_process=dut.cfg_vf_flr_in_process, + cfg_vf_flr_func_num=dut.cfg_vf_flr_func_num, + cfg_vf_flr_done=dut.cfg_vf_flr_done, + cfg_link_training_enable=dut.cfg_link_training_enable, + cfg_interrupt_int=dut.cfg_interrupt_int, + cfg_interrupt_pending=dut.cfg_interrupt_pending, + cfg_interrupt_sent=dut.cfg_interrupt_sent, + cfg_interrupt_msi_enable=dut.cfg_interrupt_msi_enable, + cfg_interrupt_msi_mmenable=dut.cfg_interrupt_msi_mmenable, + cfg_interrupt_msi_mask_update=dut.cfg_interrupt_msi_mask_update, + cfg_interrupt_msi_data=dut.cfg_interrupt_msi_data, + cfg_interrupt_msi_select=dut.cfg_interrupt_msi_select, + cfg_interrupt_msi_int=dut.cfg_interrupt_msi_int, + cfg_interrupt_msi_pending_status=dut.cfg_interrupt_msi_pending_status, + cfg_interrupt_msi_pending_status_data_enable=dut.cfg_interrupt_msi_pending_status_data_enable, + 
cfg_interrupt_msi_pending_status_function_num=dut.cfg_interrupt_msi_pending_status_function_num, + cfg_interrupt_msi_sent=dut.cfg_interrupt_msi_sent, + cfg_interrupt_msi_fail=dut.cfg_interrupt_msi_fail, + cfg_interrupt_msi_attr=dut.cfg_interrupt_msi_attr, + cfg_interrupt_msi_tph_present=dut.cfg_interrupt_msi_tph_present, + cfg_interrupt_msi_tph_type=dut.cfg_interrupt_msi_tph_type, + cfg_interrupt_msi_tph_st_tag=dut.cfg_interrupt_msi_tph_st_tag, + cfg_interrupt_msi_function_number=dut.cfg_interrupt_msi_function_number, + cfg_pm_aspm_l1_entry_reject=dut.cfg_pm_aspm_l1_entry_reject, + cfg_pm_aspm_tx_l0s_entry_disable=dut.cfg_pm_aspm_tx_l0s_entry_disable, + cfg_hot_reset_out=dut.cfg_hot_reset_out, + cfg_config_space_enable=dut.cfg_config_space_enable, + cfg_req_pm_transition_l23_ready=dut.cfg_req_pm_transition_l23_ready, + cfg_hot_reset_in=dut.cfg_hot_reset_in, + cfg_ds_port_number=dut.cfg_ds_port_number, + cfg_ds_bus_number=dut.cfg_ds_bus_number, + cfg_ds_device_number=dut.cfg_ds_device_number, + ) + + self.dev.log.setLevel(logging.INFO) + + dut.pcie_cq_np_req.setimmediatevalue(1) + dut.cfg_mgmt_addr.setimmediatevalue(0) + dut.cfg_mgmt_function_number.setimmediatevalue(0) + dut.cfg_mgmt_write.setimmediatevalue(0) + dut.cfg_mgmt_write_data.setimmediatevalue(0) + dut.cfg_mgmt_byte_enable.setimmediatevalue(0) + dut.cfg_mgmt_read.setimmediatevalue(0) + dut.cfg_mgmt_debug_access.setimmediatevalue(0) + dut.cfg_msg_transmit.setimmediatevalue(0) + dut.cfg_msg_transmit_type.setimmediatevalue(0) + dut.cfg_msg_transmit_data.setimmediatevalue(0) + dut.cfg_fc_sel.setimmediatevalue(0) + dut.cfg_dsn.setimmediatevalue(0) + dut.cfg_power_state_change_ack.setimmediatevalue(0) + dut.cfg_err_cor_in.setimmediatevalue(0) + dut.cfg_err_uncor_in.setimmediatevalue(0) + dut.cfg_flr_done.setimmediatevalue(0) + dut.cfg_vf_flr_func_num.setimmediatevalue(0) + dut.cfg_vf_flr_done.setimmediatevalue(0) + dut.cfg_link_training_enable.setimmediatevalue(1) + dut.cfg_interrupt_int.setimmediatevalue(0) + 
dut.cfg_interrupt_pending.setimmediatevalue(0) + dut.cfg_interrupt_msi_select.setimmediatevalue(0) + dut.cfg_interrupt_msi_int.setimmediatevalue(0) + dut.cfg_interrupt_msi_pending_status.setimmediatevalue(0) + dut.cfg_interrupt_msi_pending_status_data_enable.setimmediatevalue(0) + dut.cfg_interrupt_msi_pending_status_function_num.setimmediatevalue(0) + dut.cfg_interrupt_msi_attr.setimmediatevalue(0) + dut.cfg_interrupt_msi_tph_present.setimmediatevalue(0) + dut.cfg_interrupt_msi_tph_type.setimmediatevalue(0) + dut.cfg_interrupt_msi_tph_st_tag.setimmediatevalue(0) + dut.cfg_interrupt_msi_function_number.setimmediatevalue(0) + dut.cfg_pm_aspm_l1_entry_reject.setimmediatevalue(0) + dut.cfg_pm_aspm_tx_l0s_entry_disable.setimmediatevalue(0) + dut.cfg_config_space_enable.setimmediatevalue(1) + dut.cfg_req_pm_transition_l23_ready.setimmediatevalue(0) + dut.cfg_hot_reset_in.setimmediatevalue(0) + dut.cfg_ds_port_number.setimmediatevalue(0) + dut.cfg_ds_bus_number.setimmediatevalue(0) + dut.cfg_ds_device_number.setimmediatevalue(0) + + self.dev.functions[0].configure_bar(0, 16*1024*1024) + self.dev.functions[0].configure_bar(1, 16*1024) + + self.root_port = self.rc.make_port() + self.root_port.connect(self.dev) + + async def gen_reset(self): + await RisingEdge(self.clock) + + self.resetn.value = 0 + await RisingEdge(self.clock) + await RisingEdge(self.clock) + await RisingEdge(self.clock) + self.resetn.value = 1 + await RisingEdge(self.clock) + await RisingEdge(self.clock) + await RisingEdge(self.clock) + self.log.info("Generated DMA RST_N") + + def gen_random_req(self, channel): + low_boundry = channel * 8192 + high_boundry = (channel + 1) * 8192 + idxs = random.sample(range(low_boundry, high_boundry), 2) + lo_idx, hi_idx = idxs[0], idxs[1] + if (hi_idx < lo_idx): + temp = hi_idx + hi_idx = lo_idx + lo_idx = temp + length = hi_idx - lo_idx + 1 + return (lo_idx, length) + + def gen_random_len(self): + return random.randint(1, 8192) + + def gen_random_aligned_len(self): + 
return random.randint(1, 2048) * 4 + + +class BdmaBypassTb(BdmaTb): + def __init__(self, dut, msix=False): + super().__init__(dut, msix) + + # DMA + self.c2h_write_source_0 = AxiStreamSource(AxiStreamBus.from_prefix( + dut, "s_axis_c2h_0"), self.clock, self.resetn, False) + self.c2h_desc_source_0 = DescSource(DescBus.from_prefix( + dut, "s_desc_c2h_0"), self.clock, self.resetn, False) + self.c2h_read_sink_0 = AxiStreamSink(AxiStreamBus.from_prefix( + dut, "m_axis_c2h_0"), self.clock, self.resetn, False) + self.c2h_write_source_1 = AxiStreamSource(AxiStreamBus.from_prefix( + dut, "s_axis_c2h_1"), self.clock, self.resetn, False) + self.c2h_desc_source_1 = DescSource(DescBus.from_prefix( + dut, "s_desc_c2h_1"), self.clock, self.resetn, False) + self.c2h_read_sink_1 = AxiStreamSink(AxiStreamBus.from_prefix( + dut, "m_axis_c2h_1"), self.clock, self.resetn, False) + + # monitor + self.rq_monitor = AxiStreamMonitor(AxiStreamBus.from_prefix( + dut, "m_axis_rq"), self.clock, self.resetn, False) + + async def send_desc(self, channel, startAddr, length, isWrite): + desc = DescTransaction() + desc.start_addr = startAddr + desc.byte_cnt = length + desc.is_write = isWrite + if channel == 0: + await self.c2h_desc_source_0.send(desc) + else: + await self.c2h_desc_source_1.send(desc) + + async def send_data(self, channel, data): + if channel == 0: + await self.c2h_write_source_0.send(data) + else: + await self.c2h_write_source_1.send(data) + + async def recv_data(self, channel): + if channel == 0: + data = await self.c2h_read_sink_0.read() + else: + data = await self.c2h_read_sink_1.read() + data = bytes(''.join([chr(item) for item in data]), encoding='UTF-8') + return data + + async def run_single_write_once(self, channel, addr, data): + length = len(data) + self.log.info("Conduct DMA single write: channel %d addr %d, length %d, char %c", + channel, addr, length, data[0]) + await self.send_desc(channel, addr, length, True) + await self.send_data(channel, data) + + async def 
run_single_read_once(self, channel, addr, length): + self.log.info( + "Conduct DMA single read: channel %d addr %d, length %d", channel, addr, length) + await self.send_desc(channel, addr, length, False) + data = await self.recv_data(channel) + self.log.info( + "Read data from RootComplex successfully, recv length %d, req length %d", len(data), length) + return data + + async def run_single_only_send_read_desc(self, channel, addr, length): + self.log.info( + "Conduct DMA single read: channel %d addr %d, length %d", channel, addr, length) + await self.send_desc(channel, addr, length, False) + + +class BdmaSimpleTb(BdmaTb): + def __init__(self, dut, msix=False): + super().__init__(dut, msix) + + # DMA + self.c2h_write_source_0 = AxiStreamSource(AxiStreamBus.from_prefix( + dut, "s_axis_c2h_0"), self.clock, self.resetn, False) + self.c2h_read_sink_0 = AxiStreamSink(AxiStreamBus.from_prefix( + dut, "m_axis_c2h_0"), self.clock, self.resetn, False) + self.c2h_write_source_1 = AxiStreamSource(AxiStreamBus.from_prefix( + dut, "s_axis_c2h_1"), self.clock, self.resetn, False) + self.c2h_read_sink_1 = AxiStreamSink(AxiStreamBus.from_prefix( + dut, "m_axis_c2h_1"), self.clock, self.resetn, False) + + async def send_data(self, channel, data): + if channel == 0: + await self.c2h_write_source_0.send(data) + else: + await self.c2h_write_source_1.send(data) + + async def recv_data(self, channel): + if channel == 0: + data = await self.c2h_read_sink_0.read() + else: + data = await self.c2h_read_sink_1.read() + data = bytes(''.join([chr(item) for item in data]), encoding='UTF-8') + return data + + def conbine_bar(self, bar): + self.ep_bar = bar + + async def write_register(self, addr: int, x: int): + x = x & 0xFFFFFFFF + self.log.debug("BdmaTb: write register at %d, value %d" % (addr, x)) + await self.ep_bar.write(addr * 4, x.to_bytes(4, byteorder='little', signed=False)) + + async def write_pa_table(self, channel, page_offset, pa): + base_addr = 512 + channel * 1024 + page_offset = 
page_offset & 0x1FF + paLo = pa & 0xFFFFFFFF + paHi = (pa >> 32) & 0xFFFFFFFF + await self.write_register(base_addr + 2*page_offset + 1, paLo) + await self.write_register(base_addr + 2*page_offset, paHi) + + async def memory_map(self): + self.log.info("BdmaTb: Starting memory map...") + await self.write_pa_table(0, 1, 123456) + await self.write_pa_table(1, 2, 1) + for i in range(512): + await self.write_pa_table(0, i, 4096*i) + await self.write_pa_table(1, i, 4096*i) + await Timer(4 * 512 * 2 * 2, units='ns') + + async def submit_transfer(self, channel, addr, length, isWrite=True): + addrLo = addr & 0xFFFFFFFF + addrHi = (addr >> 32) & 0xFFFFFFFF + base_addr = channel * 6 + await self.write_register(base_addr + 1, addrLo) + await self.write_register(base_addr + 2, addrHi) + await self.write_register(base_addr + 3, length) + await self.write_register(base_addr, int(isWrite)) + + async def run_single_write_once(self, channel, addr, data): + length = len(data) + self.log.info("Conduct DMA single write: channel %d addr %d, length %d, char %c", + channel, addr, length, data[0]) + await self.submit_transfer(channel, addr, length, True) + await self.send_data(channel, data) + + async def run_single_read_once(self, channel, addr, length): + self.log.info( + "Conduct DMA single read: channel %d addr %d, length %d", channel, addr, length) + await self.submit_transfer(channel, addr, length, False) + data = await self.recv_data(channel) + self.log.info( + "Read data from RootComplex successfully, recv length %d, req length %d", len(data), length) + return data + + +class BdmaLoopTb(BdmaTb): + def conbine_bar(self, bar): + self.ep_bar = bar + + async def write_register(self, addr: int, x: int): + x = x & 0xFFFFFFFF + self.log.debug("BdmaTb: write register at %d, value %d" % (addr, x)) + await self.ep_bar.write(addr * 4, x.to_bytes(4, byteorder='little', signed=False)) + + async def write_pa_table(self, channel, page_offset, pa): + base_addr = 512 + channel * 1024 + page_offset 
= page_offset & 0x1FF + paLo = pa & 0xFFFFFFFF + paHi = (pa >> 32) & 0xFFFFFFFF + await self.write_register(base_addr + 2*page_offset + 1, paLo) + await self.write_register(base_addr + 2*page_offset, paHi) + + async def memory_map(self): + self.log.info("BdmaTb: Starting memory map...") + await self.write_pa_table(0, 1, 123456) + await self.write_pa_table(1, 2, 1) + for i in range(512): + await self.write_pa_table(0, i, 4096*i) + await self.write_pa_table(1, i, 4096*i) + await Timer(4 * 512 * 2 * 2, units='ns') + + async def submit_transfer(self, channel, addr, length, isWrite=True): + addrLo = addr & 0xFFFFFFFF + addrHi = (addr >> 32) & 0xFFFFFFFF + base_addr = channel * 6 + await self.write_register(base_addr + 1, addrLo) + await self.write_register(base_addr + 2, addrHi) + await self.write_register(base_addr + 3, length) + await self.write_register(base_addr, int(isWrite)) + + async def run_single_write_once(self, channel, addr, length): + self.log.info( + "Conduct DMA single write: channel %d addr %d, length %d", channel, addr, length) + await self.submit_transfer(channel, addr, length, True) + + async def run_single_read_once(self, channel, addr, length): + self.log.info( + "Conduct DMA single read: channel %d addr %d, length %d", channel, addr, length) + await self.submit_transfer(channel, addr, length, False) diff --git a/cocotb/bypass_stress/Makefile b/cocotb/bypass_stress/Makefile new file mode 100644 index 0000000..ddd1627 --- /dev/null +++ b/cocotb/bypass_stress/Makefile @@ -0,0 +1,37 @@ +ROOT_DIR = $(abspath ../../) +BACKEND_DIR = $(ROOT_DIR)/backend +COCOTB_DIR = $(abspath ../) +TB_DIR = $(abspath ./) +include $(ROOT_DIR)/Makefile.base +VBUILD_DIR = $(BACKEND_DIR)/build +VSRC_DIR = $(BACKEND_DIR)/verilog + +TARGET = RawBypassDmaController +TOP_MODULE = mk$(TARGET) +TOP_FILE = $(TOP_MODULE).v +VLOG_FILE = $(TB_DIR)/$(TOP_FILE) + +TB_CASE = dma_stress +TB_FILE = $(TB_CASE)_tb.py +DATE = $(shell date "+%Y%m%d") +LOG_FILE = 
$(TB_DIR)/log/$(DATE)_$(TOP_MODULE).log
+
+cocotb: verilog prepare run  ## Full flow: generate Verilog, merge it into one file, simulate
+
+verilog:  ## Compile the BSV top module to Verilog via the backend Makefile ($(MAKE) keeps -j/-n flags)
+	$(MAKE) -C $(BACKEND_DIR) verilog TOPMODULE=$(TOP_MODULE)
+
+prepare:  ## Rebuild VLOG_FILE by concatenating every .v file bluetcl lists for the top module
+	rm -rf $(VLOG_FILE)
+	bluetcl $(BACKEND_DIR)/listVlogFiles.tcl -bdir $(VBUILD_DIR) -vdir $(VSRC_DIR) $(TOP_MODULE) $(TOP_MODULE) | grep -i '\.v' | xargs -I {} cat {} >> $(VLOG_FILE)
+	sed -i '1i `timescale 1ns/1ps' $(VLOG_FILE)
+
+run:  ## Run the testbench from TB_DIR; pipefail makes a python3 failure fail the target despite tee
+	mkdir -p $(TB_DIR)/log
+	cd $(TB_DIR) && bash -o pipefail -c 'python3 $(TB_FILE) 2>&1 | tee $(LOG_FILE)'
+
+clean:  ## Clean the backend build, then remove the merged Verilog and simulator/python caches
+	$(MAKE) -C $(BACKEND_DIR) clean
+	cd $(TB_DIR) && rm -rf $(VLOG_FILE) __pycache__ .pytest_cache sim_build *.log
+
+.PHONY: cocotb verilog prepare run clean
\ No newline at end of file
diff --git a/cocotb/bypass_stress/dma_stress_tb.py b/cocotb/bypass_stress/dma_stress_tb.py
new file mode 100644
index 0000000..c051c5e
--- /dev/null
+++ b/cocotb/bypass_stress/dma_stress_tb.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+import os
+import random
+
+import cocotb
+from cocotb.triggers import RisingEdge, Timer
+from cocotb.clock import Clock
+
+import cocotb_test.simulator
+
+import sys
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) # fmt: off
+
+from bdmatb import BdmaBypassTb
+
+# class TB architecture
+# -------------- ------------- -----------
+# | Root Complex | <-> | End Pointer | <-> | Dut(DMAC) |
+# -------------- ------------- -----------
+
+
+def gen_pseudo_data(addr, length):
+    start = int(addr / 4)
+    data = start.to_bytes(4, byteorder='little', signed=False)
+    for i in range(1, int(length/4)):
+        data = data + (start + i).to_bytes(4, byteorder='little', signed=False)
+    return data
+
+
+async def stress_random_write_test(pcie_tb, dma_channel, mem, n):
+    addr = 0
+    length = 0
+    for _ in range(n):
+        length = pcie_tb.gen_random_aligned_len()
+        data = gen_pseudo_data(addr, length)
+        await pcie_tb.run_single_write_once(dma_channel, addr, data)
+        addr = int((addr + length)/4) * 4
+    return addr
+
+
+async def run_stress_write(pcie_tb, mem):
+    n = 10
+    end = await stress_random_write_test(pcie_tb, 0, mem, n)
+ await Timer(8192, units="ns") + for i in range(int(end/4)): + assert i == int.from_bytes( + mem[i*4:(i+1)*4], byteorder='little', signed=False) + + +async def run_stress_read(pcie_tb, mem): + addr = 0 + length = 0 + n = 10 + dma_channel = 0 + for _ in range(n): + # length = pcie_tb.gen_random_aligned_len() + length = 64 + await pcie_tb.run_single_only_send_read_desc(dma_channel, addr, length) + addr = int((addr + length)/4) * 4 + + for _ in range(2): + data = await pcie_tb.recv_data(dma_channel) + cur_time = cocotb.utils.get_sim_time("ns") + print(f"time={cur_time}, read recv data={data}") + await Timer(2000, units='ns') + + +@cocotb.test(timeout_time=100000000, timeout_unit="ns") +async def step_random_write_test(dut): + + tb = BdmaBypassTb(dut) + await tb.gen_reset() + + await tb.rc.enumerate() + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + mem = tb.rc.mem_pool.alloc_region(1024*1024) + + await run_stress_read(tb, mem) + # await run_stress_write(tb, mem) + + +tests_dir = os.path.dirname(__file__) +rtl_dir = tests_dir +bdmatb_dir = os.path.dirname(tests_dir) + + +def test_dma(): + dut = "mkRawBypassDmaController" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = dut + + verilog_sources = [ + os.path.join(rtl_dir, f"{dut}.v") + ] + + sim_build = os.path.join(tests_dir, "sim_build", dut) + + cocotb_test.simulator.run( + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + timescale="1ns/1ps", + sim_build=sim_build + ) + + +if __name__ == "__main__": + test_dma() diff --git a/cocotb/bypass_stress/run_until_fail.sh b/cocotb/bypass_stress/run_until_fail.sh new file mode 100755 index 0000000..f39115a --- /dev/null +++ b/cocotb/bypass_stress/run_until_fail.sh @@ -0,0 +1,9 @@ +while true; do + make run + + if [ $? 
-ne 0 ]; then + break + fi +done + +RANDOM_SEED=1740663151 \ No newline at end of file diff --git a/cocotb/bypass_write_read/Makefile b/cocotb/bypass_write_read/Makefile new file mode 100644 index 0000000..5de524b --- /dev/null +++ b/cocotb/bypass_write_read/Makefile @@ -0,0 +1,37 @@ +ROOT_DIR := $(abspath ../../) +BACKEND_DIR := $(ROOT_DIR)/backend +COCOTB_DIR := $(abspath ../) +TB_DIR := $(abspath ./) +include $(ROOT_DIR)/Makefile.base +VBUILD_DIR := $(BACKEND_DIR)/build +VSRC_DIR := $(BACKEND_DIR)/verilog + +TARGET ?= RawBypassDmaController +TOP_MODULE = mk$(TARGET) +TOP_FILE = $(TOP_MODULE).v +VLOG_FILE = $(TB_DIR)/$(TOP_FILE) + +TB_CASE ?= dma_wr_rd +TB_FILE = $(TB_CASE)_tb.py +DATE = $(shell date "+%Y%m%d") +LOG_FILE = $(TB_DIR)/log/$(DATE)_$(TB_CASE).log + +cocotb: verilog prepare run + +verilog: + cd $(BACKEND_DIR) && make verilog TOPMODULE=$(TOP_MODULE) + +prepare: + rm -rf $(VLOG_FILE) + bluetcl $(BACKEND_DIR)/listVlogFiles.tcl -bdir $(VBUILD_DIR) -vdir $(VSRC_DIR) $(TOP_MODULE) $(TOP_MODULE) | grep -i '\.v' | xargs -I {} cat {} >> $(VLOG_FILE) + sed -i '1i `timescale 1ns/1ps' $(VLOG_FILE) + +run: + cd $(TB_DIR) + mkdir -p log + python3 $(TB_FILE) 2>&1 | tee $(LOG_FILE) + +clean: + cd $(BACKEND_DIR) && make clean + cd $(TB_DIR) && rm -rf $(VLOG_FILE) __pycache__ .pytest_cache sim_build *.log + \ No newline at end of file diff --git a/cocotb/bypass_write_read/dma_straddle_tb.py b/cocotb/bypass_write_read/dma_straddle_tb.py new file mode 100644 index 0000000..72eb14f --- /dev/null +++ b/cocotb/bypass_write_read/dma_straddle_tb.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python +import os +import sys +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) # fmt: off + +import random + +import cocotb_test.simulator +import cocotb +from cocotb.triggers import RisingEdge, FallingEdge, Timer + + +from bdmatb import BdmaBypassTb + +# class TB architecture +# -------------- ------------- ----------- +# | Root Complex | <-> | End Pointer | <-> | 
Dut(DMAC) | +# -------------- ------------- ----------- +test_num = 100 +async def single_path_random_write_test(pcie_tb, dma_channel, mem): + for _ in range(test_num): + addr, length = pcie_tb.gen_random_req(dma_channel) + addr = mem.get_absolute_address(addr) + char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") + data = char * length + await pcie_tb.run_single_write_once(dma_channel, addr, data) + await Timer(200+length, units='ns') + assert mem[addr:addr+length] == data + + +async def single_path_random_read_test(pcie_tb, dma_channel, mem): + for _ in range(test_num): + addr, length = pcie_tb.gen_random_req(dma_channel) + addr = mem.get_absolute_address(addr) + char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") + mem[addr:addr+length] = char * length + data = await pcie_tb.run_single_read_once(dma_channel, addr, length) + assert data == char * length + + +@cocotb.test(timeout_time=100000000, timeout_unit="ns") +async def straddle_write_test(dut): + + tb = BdmaBypassTb(dut) + await tb.gen_reset() + + await tb.rc.enumerate() + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + mem = tb.rc.mem_pool.alloc_region(1024*1024) + + channel0 = cocotb.start_soon(single_path_random_write_test(tb, 0, mem)) + channel1 = cocotb.start_soon(single_path_random_write_test(tb, 1, mem)) + + tb.log.info("Start write test in straddle mode!") + + await channel0 + await channel1 + + tb.log.info("End write test in straddle mode succesfully!") + +@cocotb.test(timeout_time=10000000, timeout_unit="ns") +async def straddle_read_test(dut): + tb = BdmaBypassTb(dut) + await tb.gen_reset() + + await tb.rc.enumerate() + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + mem = tb.rc.mem_pool.alloc_region(1024*1024) + + channel0 = cocotb.start_soon(single_path_random_read_test(tb, 0, mem)) + channel1 = 
cocotb.start_soon(single_path_random_read_test(tb, 1, mem)) + + tb.log.info("Start Read test in straddle mode!") + + await channel0 + await channel1 + +# tb.log.info("End Read test in straddle mode succesfully!") + +tests_dir = os.path.dirname(__file__) +rtl_dir = tests_dir + + +def test_dma(): + dut = "mkRawBypassDmaController" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = dut + + verilog_sources = [ + os.path.join(rtl_dir, f"{dut}.v") + ] + + sim_build = os.path.join(tests_dir, "sim_build", dut) + + cocotb_test.simulator.run( + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + timescale="1ns/1ps", + sim_build=sim_build + ) + +if __name__ == "__main__": + test_dma() \ No newline at end of file diff --git a/cocotb/bypass_write_read/dma_wr_rd_tb.py b/cocotb/bypass_write_read/dma_wr_rd_tb.py new file mode 100644 index 0000000..f6ee43f --- /dev/null +++ b/cocotb/bypass_write_read/dma_wr_rd_tb.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +import logging +import os +import random + +import cocotb +from cocotb.triggers import RisingEdge, Timer +from cocotb.clock import Clock + +import cocotb_test.simulator + +import sys +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) #fmt:off +from bdmatb import BdmaBypassTb + +# class TB architecture +# -------------- ------------- ----------- +# | Root Complex | <-> | End Pointer | <-> | Dut(DMAC) | +# -------------- ------------- ----------- +test_num = 100 + +async def single_path_random_write_test(pcie_tb, dma_channel, mem): + for _ in range(test_num): + addr, length = pcie_tb.gen_random_req(dma_channel) + addr = mem.get_absolute_address(addr) + char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") + data = char * length + await pcie_tb.run_single_write_once(dma_channel, addr, data) + await Timer(100+length, units='ns') + assert mem[addr:addr+length] == data + + +async def 
single_path_random_read_test(pcie_tb, dma_channel, mem): + for _ in range(test_num): + addr, length = pcie_tb.gen_random_req(dma_channel) + addr = mem.get_absolute_address(addr) + char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") + mem[addr:addr+length] = char * length + data = await pcie_tb.run_single_read_once(dma_channel, addr, length) + assert data == char * length + +@cocotb.test(timeout_time=100000000, timeout_unit="ns") +async def step_random_write_test(dut): + + tb = BdmaBypassTb(dut) + await tb.gen_reset() + + await tb.rc.enumerate() + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + mem = tb.rc.mem_pool.alloc_region(1024*1024) + + await single_path_random_write_test(tb, 0, mem) + +@cocotb.test(timeout_time=10000000, timeout_unit="ns") +async def step_random_read_test(dut): + tb = BdmaBypassTb(dut) + await tb.gen_reset() + + await tb.rc.enumerate() + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + mem = tb.rc.mem_pool.alloc_region(1024*1024) + + await single_path_random_read_test(tb, 0, mem) + +tests_dir = os.path.dirname(__file__) +rtl_dir = tests_dir +bdmatb_dir = os.path.dirname(tests_dir) + + +def test_dma(): + dut = "mkRawBypassDmaController" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = dut + + verilog_sources = [ + os.path.join(rtl_dir, f"{dut}.v") + ] + + sim_build = os.path.join(tests_dir, "sim_build", dut) + + cocotb_test.simulator.run( + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + timescale="1ns/1ps", + sim_build=sim_build + ) + +if __name__ == "__main__": + test_dma() \ No newline at end of file diff --git a/cocotb/cocotb.yaml b/cocotb/cocotb.yaml new file mode 100644 index 0000000..0e73cb3 --- /dev/null +++ b/cocotb/cocotb.yaml @@ -0,0 +1,41 @@ +name: cocotb +channels: + - defaults +dependencies: + - 
_libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - bzip2=1.0.8=h5eee18b_6 + - ca-certificates=2024.9.24=h06a4308_0 + - expat=2.6.3=h6a678d5_0 + - ld_impl_linux-64=2.40=h12ee557_0 + - libffi=3.4.4=h6a678d5_1 + - libgcc-ng=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libmpdec=4.0.0=h5eee18b_0 + - libstdcxx-ng=11.2.0=h1234567_1 + - libuuid=1.41.5=h5eee18b_0 + - ncurses=6.4=h6a678d5_0 + - openssl=3.0.15=h5eee18b_0 + - pip=24.2=py313h06a4308_0 + - python=3.13.0=hf623796_100_cp313 + - python_abi=3.13=0_cp313 + - readline=8.2=h5eee18b_0 + - setuptools=75.1.0=py313h06a4308_0 + - sqlite=3.45.3=h5eee18b_0 + - tk=8.6.14=h39e8969_0 + - tzdata=2024b=h04d1e81_0 + - wheel=0.44.0=py313h06a4308_0 + - xz=5.4.6=h5eee18b_1 + - zlib=1.2.13=h5eee18b_1 + - pip: + - cocotb==1.9.1 + - cocotb-bus==0.2.1 + - cocotb-test==0.2.5 + - cocotbext-axi==0.1.24 + - cocotbext-pcie==0.2.14 + - find-libpython==0.4.0 + - iniconfig==2.0.0 + - packaging==24.1 + - pluggy==1.5.0 + - pytest==8.3.3 +prefix: /home/wjz/miniconda3/envs/cocotb \ No newline at end of file diff --git a/cocotb/loop_write_read/Makefile b/cocotb/loop_write_read/Makefile new file mode 100644 index 0000000..dc2f02c --- /dev/null +++ b/cocotb/loop_write_read/Makefile @@ -0,0 +1,37 @@ +ROOT_DIR = $(abspath ../../) +BACKEND_DIR = $(ROOT_DIR)/backend +COCOTB_DIR = $(abspath ../) +TB_DIR = $(abspath ./) +include $(ROOT_DIR)/Makefile.base +VBUILD_DIR = $(BACKEND_DIR)/build +VSRC_DIR = $(BACKEND_DIR)/verilog + +TARGET = RawTestDmaController +TOP_MODULE = mk$(TARGET) +TOP_FILE = $(TOP_MODULE).v +VLOG_FILE = $(TB_DIR)/$(TOP_FILE) + +TB_CASE = dma_loop +TB_FILE = $(TB_CASE)_tb.py +DATE = $(shell date "+%Y%m%d") +LOG_FILE = $(TB_DIR)/log/$(DATE)_$(TOP_MODULE).log + +cocotb:verilog prepare run + +verilog: + cd $(BACKEND_DIR) && make verilog TOPMODULE=$(TOP_MODULE) + +prepare: + rm -rf $(VLOG_FILE) + bluetcl $(BACKEND_DIR)/listVlogFiles.tcl -bdir $(VBUILD_DIR) -vdir $(VSRC_DIR) $(TOP_MODULE) $(TOP_MODULE) | grep -i '\.v' | xargs -I {} cat 
{} >> $(VLOG_FILE) + sed -i '1i `timescale 1ns/1ps' $(VLOG_FILE) + +run: + cd $(TB_DIR) + mkdir -p log + python3 $(TB_FILE) 2>&1 | tee $(LOG_FILE) + +clean: + cd $(BACKEND_DIR) && make clean + cd $(TB_DIR) && rm -rf $(VLOG_FILE) __pycache__ .pytest_cache sim_build *.log + \ No newline at end of file diff --git a/cocotb/loop_write_read/dma_loop_tb.py b/cocotb/loop_write_read/dma_loop_tb.py new file mode 100644 index 0000000..539325e --- /dev/null +++ b/cocotb/loop_write_read/dma_loop_tb.py @@ -0,0 +1,214 @@ +import os +import sys +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) # fmt: off + +import random + +import cocotb +from cocotb.triggers import RisingEdge, Timer +from cocotb.clock import Clock + +import cocotb_test.simulator +from bdmatb import BdmaLoopTb + + +tests_dir = os.path.dirname(__file__) +rtl_dir = tests_dir + + +async def loop_write_read_once(pcie_tb, mem): + # addr, length = pcie_tb.gen_random_req(0) + addr = 1 + length = 2378 + addr = mem.get_absolute_address(addr) + char = bytes(random.choice('abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") + data = char * length + mem[addr:addr+length] = data + await pcie_tb.run_single_read_once(0, addr, length) + new_addr = addr + 8192 + await pcie_tb.run_single_write_once(0, new_addr, length) + await Timer(200+4*length, units='ns') + assert mem[new_addr:new_addr+length] == data + + + +async def throughput_test(dut, dev, mem): + + print(f"before=================={mem[10:15]}") + + desc_transfer_szie = 4096 + stride_size = 4096 + strise_cnt = 32 + + double_channel_test_offset = 0 # double channel test disabled + # double_channel_test_offset = 1024*512 # double channel test enabled + + dev_bar1 = dev.bar_window[1] + await dev_bar1.write(0x04, (0).to_bytes(4, byteorder='little', signed=False)) + await dev_bar1.write(0x08, (0).to_bytes(4, byteorder='little', signed=False)) + + await dev_bar1.write(0x0C, (1024*1024).to_bytes(4, byteorder='little', signed=False)) + await 
dev_bar1.write(0x10, (0).to_bytes(4, byteorder='little', signed=False)) + + await dev_bar1.write(0x14, (desc_transfer_szie).to_bytes(4, byteorder='little', signed=False)) + await dev_bar1.write(0x1C, (stride_size).to_bytes(4, byteorder='little', signed=False)) + await dev_bar1.write(0x20, (strise_cnt).to_bytes(4, byteorder='little', signed=False)) + + await dev_bar1.write(0x28, (double_channel_test_offset).to_bytes(4, byteorder='little', signed=False)) + + await dev_bar1.write(0x2c, (0x00).to_bytes(4, byteorder='little', signed=False)) # read write + # await dev_bar1.write(0x2c, (0x01).to_bytes(4, byteorder='little', signed=False)) # read only + # await dev_bar1.write(0x2c, (0x02).to_bytes(4, byteorder='little', signed=False)) # write only + + await dev_bar1.write(0x18, (0xFFF).to_bytes(4, byteorder='little', signed=False)) + + calc_time_ns = 5000 + old_val = int.from_bytes(await dev_bar1.read(0x18, 4), 'little') + old_time = cocotb.utils.get_sim_time("ns") + while True: + await Timer(calc_time_ns, "ns") + new_val = int.from_bytes(await dev_bar1.read(0x18, 4), 'little') + new_time = cocotb.utils.get_sim_time("ns") + value_delta = old_val - new_val + time_delta = new_time-old_time + speed = desc_transfer_szie * 8 * (value_delta) / (time_delta) + + print(f"old_value={old_val}, new_value={new_val}, value_delta={value_delta}, time_delta={time_delta}, speed={speed} Gbps") + + old_val = new_val + old_time = new_time + + if new_val == 0: + break + + print(f"after=================={mem[10:15]}") + + + +async def correct_test(dut, dev, mem): + + dev_bar1 = dev.bar_window[1] + + for iter_idx in range(1000): + + req_size = random.randint(1, 4096) + stride_size = req_size + stride_cnt = random.randint(1, 8) + + src_offset = random.randint(0, 1024*128) + dst_offset = src_offset # random.randint(0, 1024*512) + + req_cnt = stride_cnt + double_channel_test_offset = 0 + + await dev_bar1.write(0x04, (src_offset).to_bytes(4, byteorder='little', signed=False)) + await 
dev_bar1.write(0x08, (0).to_bytes(4, byteorder='little', signed=False)) + + await dev_bar1.write(0x0C, (1024*1024 + dst_offset).to_bytes(4, byteorder='little', signed=False)) + await dev_bar1.write(0x10, (0).to_bytes(4, byteorder='little', signed=False)) + + await dev_bar1.write(0x14, (req_size).to_bytes(4, byteorder='little', signed=False)) + await dev_bar1.write(0x1C, (stride_size).to_bytes(4, byteorder='little', signed=False)) + await dev_bar1.write(0x20, (stride_cnt).to_bytes(4, byteorder='little', signed=False)) + await dev_bar1.write(0x28, (double_channel_test_offset).to_bytes(4, byteorder='little', signed=False)) + + print(f"src_offset = {hex(src_offset)}, dst_offset = {hex(dst_offset)}, req_size={hex(req_size)}, stride_cnt={hex(stride_cnt)}") + + await dev_bar1.write(0x18, (req_cnt).to_bytes(4, byteorder='little', signed=False)) + + while True: + new_val = int.from_bytes(await dev_bar1.read(0x18, 4), 'little') + if new_val == 0: + break + await Timer(100, "ns") + + await Timer(5000, "ns") + + total_bytes_copy = req_size * req_cnt + + src_buffer = mem[0:] + dst_buffer = mem[1024*1024:] + + for offset in range(0, dst_offset, 1): + if dst_buffer[offset] != 0: + print(f"A should not be modified, dst_buffer[{hex(offset)}]={hex(dst_buffer[offset])}") + raise SystemExit + for (s_offset, d_offset) in zip(range(src_offset, src_offset + total_bytes_copy, 1), range(dst_offset, dst_offset + total_bytes_copy, 1)): + if dst_buffer[d_offset] != src_buffer[s_offset]: + print(f"A not match, dst_buffer[{hex(d_offset)}]={hex(dst_buffer[d_offset])}, src_buffer[{hex(s_offset)}]={hex(src_buffer[s_offset])}") + raise SystemExit + mem[1024*1024 + d_offset] = 0 + for offset in range(dst_offset + total_bytes_copy, double_channel_test_offset, 1): + if dst_buffer[offset] != 0: + print(f"A should not be modified, dst_buffer[{hex(offset)}]={hex(dst_buffer[offset])}") + raise SystemExit + + if double_channel_test_offset != 0: + for offset in range(double_channel_test_offset, 
double_channel_test_offset + dst_offset, 1): + if dst_buffer[offset] != 0: + print(f"B should not be modified, dst_buffer[{hex(offset)}]={hex(dst_buffer[offset])}") + raise SystemExit + for (s_offset, d_offset) in zip(range(double_channel_test_offset + src_offset, double_channel_test_offset + src_offset + total_bytes_copy, 1), range(double_channel_test_offset + dst_offset, double_channel_test_offset + dst_offset + total_bytes_copy, 1)): + if dst_buffer[d_offset] != src_buffer[s_offset]: + print(f"B not match, dst_buffer[{hex(d_offset)}]={hex(dst_buffer[d_offset])}, src_buffer[{hex(s_offset)}]={hex(src_buffer[s_offset])}") + raise SystemExit + mem[1024*1024 + d_offset] = 0 + for offset in range(double_channel_test_offset + dst_offset + total_bytes_copy, 1024 * 1024, 1): + if dst_buffer[offset] != 0: + print(f"B should not be modified, dst_buffer[{hex(offset)}]={hex(dst_buffer[offset])}") + raise SystemExit + + + + print("pass" + "\n" * 10) + + +@cocotb.test(timeout_time=10000000, timeout_unit="ns") +async def test_entry(dut): + tb = BdmaLoopTb(dut) + await tb.gen_reset() + + await tb.rc.enumerate() + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + tb.root_port.downstream_port.link_delay_steps = 428*1000 + + mem = tb.rc.mem_pool.alloc_region(2*1024*1024) + for idx in range(32768): + mem[idx*2:idx*2+2] = idx.to_bytes(2, "little") + # mem[10:15] = b'world' + + await Timer(5000, "ns") + + await throughput_test(dut, dev, mem) + # await correct_test(dut, dev, mem) + + + + +def test_dma(): + dut = "mkRawTestDmaController" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = dut + + verilog_sources = [ + os.path.join(rtl_dir, f"{dut}.v") + ] + + sim_build = os.path.join(tests_dir, "sim_build", dut) + + cocotb_test.simulator.run( + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + timescale="1ns/1ps", + sim_build=sim_build + ) + + +if 
__name__ == "__main__": + test_dma() diff --git a/cocotb/simple_write_read/Makefile b/cocotb/simple_write_read/Makefile new file mode 100644 index 0000000..fc6e999 --- /dev/null +++ b/cocotb/simple_write_read/Makefile @@ -0,0 +1,37 @@ +ROOT_DIR = $(abspath ../../) +BACKEND_DIR = $(ROOT_DIR)/backend +COCOTB_DIR = $(abspath ../) +TB_DIR = $(abspath ./) +include $(ROOT_DIR)/Makefile.base +VBUILD_DIR = $(BACKEND_DIR)/build +VSRC_DIR = $(BACKEND_DIR)/verilog + +TARGET = RawSimpleDmaController +TOP_MODULE = mk$(TARGET) +TOP_FILE = $(TOP_MODULE).v +VLOG_FILE = $(TB_DIR)/$(TOP_FILE) + +TB_CASE = dma_simple_wr_rd +TB_FILE = $(TB_CASE)_tb.py +DATE = $(shell date "+%Y%m%d") +LOG_FILE = $(TB_DIR)/log/$(DATE)_$(TOP_MODULE).log + +cocotb:clean verilog prepare run + +verilog: + cd $(BACKEND_DIR) && make verilog TOPMODULE=$(TOP_MODULE) + +prepare: + rm -rf $(VLOG_FILE) + bluetcl $(BACKEND_DIR)/listVlogFiles.tcl -bdir $(VBUILD_DIR) -vdir $(VSRC_DIR) $(TOP_MODULE) $(TOP_MODULE) | grep -i '\.v' | xargs -I {} cat {} >> $(VLOG_FILE) + sed -i '1i `timescale 1ns/1ps' $(VLOG_FILE) + +run: + cd $(TB_DIR) + mkdir -p log + python3 $(TB_FILE) 2>&1 | tee $(LOG_FILE) + +clean: + cd $(BACKEND_DIR) && make clean + cd $(TB_DIR) && rm -rf $(VLOG_FILE) __pycache__ .pytest_cache sim_build *.log + \ No newline at end of file diff --git a/cocotb/simple_write_read/dma_simple_wr_rd_tb.py b/cocotb/simple_write_read/dma_simple_wr_rd_tb.py new file mode 100644 index 0000000..7b9b76c --- /dev/null +++ b/cocotb/simple_write_read/dma_simple_wr_rd_tb.py @@ -0,0 +1,85 @@ +from bdmatb import BdmaSimpleTb +import os +import random + +import cocotb +from cocotb.triggers import RisingEdge, Timer +from cocotb.clock import Clock + +import cocotb_test.simulator + +import sys +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) + +tests_dir = os.path.dirname(__file__) +rtl_dir = tests_dir +test_num = 1 + + +async def single_path_random_write_test(pcie_tb, dma_channel, mem): + for _ in 
range(test_num): + addr, length = pcie_tb.gen_random_req(dma_channel) + addr = mem.get_absolute_address(addr) + char = bytes(random.choice( + 'abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") + data = char * length + await pcie_tb.run_single_write_once(dma_channel, addr, data) + await Timer(200+length, units='ns') + assert mem[addr:addr+length] == data + + +async def single_path_random_read_test(pcie_tb, dma_channel, mem): + for _ in range(test_num): + addr, length = pcie_tb.gen_random_req(dma_channel) + addr = mem.get_absolute_address(addr) + char = bytes(random.choice( + 'abcdefghijklmnopqrstuvwxyz'), encoding="UTF-8") + mem[addr:addr+length] = char * length + data = await pcie_tb.run_single_read_once(dma_channel, addr, length) + await Timer(200+length, units='ns') + assert data == char * length + + +@cocotb.test(timeout_time=10000000, timeout_unit="ns") +async def bar_test(dut): + tb = BdmaSimpleTb(dut) + await tb.gen_reset() + + await tb.rc.enumerate() + dev = tb.rc.find_device(tb.dev.functions[0].pcie_id) + + await dev.enable_device() + await dev.set_master() + + dev_bar0 = dev.bar_window[0] + + tb.conbine_bar(dev_bar0) + await tb.memory_map() + + mem = tb.rc.mem_pool.alloc_region(1024*1024) + await single_path_random_read_test(tb, 0, mem) + + +def test_dma(): + dut = "mkRawSimpleDmaController" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = dut + + verilog_sources = [ + os.path.join(rtl_dir, f"{dut}.v") + ] + + sim_build = os.path.join(tests_dir, "sim_build", dut) + + cocotb_test.simulator.run( + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + timescale="1ns/1ps", + sim_build=sim_build + ) + + +if __name__ == "__main__": + test_dma() diff --git a/img/StreamSplit.drawio.svg b/img/StreamSplit.drawio.svg new file mode 100644 index 0000000..4ae2123 --- /dev/null +++ b/img/StreamSplit.drawio.svg @@ -0,0 +1,424 @@ + + + + + + + + + +
+
+
+ InputFifo +
+
+
+
+ + InputFifo + +
+
+ + + + + + +
+
+
+ getFrameSize +
+
+
+
+ + getFrameSize + +
+
+ + + + + + +
+
+
+ assertSplit +
+
+
+
+ + assertSplit + +
+
+ + + + + + + + +
+
+
+ dataSplit +
+
+
+
+ + dataSplit + +
+
+ + + + + + +
+
+
+ remainStream +
+
+
+
+ + remainStream + +
+
+ + + + +
+
+
+ OutputFifo +
+
+
+
+ + OutputFifo + +
+
+ + + + +
+
+
+ is Split Location in this frame? +
+
+
+
+ + is Split Location in this f... + +
+
+ + + + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ OutputFifo +
+
+
+
+ + OutputFifo + +
+
+ + + + + + +
+
+
+ Last +
+
+
+
+ + Last + +
+
+ + + + +
+
+
+ Remain +
+
+
+
+ + Remain + +
+
+ + + + +
+
+
+ OutputFifo +
+
+
+
+ + OutputFifo + +
+
+ + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ Remain +
+
+
+
+ + Remain + +
+
+ + + + + + +
+
+
+ ConcatStream +
+
+
+
+ + ConcatStream + +
+
+ + + + +
+
+
+ Remain +
+
+
+
+ + Remain + +
+
+ + + + +
+
+
+ OutputFifo +
+
+
+
+ + OutputFifo + +
+
+ + + + +
+
+
+ ① +
+
+
+
+ + ① + +
+
+ + + + +
+
+
+ ② +
+
+
+
+ + ② + +
+
+ + + + +
+
+
+ ③ +
+
+
+
+ + ③ + +
+
+ + + + + + +
+
+
+ Remain +
+
+
+
+ + Remain + +
+
+ + + + +
+
+
+ OutputFifo +
+
+
+
+ + OutputFifo + +
+
+ + + + +
+
+
+ ④ +
+
+
+
+ + ④ + +
+
+
+ + + + + Text is not SVG - cannot display + + + +
\ No newline at end of file diff --git a/img/chunkSplit.drawio.svg b/img/chunkSplit.drawio.svg new file mode 100644 index 0000000..5cc7635 --- /dev/null +++ b/img/chunkSplit.drawio.svg @@ -0,0 +1,560 @@ + + + + + + + + + +
+
+
+ FifoIn#(DmaRequest) +
+
+
+
+ + FifoIn#(DmaRequest) + +
+
+ + + + + + + + +
+
+
+ getFirstChunkLen +
+
+
+
+ + getFirstChunkLen + +
+
+ + + + +
+
+
+
    +
  • + startAddr +
  • +
  • + length +
  • +
+
+
+
+
+ + startAddrlength + +
+
+ + + + +
+
+
+ 4KB +
+
+
+
+ + 4KB + +
+
+ + + + +
+
+
+ 4KB +
+
+
+
+ + 4KB + +
+
+ + + + +
+
+
+ 4KB +
+
+
+
+ + 4KB + +
+
+ + + + +
+
+
+ offset +
+
+
+
+ + offset + +
+
+ + + + + +
+
+
+ 512 +
+ √ +
+
+
+
+ + 512... + +
+
+ + + + +
+
+
+ 512 +
+ √ +
+
+
+
+ + 512... + +
+
+ + + + +
+
+
+ 512 +
+ √ +
+
+
+
+ + 512... + +
+
+ + + + +
+
+
+ firstChunk +
+
+
+
+ + firstChunk + +
+
+ + + + +
+
+
+ 512 +
+ √ +
+
+
+
+ + 512... + +
+
+ + + + +
+
+
+ 512 +
+ × +
+
+
+
+ + 512... + +
+
+ + + + +
+
+
+ 512 +
+ × +
+
+
+
+ + 512... + +
+
+ + + + +
+
+
+ ... +
+
+
+
+ + ... + +
+
+ + + + +
+
+
+ ... +
+
+
+
+ + ... + +
+
+ + + + + + + +
+
+
+ FifoIn#(DataStream) +
+
+
+
+ + FifoIn#(DataStream) + +
+
+ + + + + + +
+
+
+ streamSplit +
+
+
+
+ + streamSplit + +
+
+ + + + + + + + +
+
+
+ chunkSplit +
+
+
+
+ + chunkSplit + +
+
+ + + + +
+
+
+ split the first chunk +
+
+
+
+ + split the first chunk + +
+
+ + + + +
+
+
+ split of MaxSizePayload +
+
+
+
+ + split of MaxSizePayload + +
+
+ + + + +
+
+
+ only need tag isLast +
+ MPS must be n*512 +
+
+
+
+ + only need tag isLast... + +
+
+ + + + +
+
+
+ firstChunkLen < MPS +
+
+
+
+ + firstChunkLen < MPS + +
+
+ + + + +
+
+
+ FifoOut#(DataStream) +
+
+
+
+ + FifoOut#(DataStream) + +
+
+ + + + +
+
+
+ FifoOut#(DmaRequest) +
+
+
+
+ + FifoOut#(DmaRequest) + +
+
+ + + + +
+
+
+ latency=0 +
+
+
+
+ + latency=0 + +
+
+ + + + +
+
+
+ latency=3 +
+
+
+
+ + latency=3 + +
+
+ + + + +
+
+
+ latency=0 +
+
+
+
+ + latency=0 + +
+
+ + + + +
+
+
+ latency=1 +
+
+
+
+ + latency=1 + +
+
+ + + + +
+
+
+ latency=1 +
+
+
+
+ + latency=1 + +
+
+
+ + + + + Text is not SVG - cannot display + + + +
\ No newline at end of file diff --git a/img/completer.drawio.svg b/img/completer.drawio.svg new file mode 100644 index 0000000..f9f2ce6 --- /dev/null +++ b/img/completer.drawio.svg @@ -0,0 +1,384 @@ + + + + + + + + + + +
+
+
+ rawPcieCompleterRequest +
+
+
+
+ + rawPcieCompleterRequest + +
+
+ + + + +
+
+
+ DataStream#(PcieAxiStream) +
+
+
+
+ + DataStream#(PcieAxiStream) + +
+
+ + + + + + +
+
+
+ DataStream#(CQSideBand) +
+
+
+
+ + DataStream#(CQSideBand) + +
+
+ + + + + + +
+
+
+ CsrWriteReq +
+
+
+
+ + CsrWriteReq + +
+
+ + + + + + +
+
+
+ CsrReadReq +
+
+
+
+ + CsrReadReq + +
+
+ + + + + + +
+
+
+ CQDescripter +
+
+
+
+ + CQDescripter + +
+
+ + + + +
+
+
+ CompleterRequest +
+
+
+
+ + CompleterRequest + +
+
+ + + + + + + + +
+
+
+ parse +
+
+
+
+ + parse + +
+
+ + + + + + + + +
+
+
+ CQDescripterFIFO +
+
+
+
+ + CQDescripterFIFO + +
+
+ + + + +
+
+
+ rawPcieCompleterComplete +
+
+
+
+ + rawPcieCompleterComplete + +
+
+ + + + +
+
+
+ DmaHostToCardWr +
+
+
+
+ + DmaHostToCardWr + +
+
+ + + + + + +
+
+
+ DmaHostToCardRd +
+
+
+
+ + DmaHostToCardRd + +
+
+ + + + + +
+
+
+ CompleterComplete +
+
+
+
+ + CompleterComplete + +
+
+ + + + +
+
+
+ DataStream#(PcieAxiStream) +
+
+
+
+ + DataStream#(PcieAxiStream) + +
+
+ + + + + + +
+
+
+ DataStream#(CCSideBand) +
+
+
+
+ + DataStream#(CCSideBand) + +
+
+ + + + + + +
+
+
+ generate +
+
+
+
+ + generate + +
+
+ + + + + + +
+
+
+ assert +
+
+
+
+ + assert + +
+
+ + + + + + +
+
+
+ CsrReadResp +
+
+
+
+ + CsrReadResp + +
+
+ + + + + +
+
+
+ CQ_credit +
+
+
+
+ + CQ_credit + +
+
+ + + + +
+
+
+ counter +
+
+
+
+ + counter + +
+
+
+ + + + + Text is not SVG - cannot display + + + +
\ No newline at end of file diff --git a/img/dmac.drawio.svg b/img/dmac.drawio.svg new file mode 100644 index 0000000..0a7cc01 --- /dev/null +++ b/img/dmac.drawio.svg @@ -0,0 +1,1130 @@ + + + + + + + +
+
+
+ wrData +
+
+
+
+ + wrData + +
+
+ + + + +
+
+
+ wrReq +
+
+
+
+ + wrReq + +
+
+ + + + +
+
+
+ + DmaC2HWrite + +
+
+
+
+ + DmaC2HWrite + +
+
+ + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ rdData +
+
+
+
+ + rdData + +
+
+ + + + +
+
+
+ rdReq +
+
+
+
+ + rdReq + +
+
+ + + + +
+
+
+ + DmaC2HRead + +
+
+
+
+ + DmaC2HRead + +
+
+ + + + +
+
+
+ StraddleStream +
+
+
+
+ + StraddleStream + +
+
+ + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ + TX + +
+ RequsterPcieAdapter +
+
+
+
+ + TX... + +
+
+ + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ + RX + +
+ CompleterPcieAdapter +
+
+
+
+ + RX... + +
+
+ + + + + + +
+
+
+ StraddleStream +
+
+
+
+ + StraddleStream + +
+
+ + + + + + +
+
+
+ StraddleStream +
+
+
+
+ + StraddleStream + +
+
+ + + + +
+
+
+ wrData +
+
+
+
+ + wrData + +
+
+ + + + +
+
+
+ wrReq +
+
+
+
+ + wrReq + +
+
+ + + + +
+
+
+ + DmaC2HWrite + +
+
+
+
+ + DmaC2HWrite + +
+
+ + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ rdData +
+
+
+
+ + rdData + +
+
+ + + + +
+
+
+ rdReq +
+
+
+
+ + rdReq + +
+
+ + + + +
+
+
+ + DmaC2HRead + +
+
+
+
+ + DmaC2HRead + +
+
+ + + + +
+
+
+ StraddleStream +
+
+
+
+ + StraddleStream + +
+
+ + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ PATH0 +
+
+
+
+ + PATH0 + +
+
+ + + + +
+
+
+ PATH1 +
+
+
+
+ + PATH1 + +
+
+ + + + + + + + + + +
+
+
+ AxiStreamMaster +
+
+
+
+ + AxiStreamMaster + +
+
+ + + + +
+
+
+ AxiStreamSlave +
+
+
+
+ + AxiStreamSlave + +
+
+ + + + +
+
+
+ RqDescriptor +
+
+
+
+ + RqDescriptor + +
+
+ + + + +
+
+
+ Payload +
+
+
+
+ + Payload + +
+
+ + + + +
+
+
+ RcDescriptor +
+
+
+
+ + RcDescriptor + +
+
+ + + + +
+
+
+ Payload +
+
+
+
+ + Payload + +
+
+ + + + +
+
+
+ User Interface +
+ All are pipelined +
+
+
+
+ + User Interface... + +
+
+ + + + +
+
+
+ Raw Xilinx Pcie IP Interface +
+
+
+
+ + Raw Xilinx Pcie... + +
+
+ + + + +
+
+
+ wrData +
+
+
+
+ + wrData + +
+
+ + + + +
+
+
+ wrReq +
+
+
+
+ + wrReq + +
+
+ + + + +
+
+
+ + ChunkSplit + +
+
+
+
+ + ChunkSplit + +
+
+ + + + +
+
+
+ chunkData +
+
+
+
+ + chunkData + +
+
+ + + + +
+
+
+ chunkReq +
+
+
+
+ + chunkReq + +
+
+ + + + +
+
+
+ + Add Descriptor +
+ Align To DWord +
+
+
+
+
+
+ + Add Descriptor... + +
+
+ + + + +
+
+
+ Data +
+
+
+
+ + Data + +
+
+ + + + +
+
+
+ sideBand +
+
+
+
+ + sideBand + +
+
+ + + + + + + +
+
+
+ rdReq +
+
+
+
+ + rdReq + +
+
+ + + + +
+
+
+ + ChunkSplit + +
+
+
+
+ + ChunkSplit + +
+
+ + + + +
+
+
+ chunkReq +
+
+
+
+ + chunkReq + +
+
+ + + + +
+
+
+ + Add Descriptor +
+
+
+
+
+
+ + Add Descriptor + +
+
+ + + + +
+
+
+ Data +
+
+
+
+ + Data + +
+
+ + + + + +
+
+
+ Straddle +
+
+
+
+ + Straddle + +
+
+ + + + +
+
+
+ + Reserve Tag +
+
+
+
+
+
+ + Reserve Tag + +
+
+ + + + +
+
+
+ + remove header +
+
+
+
+
+
+ + remove header + +
+
+ + + + +
+
+
+ + Reorder +
+
+
+
+
+
+ + Reorder + +
+
+ + + + +
+
+
+ chunkData +
+
+
+
+ + chunkData + +
+
+ + + + +
+
+
+ + ChunkReshape + +
+
+
+
+ + ChunkReshape + +
+
+ + + + +
+
+
+ rdData +
+
+
+
+ + rdData + +
+
+ + + + + + +
+
+
+ reqFifo +
+
+
+
+ + reqFifo + +
+
+ + + + +
+
+
+ + MPS + +
+
+
+
+ + MPS + +
+
+ + + + +
+
+
+ + MRRS + +
+
+
+
+ + MRRS + +
+
+ + + + +
+
+
+ + RCB + +
+
+
+
+ + RCB + +
+
+ + + + +
+
+
+ + + TX + + +
+ DataPath0 +
+
+
+
+ + TX... + +
+
+ + + + +
+
+
+ + + RX + + +
+ DataPath0 +
+
+
+
+ + RX... + +
+
+ + + + + + +
+
+
+ PathNum = 2 +
+ StraddleNum = 2 +
+
+
+
+ + PathNum = 2... + +
+
+ +
+ + + + + Text is not SVG - cannot display + + + +
\ No newline at end of file diff --git a/img/requester.drawio.svg b/img/requester.drawio.svg new file mode 100644 index 0000000..be971c5 --- /dev/null +++ b/img/requester.drawio.svg @@ -0,0 +1,997 @@ + + + + + + + + + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + + + +
+
+
+ Request +
+
+
+
+ + Request + +
+
+ + + + + + + + + + +
+
+
+ + convertToAxis + +
+
+
+
+ + convertToAxis + +
+
+ + + + +
+
+
+ RawPcieRequeser +
+
+
+
+ + RawPcieRequeser + +
+
+ + + + + +
+
+
+ data +
+
+
+
+ + data + +
+
+ + + + + +
+
+
+ req +
+
+
+
+ + req + +
+
+ + + + +
+
+
+ ChunkSplit +
+
+
+
+ + ChunkSplit + +
+
+ + + + +
+
+
+ isDone +
+
+
+
+ + isDone + +
+
+ + + + + + +
+
+
+ DataStream +
+
+
+
+ + DataStream + +
+
+ + + + + + +
+
+
+ Request +
+
+
+
+ + Request + +
+
+ + + + +
+
+
+ isDone +
+
+
+
+ + isDone + +
+
+ + + + +
+
+
+ + dsFifoInA + +
+
+
+
+ + dsFifoInA + +
+
+ + + + +
+
+
+ + tData + +
+
+
+
+ + tData + +
+
+ + + + +
+
+
+ + tKeep + +
+
+
+
+ + tKeep + +
+
+ + + + +
+
+
+ + tUser + +
+
+
+
+ + tUser + +
+
+ + + + +
+
+
+ + tLast + +
+
+
+
+ + tLast + +
+
+ + + + +
+
+
+ straddle 1 +
+
+
+
+ + straddle 1 + +
+
+ + + + +
+
+
+ straddle 0 +
+
+
+
+ + straddle 0 + +
+
+ + + + +
+
+
+ StreamA/B last beat bytePtr > 256bit +
+
+
+
+ + StreamA/B last beat bytePtr > 256bit + +
+
+ + + + +
+
+
+ Only singe streamA/B +
+
+
+
+ + Only singe streamA/B + +
+
+ + + + +
+
+
+ StreamA +
+
+
+
+ + StreamA + +
+
+ + + + +
+
+
+ StreamB +
+
+
+
+ + StreamB + +
+
+ + + + +
+
+
+ + x + +
+
+
+
+ + x + +
+
+ + + + +
+
+
+ + x + +
+
+
+
+ + x + +
+
+ + + + +
+
+
+ StreamA/B last beat bytePtr +
+ +remainPtr <= 256bit +
+
+
+
+ + StreamA/B last beat bytePtr... + +
+
+ + + + +
+
+
+ The other stream first +
+
+
+
+ + The other stream first + +
+
+ + + + +
+
+
+ Remain Data +
+
+
+
+ + Remain Data + +
+
+ + + + +
+
+
+ StreamA/B last beat bytePtr+remainPtr > 256bit +
+
+
+
+ + StreamA/B last beat bytePt... + +
+
+ + + + +
+
+
+ Remain Data +
+
+
+
+ + Remain Data + +
+
+ + + + +
+
+
+ Remain Data +
+
+
+
+ + Remain... + +
+
+ + + + + +
+
+
+ + isSop +
+ isEop +
+
+
+
+
+
+ + isSop... + +
+
+ + + + + + +
+
+
+ Same as above +
+
+
+
+ + Same as above + +
+
+ + + + +
+
+
+ + remainA + +
+
+
+
+ + remainA + +
+
+ + + + +
+
+
+ latency=5 +
+
+
+
+ + latency=5 + +
+
+ + + + +
+
+
+ + straddle 1 + +
+
+
+
+ + straddle 1 + +
+
+ + + + +
+
+
+ + straddle 0 + +
+
+
+
+ + straddle 0 + +
+
+ + + + +
+
+
+ 首先,如果存在remainStream,先拼接new coming dataStream与remainStream +
+
+ 1. 没有isSop与isEop:无数据或一整个完整数据 +
+
+ + 2. 只有一个isSop,没有isEop:第一帧且还有后续数据 + +
+
+ + 3. 只有一个isEop,没有isSop:最后一帧,另一个通道没有数据或者这个最后一帧长度大于256无法拼接 + +
+
+ + 4. 一个isSop,一个isEop:①一个短报文,长度大于256或者另一个通道没有数据;②一个通道结尾加另一个通道开头,通道一最后一帧,长度小于256,可以拼接另一个通道数据,可能会产生余数 + +
+
+ + 5. 一个isSop, 两个isEop:一个通道<256的结束,另一个通道开头,可能会产生遗留数据 + +
+
+ + 6. 两个isSop,一个isEop:一个通道有一个<256的短报文,另一个通道有 + +
+
+ + 7. 两个isSop,两个isEop + +
+
+
+
+
+ + 首先,如果存在remainStream,先拼接new coming dataStream与remainStream... + +
+
+ + + + +
+
+
+ + dsFifoInB + +
+
+
+
+ + dsFifoInB + +
+
+ + + + +
+
+
+ + remainB + +
+
+
+
+ + remainB + +
+
+ + + + + + + + + + + + + + + + + + + +
+
+
+ StreamShift +
+
+
+
+ + StreamShift + +
+
+ + + + + + + + + + + + + + + +
+
+
+ StreamShift +
+
+
+
+ + StreamShift + +
+
+ + + + + + + + + + + + + + +
+
+
+
+ + If 只有一个有数据 直接生成 + +
+
+ else if 两个都有数据 +
+
+ + + 先填充正在发送的,没有的话选一个 +
+
+
+ + + If 正在发送的byteEn[256] == 0 +
+
+
+ + + + 下半部分 + + 填另一个数据流的shift +
+
+
+ + + else 全部填充正在发送的 +
+
+
+
+
+
+ + If 只有一个有数据 直接生成... + +
+
+ + + + +
+
+
+ 有isFirst,就转换为isSop +
+ 有isLast,就转换为isEop +
+
+
+
+ + 有isFirst,就转换为isSop... + +
+
+ + + + + + +
+
+
+ StreamShiftToAlignDw +
+
+
+
+ + StreamShiftToAlignDw + +
+
+ + + + + + + +
+
+
+ first/lastByteEn +
+
+
+
+ + first/lastByteEn + +
+
+ + + + +
+
+
+ RqDescriptorGenerator +
+
+
+
+ + RqDescriptorGenerator + +
+
+ + + + + +
+
+
+ + DataStream + +
+
+
+
+ + DataStream + +
+
+ + + + +
+
+
+ bitOr +
+
+
+
+ + bitOr + +
+
+
+ + + + + Text is not SVG - cannot display + + + +
\ No newline at end of file diff --git a/img/requesterCmpl.drawio.svg b/img/requesterCmpl.drawio.svg new file mode 100644 index 0000000..2b1c844 --- /dev/null +++ b/img/requesterCmpl.drawio.svg @@ -0,0 +1,2744 @@ + + + + + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3 +
+
+
+
+ + A3 + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + + + +
+
+
+ request +
+
+
+
+ + request + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ ideal response +
+
+
+
+ + ideal response + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ A3(2) +
+
+
+
+ + A3(2) + +
+
+ + + + +
+
+
+ real RC AXIS +
+
+
+
+ + real RC AXIS + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ A3(2) +
+
+
+
+ + A3(2) + +
+
+ + + + +
+
+
+ LSB +
+
+
+
+ + LSB + +
+
+ + + + + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + + + +
+
+
+ A3(2) +
+
+
+
+ + A3(2) + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + + + + + + + + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ PathA +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ PathA +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ firstBeat +
+
+
+
+ + firstBeat + +
+
+ + + + +
+
+
+ firstBeat +
+
+
+
+ + firstBeat + +
+
+ + + + +
+
+
+ PathA +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + + + +
+
+
+ A3 +
+
+
+
+ + A3 + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + + + + +
+
+
+ FIFO0 +
+
+
+
+ + FIFO0 + +
+
+ + + + +
+
+
+ FIFO1 +
+
+
+
+ + FIFO1 + +
+
+ + + + +
+
+
+ FIFO0 +
+
+
+
+ + FIFO0 + +
+
+ + + + +
+
+
+ FIFO1 +
+
+
+
+ + FIFO1 + +
+
+ + + + +
+
+
+ Completion Buffer +
+
+
+
+ + Completion Buffer + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3 +
+
+
+
+ + A3 + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + + + +
+
+
+ request +
+
+
+
+ + request + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ ideal response +
+
+
+
+ + ideal response + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ A3(2) +
+
+
+
+ + A3(2) + +
+
+ + + + +
+
+
+ real RC AXIS +
+
+
+
+ + real RC AXIS + +
+
+ + + + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ A3(2) +
+
+
+
+ + A3(2) + +
+
+ + + + +
+
+
+ LSB +
+
+
+
+ + LSB + +
+
+ + + + + + +
+
+
+ PathA +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ PathA +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ firstBeat +
+
+
+
+ + firstBeat + +
+
+ + + + +
+
+
+ firstBeat +
+
+
+
+ + firstBeat + +
+
+ + + + +
+
+
+ + PathA + +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ FIFO0 +
+
+
+
+ + FIFO0 + +
+
+ + + + +
+
+
+ FIFO1 +
+
+
+
+ + FIFO1 + +
+
+ + + + + + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ FIFO0 +
+
+
+
+ + FIFO0 + +
+
+ + + + + + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ FIFO1 +
+
+
+
+ + FIFO1 + +
+
+ + + + + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ FIFO... +
+
+
+
+ + FIFO... + +
+
+ + + + + + + + + +
+
+
+ FIFO31 +
+
+
+
+ + FIFO31 + +
+
+ + + + + + +
+
+
+ Reg#(SlotNum) nextSlotPtrReg; +
+
+
+
+ + Reg#(SlotNum) nextSlotPtrReg; + +
+
+ + + + +
+
+
+ CompletionBuffer.put(tuple2(tag, nextSlotPtrReg) +
+
+
+
+ + CompletionBuffer.put(tuple2(tag, nextSlotPtrR... + +
+
+ + + + +
+
+
+ slotNum = CompletionBuffer.drain +
+ Fifos[slotNum].deq; +
+
+
+
+ + slotNum = CompletionBuffer.drain... + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3 +
+
+
+
+ + A3 + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + + + +
+
+
+ request +
+
+
+
+ + request + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ ideal response +
+
+
+
+ + ideal response + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ A3(2) +
+
+
+
+ + A3(2) + +
+
+ + + + +
+
+
+ real RC AXIS +
+
+
+
+ + real RC AXIS + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ A3(2) +
+
+
+
+ + A3(2) + +
+
+ + + + +
+
+
+ LSB +
+
+
+
+ + LSB + +
+
+ + + + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + + + +
+
+
+ A3(2) +
+
+
+
+ + A3(2) + +
+
+ + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + + + + + +
+
+
+ A3(1) +
+
+
+
+ + A3(1) + +
+
+ + + + + + + + +
+
+
+ B1 +
+
+
+
+ + B1 + +
+
+ + + + +
+
+
+ PathA +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ PathA +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ firstBeat +
+
+
+
+ + firstBeat + +
+
+ + + + +
+
+
+ firstBeat +
+
+
+
+ + firstBeat + +
+
+ + + + +
+
+
+ PathA +
+
+
+
+ + PathA + +
+
+ + + + +
+
+
+ PathB +
+
+
+
+ + PathB + +
+
+ + + + +
+
+
+ A1 +
+
+
+
+ + A1 + +
+
+ + + + +
+
+
+ A2 +
+
+
+
+ + A2 + +
+
+ + + + + + +
+
+
+ A3 +
+
+
+
+ + A3 + +
+
+ + + + +
+
+
+ A4 +
+
+
+
+ + A4 + +
+
+ + + + + +
+
+
+ request complete +
+
+
+
+ + request complete + +
+
+ + + + + +
+
+
+ Stream +
+
+
+
+ + Stream + +
+
+ + + + +
+
+
+ SLOTNUM : MRRS BRAM +
+
+
+
+ + SLOTNUM : MRRS BRAM + +
+
+ + + + +
+
+
+ 2: Data +
+
+
+
+ + 2: Data + +
+
+ + + + +
+
+
+ 3: +
+
+
+
+ + 3: + +
+
+ + + + +
+
+
+ 4: +
+
+
+
+ + 4: + +
+
+ + + + +
+
+
+ Completion Buffer +
+
+
+
+ + Completion Buffer + +
+
+ + + + + + +
+
+
+ PcieAxiStream +
+
+
+
+ + PcieAxiStream + +
+
+ + + + +
+
+
+ StreamShiftRight +
+
+
+
+ + StreamShiftRight + +
+
+ + + + + + + + +
+
+
+ Extract data according to isSop and isEop +
+ Dispatch to path A/B +
+
+
+
+ + Extract data according... + +
+
+ + + + +
+
+
+ StreamShiftRight +
+
+
+
+ + StreamShiftRight + +
+
+ + + + + + +
+
+
+ StraddleStreamFifo +
+
+
+
+ + StraddleStreamFifo + +
+
+ + + + + + +
+
+
+ StraddleStreamFifo +
+
+
+
+ + StraddleStreamFifo + +
+
+ + + + + +
+
+
+ reserve +
+
+
+
+ + reserve + +
+
+ + + + + + +
+
+
+ ChunkCompute +
+
+
+
+ + ChunkCompute + +
+
+ + + + + + +
+
+
+ DescriptorGen +
+
+
+
+ + DescriptorGen + +
+
+ + + + + +
+
+
+ Tag +
+
+
+
+ + Tag + +
+
+ + + + + +
+
+
+ drain +
+
+
+
+ + drain + +
+
+ + + + +
+
+
+ CBuffer +
+
+
+
+ + CBuffer + +
+
+ + + + + + +
+
+
+ RdReq +
+
+
+
+ + RdReq + +
+
+ + + + +
+
+
+ AxiStreamTx +
+
+
+
+ + AxiStreamTx + +
+
+ + + + + + +
+
+
+ AxiStreamRx +
+
+
+
+ + AxiStreamRx + +
+
+ + + + + +
+
+
+ Tag +
+
+
+
+ + Tag + +
+
+ + + + + +
+
+
+ TLp Payload +
+
+
+
+ + TLp Payload + +
+
+ + + + +
+
+
+ removeDesc +
+
+
+
+ + removeDesc + +
+
+ + + + + + +
+
+
+ SLOTNUM +
+
+
+
+ + SLOTNUM + +
+
+ + + + +
+
+
+ token -> tag +
+ data -> slotNum +
+
+
+
+ + token -> tag... + +
+
+
+ + + + + Text is not SVG - cannot display + + + +
\ No newline at end of file diff --git a/lib/blue_wrapper b/lib/blue_wrapper new file mode 160000 index 0000000..0845f36 --- /dev/null +++ b/lib/blue_wrapper @@ -0,0 +1 @@ +Subproject commit 0845f36b1dd60b90ba5b9163ed8ca37f493f3355 diff --git a/run b/run new file mode 100755 index 0000000..86169fa --- /dev/null +++ b/run @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +set -o errexit +set -o nounset +set -o xtrace + +BASH_PROFILE=$HOME/.bash_profile +if [ -f "$BASH_PROFILE" ]; then + source $BASH_PROFILE +fi + +TEST_DIR=`realpath ./test` +LOG_DIR=`realpath ./tmp` +ALL_LOG=$TEST_DIR/run.log + +mkdir -p $LOG_DIR + +make -j8 -f Makefile.test all TESTDIR=$TEST_DIR LOGDIR=$LOG_DIR +cat $LOG_DIR/*.log | tee $ALL_LOG + +FAIL_KEYWORKS='Error\|ImmAssert' +grep -w $FAIL_KEYWORKS $LOG_DIR/*.log | cat +ERR_NUM=`grep -c -w $FAIL_KEYWORKS $ALL_LOG | cat` +if [ $ERR_NUM -gt 0 ]; then + echo "FAIL" + false +else + echo "PASS" +fi diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..86169fa --- /dev/null +++ b/run.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +set -o errexit +set -o nounset +set -o xtrace + +BASH_PROFILE=$HOME/.bash_profile +if [ -f "$BASH_PROFILE" ]; then + source $BASH_PROFILE +fi + +TEST_DIR=`realpath ./test` +LOG_DIR=`realpath ./tmp` +ALL_LOG=$TEST_DIR/run.log + +mkdir -p $LOG_DIR + +make -j8 -f Makefile.test all TESTDIR=$TEST_DIR LOGDIR=$LOG_DIR +cat $LOG_DIR/*.log | tee $ALL_LOG + +FAIL_KEYWORKS='Error\|ImmAssert' +grep -w $FAIL_KEYWORKS $LOG_DIR/*.log | cat +ERR_NUM=`grep -c -w $FAIL_KEYWORKS $ALL_LOG | cat` +if [ $ERR_NUM -gt 0 ]; then + echo "FAIL" + false +else + echo "PASS" +fi diff --git a/run_one.sh b/run_one.sh new file mode 100755 index 0000000..ad1eac1 --- /dev/null +++ b/run_one.sh @@ -0,0 +1,37 @@ +#! 
/usr/bin/env bash + +set -o errexit +set -o nounset +set -o xtrace + +BASH_PROFILE=$HOME/.bash_profile +if [ -f "$BASH_PROFILE" ]; then + source $BASH_PROFILE +fi + +TEST_LOG=run.log +TEST_DIR=test +cd $TEST_DIR +truncate -s 0 $TEST_LOG +FILES=`ls TestStreamUtils.bsv` +########################################################################### +for FILE in $FILES; do + # echo $FILE + TESTCASES=`grep -Phzo 'doc.*?\nmodule\s+\S+(?=\()' $FILE | xargs -0 -I {} echo "{}" | grep module | cut -d ' ' -f 2` + for TESTCASE in $TESTCASES; do + make -j8 TESTFILE=$FILE TOPMODULE=$TESTCASE 2>&1 | tee -a $TEST_LOG + done +########################################################################### +done +########################################################################### + +FAIL_KEYWORKS='Error\|ImmAssert' +grep -w $FAIL_KEYWORKS $TEST_LOG | cat +ERR_NUM=`grep -c -w $FAIL_KEYWORKS $TEST_LOG | cat` +if [ $ERR_NUM -gt 0 ]; then + echo "FAIL" + false +else + echo "PASS" +fi + diff --git a/script.py b/script.py new file mode 100644 index 0000000..365f2d7 --- /dev/null +++ b/script.py @@ -0,0 +1,7 @@ +for i in range(64): + s = ['0'] * 64 + s[63-i] = '1' + if (i > 0): + s[63-i+1:63] = '?' * i + s = ''.join(s) + print("%s" % s) \ No newline at end of file diff --git a/setup.sh b/setup.sh new file mode 100755 index 0000000..137cded --- /dev/null +++ b/setup.sh @@ -0,0 +1,22 @@ +#! 
/bin/sh + +set -o errexit +set -o nounset +set -o xtrace + +rm -rf bsc-* +# wget https://github.com/B-Lang-org/bsc/releases/download/2022.01/bsc-2022.01-ubuntu-20.04.tar.gz +wget https://github.com/B-Lang-org/bsc/releases/download/2023.01/bsc-2023.01-ubuntu-22.04.tar.gz +tar zxf bsc-* + +BSC_FILE_NAME=`ls bsc-*.tar.gz` +BSC_DIR_NAME=`basename $BSC_FILE_NAME .tar.gz` +BLUESPEC_HOME=`realpath $BSC_DIR_NAME` + +BASH_PROFILE=$HOME/.bash_profile +touch $BASH_PROFILE +cat <> $BASH_PROFILE +# BSV required env +export BLUESPECDIR=$BLUESPEC_HOME/lib +export PATH=$PATH:$BLUESPEC_HOME/bin +EOF diff --git a/src/XilBdmaCompletionFifo.bsv b/src/XilBdmaCompletionFifo.bsv new file mode 100644 index 0000000..2103421 --- /dev/null +++ b/src/XilBdmaCompletionFifo.bsv @@ -0,0 +1,241 @@ +import GetPut::*; +import Counter::*; +import FIFOF::*; +import BRAMFIFO::*; +import Vector::*; +import DReg::*; +import Connectable::*; +import BRAM :: *; +import XilBdmaCpltBufferCf :: *; +import XilBdmaPrioritySearchBuffer :: *; + +import SemiFifo::*; + + + +typedef struct { + Bit#(TLog#(nChunk)) curChunkIdx; +} CpltFifoMetaEntry#(numeric type nChunk) deriving (Bits, FShow); + +// CompletionFifo +// +// A CompletionFifo is like a CompletionBuffer +// but uses Fifos instead of RegFile. +// CompletionFifo can reorder interlaced chunks belong to different streams. 
+ +// Example +// reserve a token : slot = CRam.reserve.get; +// receive a chunk : CRam.append.enq(tuple2(slot, chunk)); +// all chunks received: CRam.complete.put(slot); +// get chunks in order: CRam.drain.first; CRam.drain.deq; + +// Parameters: +// nSlot : slot numbers, should be less than 16 in current version +// nChunk: chunk numbers per slot, a large value may cause bad timing +// tChunk: chunk data types +interface CompletionFifo#(numeric type nSlot, numeric type nChunk, type tChunk); + interface Get#(SlotNum#(nSlot)) reserve; + method Bool available; + interface FifoIn#(Tuple3#(SlotNum#(nSlot), tChunk, Bool)) append; + interface FifoOut#(tChunk) drain; +endinterface + +typedef Bit#(TLog#(nSlot)) SlotNum#(numeric type nSlot); +typedef Bit#(TAdd#(TLog#(nSlot), TLog#(nChunk))) CpltFifoInternalBufferAddress#(numeric type nSlot, numeric type nChunk); + +function Bool isPowerOf2(Integer n); + return (n == (2 ** (log2(n)))); +endfunction + +module mkCompletionFifo(CompletionFifo#(nSlot, nChunk, tChunk)) + provisos ( + Bits#(tChunk, szChunk), + Add#(1, _a, szChunk), + Alias#(CpltFifoInternalBufferAddress#(nSlot, nChunk), tStorageAddr), + FShow#(tChunk) + ); + + + let maxSlotIdx = fromInteger(valueOf(nSlot) - 1); + + FIFOF#(Tuple3#(SlotNum#(nSlot), tChunk, Bool)) appendFifo <- mkFIFOF; + FIFOF#(tChunk) drainFifo <- mkFIFOF; + + + BRAM2Port#(SlotNum#(nSlot), CpltFifoMetaEntry#(nChunk)) metaStorage <- mkBRAM2Server(defaultValue); + + BRAM2Port#(tStorageAddr, tChunk) chunkStorage <- mkBRAM2Server(defaultValue); + + CompletionBuf#(nSlot, CpltFifoMetaEntry#(nChunk)) cpltFlagBuffer <- mkCompletionBuf; + + Counter#(TAdd#(1, TLog#(nSlot))) counter <- mkCounter(0); // number of filled slots + PrioritySearchBuffer#(6, SlotNum#(nSlot), CpltFifoMetaEntry#(nChunk)) storageForwardBuffer <- mkPrioritySearchBuffer(6); + + // Pipeline FIFOs: + FIFOF#(Tuple3#(SlotNum#(nSlot), tChunk, Bool)) handleMeatStorageRespPipelineQueue <- mkSizedFIFOF(4); + FIFOF#(CpltFifoMetaEntry#(nChunk)) 
pendingOutputPipelineQueue <- mkFIFOF; + + Reg#(Bool) bramInitedReg <- mkReg(False); + Reg#(SlotNum#(nSlot)) bramInitIdxReg <- mkReg(0); + + rule initBram if (!bramInitedReg); + let initMeta = CpltFifoMetaEntry { + curChunkIdx: 0 + }; + + let bramReqForMeta = BRAMRequest { + write : True, + responseOnWrite : False, + address : bramInitIdxReg, + datain : initMeta + }; + metaStorage.portB.request.put(bramReqForMeta); + bramInitIdxReg <= bramInitIdxReg + 1; + + if (bramInitIdxReg == maxBound) begin + bramInitedReg <= True; + end + endrule + + rule forwardDrain; + cpltFlagBuffer.deq; + pendingOutputPipelineQueue.enq(cpltFlagBuffer.first); + endrule + + rule handleWriteStepOne if (bramInitedReg); + let {slot, data, isAllCplt} = appendFifo.first; + appendFifo.deq; + + let bramReq = BRAMRequest { + write : False, + responseOnWrite : False, + address : slot, + datain : ? + }; + metaStorage.portA.request.put(bramReq); + handleMeatStorageRespPipelineQueue.enq(tuple3(slot, data, isAllCplt)); + + // $display( + // "time=%0t", $time, "mkCompletionFifo handleWriteStepOne", + // ", slot=", fshow(slot), + // ", isAllCplt=", fshow(isAllCplt), + // ", data=", fshow(data) + // ); + + endrule + + rule handleMetaStorageResp if (bramInitedReg); + let {slot, data, isAllCplt} = handleMeatStorageRespPipelineQueue.first; + handleMeatStorageRespPipelineQueue.deq; + + let meta <- metaStorage.portA.response.get; + let metaFromForwardMaybe <- storageForwardBuffer.search(slot); + if (metaFromForwardMaybe matches tagged Valid .forwardMeta) begin + meta = forwardMeta; + end + + + tStorageAddr storageAddr = unpack({pack(slot), pack(meta.curChunkIdx)}); + + let writeBackMeta = CpltFifoMetaEntry { + curChunkIdx: isAllCplt ? 
0 : meta.curChunkIdx + 1 + }; + + let bramReqForMeta = BRAMRequest { + write : True, + responseOnWrite : False, + address : slot, + datain : writeBackMeta + }; + metaStorage.portB.request.put(bramReqForMeta); + storageForwardBuffer.enq(slot, writeBackMeta); + + let bramReqForChunk = BRAMRequest { + write : True, + responseOnWrite : False, + address : storageAddr, + datain : data + }; + chunkStorage.portB.request.put(bramReqForChunk); + + + if (isAllCplt) begin + cpltFlagBuffer.complete(tuple2(slot, meta)); + end + + // $display( + // "time=%0t", $time, "mkCompletionFifo handleMetaStorageResp", + // ", slot=", fshow(slot), + // ", isAllCplt=", fshow(isAllCplt), + // ", data=", fshow(data) + // ); + + endrule + + Reg#(CpltFifoMetaEntry#(nChunk)) curOutputSlotMetaReg <- mkReg(CpltFifoMetaEntry{curChunkIdx: 0}); + Reg#(SlotNum#(nSlot)) curOutputSlotIdxReg <- mkReg(0); + rule handleFinalOutput; + let meta = pendingOutputPipelineQueue.first; + + let isFinished = meta.curChunkIdx == curOutputSlotMetaReg.curChunkIdx; + let isFirst = curOutputSlotMetaReg.curChunkIdx == 0; + + let nextBeatMeat = CpltFifoMetaEntry { curChunkIdx: curOutputSlotMetaReg.curChunkIdx + 1 }; + if (isFinished) begin + pendingOutputPipelineQueue.deq; + // mark next as First + nextBeatMeat = CpltFifoMetaEntry { curChunkIdx: 0 }; + curOutputSlotIdxReg <= curOutputSlotIdxReg + 1; + counter.down; + end + + let zeroBasedChunkCnt = meta.curChunkIdx; + tStorageAddr storageAddr = unpack({pack(curOutputSlotIdxReg), pack(curOutputSlotMetaReg.curChunkIdx)}); + let bramReqForChunk = BRAMRequest { + write : False, + responseOnWrite : False, + address : storageAddr, + datain : ? 
+ }; + chunkStorage.portA.request.put(bramReqForChunk); + curOutputSlotMetaReg <= nextBeatMeat; + + // $display( + // "time=%0t", $time, "mkCompletionFifo handleFinalOutput", + // ", meta=", fshow(meta), + // ", curOutputSlotMetaReg=", fshow(curOutputSlotMetaReg) + // ); + endrule + + rule forwardFinalOutput; + let chunk <- chunkStorage.portA.response.get; + drainFifo.enq(chunk); + // $display( + // "time=%0t", $time, "mkCompletionFifo forwardFinalOutput", + // ", chunk=", fshow(chunk) + // ); + endrule + + + interface Get reserve; + method ActionValue#(SlotNum#(nSlot)) get(); + let slotId <- cpltFlagBuffer.reserve; + counter.up; + // $display( + // "time=%0t", $time, "mkCompletionFifo reserve", + // ", slotId=", fshow(slotId) + // ); + + return slotId; + endmethod + endinterface + + method Bool available(); + return (counter.value <= maxSlotIdx); + endmethod + + + interface append = convertFifoToFifoIn(appendFifo); + interface drain = convertFifoToFifoOut(drainFifo); + +endmodule \ No newline at end of file diff --git a/src/XilBdmaCpltBufferCf.bsv b/src/XilBdmaCpltBufferCf.bsv new file mode 100644 index 0000000..c6fa202 --- /dev/null +++ b/src/XilBdmaCpltBufferCf.bsv @@ -0,0 +1,95 @@ +import Vector :: *; +import GetPut :: *; + +typedef Bit#(TLog#(size)) CBufIndex#(numeric type size); + +interface CompletionBuf#(numeric type size, type dType); + method Bool notFull; + method ActionValue#(CBufIndex#(size)) reserve; + method Action complete(Tuple2#(CBufIndex#(size), dType) x); + method Bool notEmpty; + method dType first; + method Action deq; +endinterface + + +module mkCompletionBuf(CompletionBuf#(size, dType)) provisos(Bits#(dType, dSize)); + + Vector#(size, Reg#(dType)) dataArray <- replicateM(mkRegU); + Vector#(size, Array#(Reg#(Bool))) tagArray <- replicateM(mkCReg(2, False)); + // Vector#(size, ) + Reg#(Bool) full <- mkReg(False); + Reg#(Bool) empty <- mkReg(True); + Reg#(CBufIndex#(size)) enqP <- mkReg(0); + Reg#(CBufIndex#(size)) deqP <- mkReg(0); + 
CBufIndex#(size) maxIndex = fromInteger(valueOf(size) - 1); + + Reg#(Bool) deqReq[2] <- mkCReg(2, False); + Reg#(Bool) reserveReq[2] <- mkCReg(2, False); + Reg#(Maybe#(Tuple2#(CBufIndex#(size), dType))) completeReq[2] <- mkCReg(2, Invalid); + + function CBufIndex#(size) nextIndex(CBufIndex#(size) index); + return (index == maxIndex) ? 0 : index + 1; + endfunction + + (* fire_when_enabled *) // WILL_FIRE == CAN_FIRE + (* no_implicit_conditions *) // CAN_FIRE == guard (True) + rule canonicalize; + let nextEnqP = nextIndex(enqP); + let nextDeqP = nextIndex(deqP); + if (reserveReq[1] && deqReq[1]) begin + tagArray[enqP][0] <= False; + enqP <= nextEnqP; + deqP <= nextDeqP; + end + else if (reserveReq[1]) begin + tagArray[enqP][0] <= False; + enqP <= nextEnqP; + empty <= False; + full <= nextEnqP == deqP; + end + else if (deqReq[1]) begin + deqP <= nextDeqP; + full <= False; + empty <= nextDeqP == enqP; + end + deqReq[1] <= False; + reserveReq[1] <= False; + endrule + + (* fire_when_enabled *) // WILL_FIRE == CAN_FIRE + (* no_implicit_conditions *) // CAN_FIRE == guard (True) + rule doComplete; + if (isValid(completeReq[1])) begin + match {.index, .data} = fromMaybe(?, completeReq[1]); + tagArray[index][1] <= True; + dataArray[index] <= data; + end + completeReq[1] <= tagged Invalid; + endrule + + Bool reserveReady = !full; + Bool deqReady = !empty && tagArray[deqP][0]; + + method Bool notFull = reserveReady; + + method ActionValue#(CBufIndex#(size)) reserve if (reserveReady); + reserveReq[0] <= True; + return enqP; + endmethod + + method Action complete(Tuple2#(CBufIndex#(size), dType) completeInfo); + completeReq[0] <= tagged Valid completeInfo; + endmethod + + method Bool notEmpty = deqReady; + + method dType first if (deqReady); + return dataArray[deqP]; + endmethod + + method Action deq if (deqReady); + deqReq[0] <= True; + endmethod + +endmodule \ No newline at end of file diff --git a/src/XilBdmaDmaC2HPipe.bsv b/src/XilBdmaDmaC2HPipe.bsv new file mode 100644 index 
0000000..8963312 --- /dev/null +++ b/src/XilBdmaDmaC2HPipe.bsv @@ -0,0 +1,641 @@ +import FIFOF::*; +import GetPut::*; +import Vector::*; +import Connectable::*; +import ClientServer::*; +import Probe :: *; + +import SemiFifo::*; +import XilBdmaPrimUtils::*; +import XilBdmaStreamUtils::*; +import XilBdmaPcieTypes::*; +import XilBdmaDmaTypes::*; +import XilBdmaPcieAxiStreamTypes::*; +import XilBdmaPcieDescriptorTypes::*; +import XilBdmaDmaUtils::*; +import XilBdmaCompletionFifo::*; + + +// Wrapper between original dma pipe and blue-rdma style interface +// interface BdmaC2HPipe; +// // User Logic Ifc +// interface Server#(BdmaUserC2hWrReq, BdmaUserC2hWrResp) writeSrv; +// interface Server#(BdmaUserC2hRdReq, BdmaUserC2hRdResp) readSrv; + +// // Pcie Adapter Ifc +// interface FifoOut#(DataStream) tlpDataFifoOut; +// interface FifoOut#(RqSideBandSignal) tlpSideBandFifoOut; +// interface FifoIn#(StraddleStream) tlpDataFifoIn; +// // TODO: CSR Ifc +// interface Put#(TlpSizeCfg) tlpSizeCfg; +// // interface Client#(DmaCsrValue, DmaCsrValue) statusReg; +// endinterface + +// module mkBdmaC2HPipe#(DmaPathNo pathIdx)(BdmaC2HPipe); +// C2HReadCore readCore <- mkC2HReadCore(pathIdx); +// C2HWriteCore writeCore <- mkC2HWriteCore(pathIdx); + +// Reg#(Bool) isInitDoneReg <- mkReg(False); +// Reg#(Bool) isInWriteCoreOutputReg <- mkReg(False); + +// FIFOF#(BdmaUserC2hWrReq) wrReqInFifo <- mkFIFOF; +// FIFOF#(BdmaUserC2hWrResp) wrRespOutFifo <- mkFIFOF; +// FIFOF#(BdmaUserC2hRdReq) rdReqInFifo <- mkFIFOF; +// FIFOF#(BdmaUserC2hRdResp) rdRespOutFifo <- mkFIFOF; + +// FIFOF#(DataStream) tlpOutFifo <- mkFIFOF; +// FIFOF#(RqSideBandSignal) tlpSideBandFifo <- mkFIFOF; + +// rule forwardWrReq if (isInitDoneReg); +// let req = wrReqInFifo.first; +// wrReqInFifo.deq; +// writeCore.dataFifoIn.enq(req.dataStream); +// writeCore.wrReqFifoIn.enq(DmaRequest { +// startAddr: req.addr, +// length : req.len, +// isWrite : True +// }); +// // $display($time, "ns SIM INFO @ mkBdmaC2HPipe%d: recv new 
request, startAddr:%d length:%d isWrite:%b", +// // pathIdx, req.addr, req.len, 1); +// endrule + +// rule forwardWrResp if (isInitDoneReg); +// let rv = writeCore.doneFifoOut.first; +// writeCore.doneFifoOut.deq; +// wrRespOutFifo.enq(BdmaUserC2hWrResp{ }); +// endrule + +// rule forwardRdReq if (isInitDoneReg); +// let req = rdReqInFifo.first; +// rdReqInFifo.deq; +// readCore.rdReqFifoIn.enq(DmaRequest { +// startAddr: req.addr, +// length : req.len, +// isWrite : False +// }); +// // $display($time, "ns SIM INFO @ mkBdmaC2HPipe%d: recv new request, startAddr:%d length:%d isWrite:%b", +// // pathIdx, req.addr, req.len, 0); +// endrule + +// rule forwardRdResp if (isInitDoneReg); +// let stream = readCore.dataFifoOut.first; +// readCore.dataFifoOut.deq; +// rdRespOutFifo.enq(BdmaUserC2hRdResp{ +// dataStream: stream +// }); +// endrule + +// rule muxTlpOut; +// if (isInWriteCoreOutputReg) begin +// let tlpStream = writeCore.tlpFifoOut.first; +// tlpOutFifo.enq(tlpStream); +// writeCore.tlpFifoOut.deq; +// isInWriteCoreOutputReg <= !tlpStream.isLast; +// end +// else begin +// if (readCore.tlpFifoOut.notEmpty) begin +// tlpOutFifo.enq(readCore.tlpFifoOut.first); +// tlpSideBandFifo.enq(readCore.tlpSideBandFifoOut.first); +// readCore.tlpFifoOut.deq; +// readCore.tlpSideBandFifoOut.deq; +// end +// else begin +// tlpOutFifo.enq(writeCore.tlpFifoOut.first); +// tlpSideBandFifo.enq(writeCore.tlpSideBandFifoOut.first); +// writeCore.tlpFifoOut.deq; +// writeCore.tlpSideBandFifoOut.deq; +// isInWriteCoreOutputReg <= !writeCore.tlpFifoOut.first.isLast; +// end +// end +// endrule + +// // User Ifc +// interface readSrv = toGPServer(rdReqInFifo, rdRespOutFifo); +// interface writeSrv = toGPServer(wrReqInFifo, wrRespOutFifo); + +// // Pcie Adapter Ifc +// interface tlpDataFifoOut = convertFifoToFifoOut(tlpOutFifo); +// interface tlpSideBandFifoOut = convertFifoToFifoOut(tlpSideBandFifo); +// interface tlpDataFifoIn = readCore.tlpFifoIn; +// // TODO: CSR Ifc +// interface 
Put tlpSizeCfg; +// method Action put(sizeCfg); +// writeCore.maxPayloadSize.put(tuple2(sizeCfg.mps, sizeCfg.mpsWidth)); +// readCore.maxReadReqSize.put(tuple2(sizeCfg.mrrs, sizeCfg.mrrsWidth)); +// isInitDoneReg <= True; +// endmethod +// endinterface + +// endmodule + +// TODO : change the PCIe Adapter Ifc to TlpData and TlpHeader, +// move the module which convert TlpHeader to IP descriptor from dma to adapter +interface DmaC2HPipe; + // User Logic Ifc + interface FifoIn#(DataStream) wrDataFifoIn; + interface FifoIn#(DmaRequest) reqFifoIn; + interface FifoOut#(DataStream) rdDataFifoOut; + interface FifoOut#(Bool) doneFifoOut; + // Pcie Adapter Ifc + interface FifoOut#(DataStream) tlpDataFifoOut; + interface FifoOut#(RqSideBandSignal) tlpSideBandFifoOut; + interface FifoIn#(StraddleStream) tlpDataFifoIn; + // TODO: CSR Ifc + interface Put#(TlpSizeCfg) tlpSizeCfg; + // interface Client#(DmaCsrValue, DmaCsrValue) statusReg; +endinterface + +// Single Path module +(* synthesize *) +module mkDmaC2HPipe#(DmaPathNo pathIdx)(DmaC2HPipe); + C2HReadCore readCore <- mkC2HReadCore(pathIdx); + C2HWriteCore writeCore <- mkC2HWriteCore(pathIdx); + + Reg#(Bool) isInitDoneReg <- mkReg(False); + Reg#(Bool) isInWriteCoreOutputReg <- mkReg(False); + + FIFOF#(DataStream) dataInFifo <- mkLFIFOF; + FIFOF#(DmaRequest) reqInFifo <- mkFIFOF; + FIFOF#(DataStream) tlpOutFifo <- mkFIFOF; + FIFOF#(RqSideBandSignal) tlpSideBandFifo <- mkFIFOF; + + mkConnection(dataInFifo, writeCore.dataFifoIn); + + // rule debug; + // if (!readCore.rdReqFifoIn.notFull) $display("mkDmaC2HPipe debug [%d] Queue Full readCore.rdReqFifoIn", pathIdx); + // if (!writeCore.wrReqFifoIn.notFull) $display("mkDmaC2HPipe debug [%d] Queue Full writeCore.wrReqFifoIn", pathIdx); + // if (!reqInFifo.notEmpty) $display("mkDmaC2HPipe debug [%d] Queue Empty reqInFifo", pathIdx); + // endrule + + rule reqDeMux if (isInitDoneReg); + let req = reqInFifo.first; + reqInFifo.deq; + if (req.isWrite) begin + 
writeCore.wrReqFifoIn.enq(req); + end + else begin + readCore.rdReqFifoIn.enq(req); + end + // $display($time, "ns SIM INFO @ mkDmaC2HPipe%d: recv new request, startAddr:%d length:%d isWrite:%b", + // pathIdx, req.startAddr, req.length, pack(req.isWrite)); + endrule + + + rule muxTlpOut; + if (isInWriteCoreOutputReg) begin + let tlpStream = writeCore.tlpFifoOut.first; + tlpOutFifo.enq(tlpStream); + writeCore.tlpFifoOut.deq; + isInWriteCoreOutputReg <= !tlpStream.isLast; + end + else begin + if (readCore.tlpFifoOut.notEmpty) begin + tlpOutFifo.enq(readCore.tlpFifoOut.first); + tlpSideBandFifo.enq(readCore.tlpSideBandFifoOut.first); + readCore.tlpSideBandFifoOut.deq; + readCore.tlpFifoOut.deq; + end + else begin + tlpOutFifo.enq(writeCore.tlpFifoOut.first); + tlpSideBandFifo.enq(writeCore.tlpSideBandFifoOut.first); + writeCore.tlpFifoOut.deq; + writeCore.tlpSideBandFifoOut.deq; + isInWriteCoreOutputReg <= !writeCore.tlpFifoOut.first.isLast; + end + end + endrule + + // User Logic Ifc + interface wrDataFifoIn = convertFifoToFifoIn(dataInFifo); + interface reqFifoIn = convertFifoToFifoIn(reqInFifo); + interface rdDataFifoOut = readCore.dataFifoOut; + interface doneFifoOut = writeCore.doneFifoOut; + // Pcie Adapter Ifc + interface tlpDataFifoOut = convertFifoToFifoOut(tlpOutFifo); + interface tlpSideBandFifoOut = convertFifoToFifoOut(tlpSideBandFifo); + interface tlpDataFifoIn = readCore.tlpFifoIn; + // TODO: CSR Ifc + interface Put tlpSizeCfg; + method Action put(sizeCfg); + writeCore.maxPayloadSize.put(tuple2(sizeCfg.mps, sizeCfg.mpsWidth)); + readCore.maxReadReqSize.put(tuple2(sizeCfg.mrrs, sizeCfg.mrrsWidth)); + isInitDoneReg <= True; + endmethod + endinterface +endmodule + +interface C2HReadCore; + // User Logic Ifc + interface FifoOut#(DataStream) dataFifoOut; + interface FifoIn#(DmaRequest) rdReqFifoIn; + // PCIe IP Ifc, connect to Requester Adapter + interface FifoIn#(StraddleStream) tlpFifoIn; + interface FifoOut#(DataStream) tlpFifoOut; + interface 
FifoOut#(RqSideBandSignal) tlpSideBandFifoOut; + + interface Put#(Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth)) maxReadReqSize; +endinterface + +// Total Latency(Tlp Output): 1 + 2 + 1 + 1 = 5 +// Total Latency(Tlp Input) : 1\2 + 2 + n + 2 + 1 = 5/6 + n (depends on the order) +module mkC2HReadCore#(DmaPathNo pathIdx)(C2HReadCore); + FIFOF#(StraddleStream) tlpInFifo <- mkLFIFOF; + FIFOF#(DmaRequest) reqInFifo <- mkFIFOF; + FIFOF#(DataStream) tlpOutFifo <- mkFIFOF; + FIFOF#(RqSideBandSignal) tlpByteEnFifo <- mkFIFOF; + + FIFOF#(SlotToken) tagFifo <- mkSizedFIFOF(valueOf(TAdd#(1, STREAM_HEADER_REMOVE_LATENCY))); + FIFOF#(Bool) completedFifo <- mkSizedFIFOF(valueOf(TAdd#(1, STREAM_HEADER_REMOVE_LATENCY))); + FIFOF#(DmaReadReqCnt) inflightFifo <- mkSizedFIFOF(valueOf(SLOT_PER_PATH)); + + + // StreamPipe descRemove <- mkStreamHeaderRemove(fromInteger(valueOf(TDiv#(DES_RC_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); + StreamPipe dwRemove <- mkStreamRemoveDescAndDW(fromInteger(valueOf(TDiv#(DES_RC_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); + StreamPipe reshapeStrad <- mkStreamReshape(pathIdx == 0); + StreamPipe reshapeRcb <- mkStreamReshape(False); + StreamPipe reshapeMrrs <- mkStreamReshape(False); + ChunkCompute chunkSplitor <- mkChunkComputer(DMA_RX); + CompletionFifo#(SLOT_PER_PATH, MAX_STREAM_NUM_PER_COMPLETION, DataStream) cBuffer <- mkCompletionFifo; + RqDescriptorGenerator rqDescGenerator <- mkRqDescriptorGenerator(False); + + Reg#(Bool) hasReadOnceReg <- mkReg(False); + Reg#(Bool) isStreamValidReg <- mkReg(True); + Reg#(DmaReadReqCnt) rcvReqCntReg <- mkReg(1); + Vector#(SLOT_PER_PATH, Reg#(Bool)) chunkFlagRegs <- replicateM(mkReg(False)); + + // mkConnection(reshapeStrad.streamFifoOut, descRemove.streamFifoIn); + // mkConnection(descRemove.streamFifoOut, dwRemove.streamFifoIn); + mkConnection(reshapeStrad.streamFifoOut, dwRemove.streamFifoIn); + + + mkConnection(chunkSplitor.reqCntFifoOut, inflightFifo); + Reg#(Bit#(8)) rcbBlockCntDebugReg <- mkReg(0); + Probe#(Bit#(8)) 
rcbBlockCntDebugRegProbe <- mkProbe; + + // rule debug if (pathIdx == 0); + // if (!reshapeStrad.streamFifoIn.notFull) begin + // $display("time=%0t, FULL QUEUE mkC2HReadCore reshapeStrad.streamFifoIn", $time); + // end + // if (!tagFifo.notFull) begin + // $display("time=%0t, FULL QUEUE mkC2HReadCore tagFifo", $time); + // end + // if (!completedFifo.notFull) begin + // $display("time=%0t, FULL QUEUE mkC2HReadCore completedFifo", $time); + // end + + // if (!tlpInFifo.notFull) begin + // $display("time=%0t, FULL QUEUE mkC2HReadCore tlpInFifo", $time); + // end + + // if (!descRemove.streamFifoIn.notFull) begin + // $display("time=%0t, FULL QUEUE mkC2HReadCore descRemove.streamFifoIn", $time); + // end + + // if (!dwRemove.streamFifoIn.notFull) begin + // $display("time=%0t, FULL QUEUE mkC2HReadCore dwRemove.streamFifoIn", $time); + // end + + // if (!tlpInFifo.notEmpty) begin + // $display("time=%0t, EMPTY QUEUE mkC2HReadCore tlpInFifo", $time); + // end + + + + // endrule + Probe#(ErrorCode) errorCodeProbe <- mkProbe; + // Pipeline stage 1: convert StraddleStream to DataStream, may cost 2 cycle for one StraddleStream + rule convertStraddleToDataStream; + let sdStream = tlpInFifo.first; + let stream = getEmptyStream; + SlotToken tag = 0; + Bool isCompleted = False; + + if (sdStream.isDoubleFrame) begin + PcieTlpCtlIsSopPtr isSopPtr = 0; + if (hasReadOnceReg) begin + tlpInFifo.deq; + hasReadOnceReg <= False; + isSopPtr = 1; + end + else begin + hasReadOnceReg <= True; + end + stream = DataStream { + data : getStraddleData(isSopPtr, sdStream.data), + byteEn : getStraddleByteEn(isSopPtr, sdStream.byteEn), + isFirst : sdStream.isFirst[isSopPtr], + isLast : sdStream.isLast[isSopPtr] + }; + tag = sdStream.tag[isSopPtr]; + isCompleted = sdStream.isCompleted[isSopPtr]; + end + else begin + tlpInFifo.deq; + hasReadOnceReg <= False; + stream = DataStream { + data : sdStream.data, + byteEn : sdStream.byteEn, + isFirst : sdStream.isFirst[0], + isLast : sdStream.isLast[0] + }; 
+ tag = sdStream.tag[0]; + isCompleted = sdStream.isCompleted[0]; + end + + stream = maskDataStreamWithByteEn(stream); + + Bool isStreamValid = isStreamValidReg; + if (stream.isFirst) begin + PcieRequesterCompleteDescriptor desc = unpack(truncate(stream.data)); + isStreamValid = (desc.errorcode == 0); + errorCodeProbe <= desc.errorcode; + end + if (isStreamValid) begin + reshapeStrad.streamFifoIn.enq(stream); + // $display("time=%0t, parse from straddle, tag: %d, cmpl status: %d", $time, tag, pack(isCompleted), fshow(stream)); + if (stream.isFirst) begin + tagFifo.enq(tag); + completedFifo.enq(isCompleted); + end + end + else begin + // $display("time=%0t, parse from straddle not valid, tag: %d, cmpl status: %d", $time, tag, pack(isCompleted), ", stream=", fshow(stream), ", sdStream=", fshow(sdStream)); + end + isStreamValidReg <= isStreamValid; + + endrule + + // Pipeline stage 2: remove the descriptor in the head of each TLP + + // rule debug; + // // if (!chunkSplitor.dmaRequestFifoIn.notFull) $display("mkC2HReadCore debug [%d] Queue Full chunkSplitor.dmaRequestFifoIn", pathIdx); + // // if (!reqInFifo.notEmpty) $display("mkC2HReadCore debug [%d] Queue Empty reqInFifo", pathIdx); + // // if (!chunkSplitor.chunkRequestFifoOut.notEmpty) $display("mkC2HReadCore debug [%d] Queue Empty chunkSplitor.chunkRequestFifoOut", pathIdx); + // // if (!rqDescGenerator.exReqFifoIn.notFull) $display("mkC2HReadCore debug [%d] Queue Full rqDescGenerator.exReqFifoIn", pathIdx); + + // // if (!tlpOutFifo.notFull) $display("mkC2HReadCore debug [%d] Queue Full tlpOutFifo", pathIdx); + // // if (!tlpByteEnFifo.notFull) $display("mkC2HReadCore debug [%d] Queue Full tlpByteEnFifo", pathIdx); + // // if (!cBuffer.available) $display("mkC2HReadCore debug [%d] cBuffer not Available", pathIdx); + + + // if (!cBuffer.append.notFull) $display("mkC2HReadCore debug [%d] Queue Full cBuffer.append", pathIdx); + // if (!dwRemove.streamFifoOut.notEmpty) $display("mkC2HReadCore debug [%d] Queue Empty 
dwRemove.streamFifoOut", pathIdx); + // if (!completedFifo.notEmpty) $display("mkC2HReadCore debug [%d] Queue Empty completedFifo", pathIdx); + // if (!tagFifo.notEmpty) $display("mkC2HReadCore debug [%d] Queue Empty tagFifo", pathIdx); + // endrule + + // Pipeline stage 3: Buffer the received DataStreams and reorder them + rule reorderStream; + let stream = dwRemove.streamFifoOut.first; + let byteInStream = convertByteEn2BytePtr(stream.byteEn); + let isCompleted = completedFifo.first; + let tag = tagFifo.first; + let rcvdFlag = True; + dwRemove.streamFifoOut.deq; + // $display($time, "ns SIM INFO @ mkDmaC2HReadCore%d: from dwRemove to cBuf, tag: %d, cmpl: %d", pathIdx, tag, pack(isCompleted), fshow(stream)); + if (stream.isLast) begin + completedFifo.deq; + tagFifo.deq; + rcbBlockCntDebugReg <= 0; + end + else begin + rcbBlockCntDebugReg <= rcbBlockCntDebugReg + 1; + end + rcbBlockCntDebugRegProbe <= rcbBlockCntDebugReg; + stream.isLast = isCompleted && stream.isLast; //Re-define the stream boundary + stream.isFirst = stream.isFirst && (!chunkFlagRegs[tag]); + cBuffer.append.enq(tuple3(unpack(truncate(pack(tag))), stream, stream.isLast)); + if (stream.isLast) begin + // $display($time, "ns SIM INFO @ mkDmaC2HReadCore%d: a chunk is completed in cBuffer, tag: %d", pathIdx, tag); + rcvdFlag = False; + end + chunkFlagRegs[tag] <= rcvdFlag; + endrule + + // Pipeline stage 4: there may be a bubble between the first and last DataStream of cBuffer drain output + // Reshape the DataStream from RCB chunks to MRRS chunks + rule reshapeRCB; + let stream = cBuffer.drain.first; + cBuffer.drain.deq; + reshapeRcb.streamFifoIn.enq(stream); + // $display("time=%0t, cbuf output", $time, fshow(stream)); + endrule + + // Pipeline stage 5: there may be bubbles in the first and last DataStream of a request because of MRRS split + // Reshape the DataStream from MRRS chunks to a whole DataStream + rule reshapeMRRS; + let stream = reshapeRcb.streamFifoOut.first; +
reshapeRcb.streamFifoOut.deq; + if (stream.isLast) begin + let rcvReqCnt = rcvReqCntReg; + // $display("DEBUG: get isLast from reshapeRcb, fifo.first:%d, rcvReqCntReg: %d", inflightFifo.first, rcvReqCntReg); + if (inflightFifo.first == rcvReqCnt) begin + rcvReqCnt = 1; + inflightFifo.deq; + end + else begin + rcvReqCnt = rcvReqCnt + 1; + stream.isLast = False; + end + rcvReqCntReg <= rcvReqCnt; + end + stream.isFirst = stream.isFirst && (rcvReqCntReg == 1); + reshapeMrrs.streamFifoIn.enq(stream); + // $display( + // "time=%0t", $time, ", mkC2HReadCore reshapeMRRS", + // ", pathIdx=", fshow(pathIdx), + // ", stream=", fshow(stream) + // ); + endrule + + // Pipeline stage 1: split to req to MRRS chunks + rule reqSplit; + let req = reqInFifo.first; + reqInFifo.deq; + let exReq = DmaExtendRequest { + startAddr : req.startAddr, + endAddr : req.startAddr + zeroExtend(req.length - 1), + length : req.length, + tag : 0, + attr : req.attr + }; + chunkSplitor.dmaRequestFifoIn.enq(exReq); + + if (req.length > fromInteger(valueOf(BUS_BOUNDARY))) begin + $display("length too large. 
the max value is limited by READ_REQ_CNT_WIDTH, if the read cplt packet cnt exceed this value, undefined behaviour will occur."); + $finish(1); + end + endrule + + // Pipeline stage 2: generate read descriptor + rule cqDescGen; + let req = chunkSplitor.chunkRequestFifoOut.first; + chunkSplitor.chunkRequestFifoOut.deq; + let token <- cBuffer.reserve.get; + let exReq = DmaExtendRequest { + startAddr: req.startAddr, + endAddr : req.startAddr + zeroExtend(req.length - 1), + length : req.length, + tag : convertSlotTokenToTag(zeroExtend(token), pathIdx), + attr : req.attr + }; + rqDescGenerator.exReqFifoIn.enq(exReq); + $display($time, "ns SIM INFO @ mkDmaC2HReadCore%d: tx a new read chunk, tag:%d, addr:%d, length:%d", pathIdx, exReq.tag, req.startAddr, req.length); + endrule + + // Pipeline stage 3: generate Tlp to PCIe Adapter + rule tlpGen; + let stream = rqDescGenerator.descFifoOut.first; + let rqSideBandSignal = rqDescGenerator.byteEnFifoOut.first; + rqDescGenerator.descFifoOut.deq; + rqDescGenerator.byteEnFifoOut.deq; + stream.isFirst = True; + stream.isLast = True; + tlpOutFifo.enq(stream); + tlpByteEnFifo.enq(rqSideBandSignal); + // $display($time, "ns SIM INFO @ mkDmaC2HReadCore%d: output new tlp, BE:%h/%h", pathIdx, tpl_1(rqSideBandSignal), tpl_2(rqSideBandSignal)); + endrule + + + + // User Logic Ifc + interface rdReqFifoIn = convertFifoToFifoIn(reqInFifo); + interface dataFifoOut = reshapeMrrs.streamFifoOut; + // PCIe IP Ifc + interface tlpFifoIn = convertFifoToFifoIn(tlpInFifo); + interface tlpFifoOut = convertFifoToFifoOut(tlpOutFifo); + interface tlpSideBandFifoOut = convertFifoToFifoOut(tlpByteEnFifo); + // Cfg Ifc + interface Put maxReadReqSize; + method Action put(Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth) mrrsCfg); + chunkSplitor.maxReadReqSize.put(mrrsCfg); + endmethod + endinterface +endmodule + +// Core path of a single stream, from (DataStream, DmaRequest) ==> (DataStream, RqSideBandSignal) +// split to chunks, align to DWord and add descriptor 
at the first +interface C2HWriteCore; + // User Logic Ifc + interface FifoIn#(DataStream) dataFifoIn; + interface FifoIn#(DmaRequest) wrReqFifoIn; + interface FifoOut#(Bool) doneFifoOut; + // PCIe IP Ifc + interface FifoOut#(DataStream) tlpFifoOut; + interface FifoOut#(RqSideBandSignal) tlpSideBandFifoOut; + + interface Put#(Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth)) maxPayloadSize; +endinterface + +// Total Latency: 1 + 3 + 2 + 1 = 7 +module mkC2HWriteCore#(DmaPathNo pathIdx)(C2HWriteCore); + FIFOF#(DataStream) dataInFifo <- mkLFIFOF; + FIFOF#(DmaRequest) wrReqInFifo <- mkFIFOF; + FIFOF#(DataStream) dataOutFifo <- mkFIFOF; + FIFOF#(RqSideBandSignal) byteEnOutFifo <- mkFIFOF; + + Reg#(SlotToken) tagReg <- mkReg(0); + + ChunkSplit chunkSplit <- mkChunkSplit(DMA_TX); + StreamShiftAlignToDw streamAlign <- mkStreamShiftAlignToDw(fromInteger(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH)))); + RqDescriptorGenerator rqDescGenerator <- mkRqDescriptorGenerator(True); + + // rule debug; + // if (!wrReqInFifo.notEmpty) $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: emptyQueue wrReqInFifo", pathIdx); + // if (!dataInFifo.notEmpty) $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: emptyQueue dataInFifo", pathIdx); + // if (!chunkSplit.reqFifoIn.notFull) $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: fullQueue chunkSplit.reqFifoIn", pathIdx); + // if (!chunkSplit.dataFifoIn.notFull) $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: fullQueue chunkSplit.dataFifoIn", pathIdx); + + // endrule + + // Pipeline stage 1: split the whole write request to chunks, latency = 3 + rule splitToChunks; + let wrStream = dataInFifo.first; + // $display($time, "ns SIM INFO @ mkC2HWriteCore: ", fshow(wrStream), fshow(wrReqInFifo.notEmpty), fshow(chunkSplit.dataFifoIn.notFull), fshow(chunkSplit.reqFifoIn.notFull)); + if (wrStream.isFirst && wrReqInFifo.notEmpty) begin + wrReqInFifo.deq; + let wrReq = wrReqInFifo.first; + let exReq = DmaExtendRequest { + startAddr : 
wrReq.startAddr, + endAddr : wrReq.startAddr + zeroExtend(wrReq.length - 1), + length : wrReq.length, + tag : 0, + attr : wrReq.attr + }; + chunkSplit.reqFifoIn.enq(exReq); + dataInFifo.deq; + chunkSplit.dataFifoIn.enq(wrStream); + if (wrReq.length > fromInteger(valueOf(BUS_BOUNDARY))) begin + $display("length too large. the max value is limited by READ_REQ_CNT_WIDTH."); + $finish(1); + end + end + else if (!wrStream.isFirst) begin + dataInFifo.deq; + chunkSplit.dataFifoIn.enq(wrStream); + end + endrule + + // Pipeline stage 2: shift the datastream for descriptor adding and dw alignment + rule shiftToAlignment; + if (chunkSplit.chunkReqFifoOut.notEmpty) begin + let chunkReq = chunkSplit.chunkReqFifoOut.first; + chunkSplit.chunkReqFifoOut.deq; + let exReq = DmaExtendRequest { + startAddr: chunkReq.startAddr, + endAddr : chunkReq.startAddr + zeroExtend(chunkReq.length - 1), + length : chunkReq.length, + tag : convertSlotTokenToTag(tagReg, pathIdx), + attr : chunkReq.attr + }; + tagReg <= tagReg + 1; + let startAddrOffset = byteModDWord(exReq.startAddr); + streamAlign.setAlignMode(unpack(startAddrOffset)); + rqDescGenerator.exReqFifoIn.enq(exReq); + // $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: tx a new write chunk, tag:%d, addr:%d, length:%d", pathIdx, convertSlotTokenToTag(tagReg, pathIdx), chunkReq.startAddr, chunkReq.length); + end + if (chunkSplit.chunkDataFifoOut.notEmpty) begin + let chunkDataStream = chunkSplit.chunkDataFifoOut.first; + chunkSplit.chunkDataFifoOut.deq; + streamAlign.dataFifoIn.enq(chunkDataStream); + // if (chunkDataStream.isLast && chunkDataStream.isFirst) begin + // $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: tx write chunk end , tag:%d", pathIdx, convertSlotTokenToTag(tagReg, pathIdx)); + // end + // else if (chunkDataStream.isLast) begin + // $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: tx write chunk end , tag:%d", pathIdx, convertSlotTokenToTag(tagReg-1, pathIdx)); + // end + end + endrule + + // Pipeline stage 3: 
Add descriptor and add to the axis convert module + rule addDescriptorToAxis; + let stream = streamAlign.dataFifoOut.first; + streamAlign.dataFifoOut.deq; + if (stream.isFirst) begin + let descStream = rqDescGenerator.descFifoOut.first; + let rqSideBandSignal = rqDescGenerator.byteEnFifoOut.first; + rqDescGenerator.descFifoOut.deq; + rqDescGenerator.byteEnFifoOut.deq; + stream.data = stream.data | descStream.data; + stream.byteEn = stream.byteEn | descStream.byteEn; + byteEnOutFifo.enq(rqSideBandSignal); + // $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: tx a new tlp, BE:%b/%b", pathIdx, tpl_1(rqSideBandSignal), tpl_2(rqSideBandSignal)); + end + dataOutFifo.enq(stream); + // $display($time, "ns SIM INFO @ mkDmaC2HWriteCore%d: tlp stream", pathIdx, fshow(stream)); + endrule + + // User Logic Ifc + interface dataFifoIn = convertFifoToFifoIn(dataInFifo); + interface wrReqFifoIn = convertFifoToFifoIn(wrReqInFifo); + interface doneFifoOut = chunkSplit.doneFifoOut; + // PCIe Adapter Ifc + interface tlpFifoOut = convertFifoToFifoOut(dataOutFifo); + interface tlpSideBandFifoOut = convertFifoToFifoOut(byteEnOutFifo); + // Cfg Ifc + interface Put maxPayloadSize; + method Action put(Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth) mpsCfg); + chunkSplit.maxPayloadSize.put(mpsCfg); + endmethod + endinterface +endmodule diff --git a/src/XilBdmaDmaH2CPipe.bsv b/src/XilBdmaDmaH2CPipe.bsv new file mode 100644 index 0000000..01ca169 --- /dev/null +++ b/src/XilBdmaDmaH2CPipe.bsv @@ -0,0 +1,261 @@ +import FIFOF::*; +import Vector::*; +import RegFile::*; +import ClientServer::*; +import Connectable :: *; + +import SemiFifo::*; +import XilBdmaPrimUtils::*; +import XilBdmaPcieAxiStreamTypes::*; +import XilBdmaPcieTypes::*; +import XilBdmaPcieDescriptorTypes::*; +import XilBdmaPcieAdapter::*; +import XilBdmaDmaTypes::*; +import XilBdmaSimpleModeUtils::*; +import Probe :: *; + +typedef 1 IDEA_CQ_CSR_DWORD_CNT; +typedef 1 IDEA_CC_CSR_DWORD_CNT; +typedef 4 IDEA_CC_CSR_BYTE_CNT; +typedef 4 
IDEA_FIRST_BE_HIGH_VALID_PTR_OF_CSR; + +// // Wrapper between original dma pipe and blue-rdma style interface +// interface BdmaH2CPipe#(numeric type sz_csr_addr, numeric type sz_csr_data); +// // User Ifc +// interface Client#(BdmaUserH2cWrReq#(sz_csr_addr, sz_csr_data), BdmaUserH2cWrResp) writeClt; +// interface Client#(BdmaUserH2cRdReq#(sz_csr_addr), BdmaUserH2cRdResp#(sz_csr_data)) readClt; + +// // Pcie Adapter Ifc +// interface FifoIn#(DataStream) tlpDataFifoIn; +// interface FifoOut#(DataStream) tlpDataFifoOut; +// endinterface + +// module mkBdmaH2CPipe(BdmaH2CPipe#(sz_csr_addr, sz_csr_data)) +// provisos( +// Add#(_a, sz_csr_addr, DMA_CSR_ADDR_WIDTH), +// Add#(_b, sz_csr_data, DMA_CSR_DATA_WIDTH) +// ); +// DmaH2CPipe pipe <- mkDmaH2CPipe; +// FIFOF#(BdmaUserH2cWrReq#(sz_csr_addr, sz_csr_data)) wrReqQ <- mkFIFOF; +// FIFOF#(BdmaUserH2cWrResp) wrRespQ <- mkFIFOF; +// FIFOF#(BdmaUserH2cRdReq#(sz_csr_addr)) rdReqQ <- mkFIFOF; +// FIFOF#(BdmaUserH2cRdResp#(sz_csr_data)) rdRespQ <- mkFIFOF; +// let dummyCsr <- mkDummyCsr; + +// mkConnection(pipe.csrReqFifoOut, dummyCsr.reqFifoIn); +// mkConnection(dummyCsr.respFifoOut, pipe.csrRespFifoIn); + +// rule forwardReq; +// let h2cReq = pipe.userReqFifoOut.first; +// pipe.userReqFifoOut.deq; +// if (h2cReq.isWrite) begin +// BdmaUserH2cWrReq#(sz_csr_addr, sz_csr_data) wrReq = BdmaUserH2cWrReq { +// addr: truncate(h2cReq.addr), +// data: truncate(h2cReq.value) +// }; +// wrReqQ.enq(wrReq); +// end +// else begin +// BdmaUserH2cRdReq#(sz_csr_addr) rdReq = BdmaUserH2cRdReq { +// addr: truncate(h2cReq.addr) +// }; +// rdReqQ.enq(rdReq); +// end +// endrule + +// rule handleWrResp; +// wrRespQ.deq; +// endrule + +// rule handleRdResp; +// let value = rdRespQ.first.data; +// rdRespQ.deq; +// pipe.userRespFifoIn.enq(CsrResponse{ +// addr : 0, +// value: zeroExtend(value) +// }); +// endrule + +// interface writeClt = toGPClient(wrReqQ, wrRespQ); +// interface readClt = toGPClient(rdReqQ, rdRespQ); +// interface tlpDataFifoIn 
= pipe.tlpDataFifoIn; +// interface tlpDataFifoOut = pipe.tlpDataFifoOut; +// endmodule + + +function CsrResponse getEmptyCsrResponse(); + return CsrResponse { + addr : 0, + value : 0 + }; +endfunction + +interface DmaH2CPipe; + // DMA Internal Csr + interface FifoOut#(CsrRequest) csrReqFifoOut; + interface FifoIn#(CsrResponse) csrRespFifoIn; + // User Ifc + interface FifoOut#(CsrRequest) userReqFifoOut; + interface FifoIn#(CsrResponse) userRespFifoIn; + // Pcie Adapter Ifc + interface FifoIn#(DataStream) tlpDataFifoIn; + interface FifoOut#(DataStream) tlpDataFifoOut; + // TODO: Cfg Ifc +endinterface + +(* synthesize *) +module mkDmaH2CPipe(DmaH2CPipe); + + FIFOF#(DataStream) tlpInFifo <- mkLFIFOF; + FIFOF#(DataStream) tlpOutFifo <- mkFIFOF; + + FIFOF#(CsrRequest) reqOutFifo <- mkFIFOF; + FIFOF#(CsrResponse) respInFifo <- mkFIFOF; + + FIFOF#(CsrRequest) userOutFifo <- mkFIFOF; + FIFOF#(CsrResponse) userInFifo <- mkFIFOF; + + FIFOF#(Tuple2#(CsrRequest, PcieCompleterRequestDescriptor)) pendingFifo <- mkSizedFIFOF(valueOf(CMPL_NPREQ_INFLIGHT_NUM)); + + function PcieCompleterRequestDescriptor getDescriptorFromFirstBeat(DataStream stream); + return unpack(truncate(stream.data)); + endfunction + + function Data getDataFromFirstBeat(DataStream stream); + return stream.data >> valueOf(DES_CQ_DESCRIPTOR_WIDTH); + endfunction + + Reg#(Bool) isInPacket <- mkReg(False); + Reg#(UInt#(32)) illegalPcieReqCntReg <- mkReg(0); + + DataBytePtr csrCmplBytes = fromInteger(valueOf(TDiv#(TAdd#(DES_CC_DESCRIPTOR_WIDTH ,DMA_CSR_DATA_WIDTH), BYTE_WIDTH))); + + // The return address of this function is aligned to BYTE + function DmaCsrAddr getBarAddrFromCqDescriptor(PcieCompleterRequestDescriptor descriptor); + // Only care about low bits, because the offset is pre-assigned and not important. 
+ let addr = getAddrLowBits(zeroExtend(descriptor.address) << valueOf(TLog#(DWORD_BYTES)), descriptor.barAperture); + return truncate(addr); + endfunction + + rule parseTlp; + tlpInFifo.deq; + let stream = tlpInFifo.first; + isInPacket <= !stream.isLast; + if (!isInPacket) begin + let descriptor = getDescriptorFromFirstBeat(stream); + if (descriptor.dwordCnt == fromInteger(valueOf(IDEA_CQ_CSR_DWORD_CNT))) begin + $display($time, "ns SIM INFO @ mkDmaH2CPipe: recv CQ, address: %h\n", descriptor.address, fshow(descriptor)); + case (descriptor.reqType) + fromInteger(valueOf(MEM_WRITE_REQ)): begin + let firstData = getDataFromFirstBeat(stream); + DmaCsrValue wrValue = truncate(firstData); + let wrAddr = getBarAddrFromCqDescriptor(descriptor); + let req = CsrRequest { + addr : wrAddr, + value : wrValue, + isWrite : True + }; + if (descriptor.barId == 0) begin + req.addr = req.addr >> valueOf(TLog#(DWORD_BYTES)); + reqOutFifo.enq(req); + end + else if (descriptor.barId == 1) begin + userOutFifo.enq(req); + end + end + fromInteger(valueOf(MEM_READ_REQ)): begin + let rdAddr = getBarAddrFromCqDescriptor(descriptor); + let req = CsrRequest{ + addr : rdAddr, + value : 0, + isWrite : False + }; + if (descriptor.barId == 0) begin + req.addr = req.addr >> valueOf(TLog#(DWORD_BYTES)); + reqOutFifo.enq(req); + end + else if (descriptor.barId == 1) begin + userOutFifo.enq(req); + // $display($time, "ns SIM INFO @ mkDmaH2CPipe: Valid User Bar rdReq, addr %h", getBarAddrFromCqDescriptor(descriptor)); + end + pendingFifo.enq(tuple2(req, descriptor)); + end + default: illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; + endcase + end + else begin + $display($time, "ns SIM INFO @ mkDmaH2CPipe: Invalid req with Addr %d, dwCnt %d", getBarAddrFromCqDescriptor(descriptor), descriptor.dwordCnt); + $finish(1); + illegalPcieReqCntReg <= illegalPcieReqCntReg + 1; + end + end + endrule + + rule genTlp; + CsrResponse resp = getEmptyCsrResponse; + if (respInFifo.notEmpty) begin + resp = 
respInFifo.first; + resp.addr = resp.addr << valueOf(TLog#(DWORD_BYTES)); + respInFifo.deq; + end + else begin + resp = userInFifo.first; + userInFifo.deq; + end + let addr = resp.addr; + let value = resp.value; + let {req, cqDescriptor} = pendingFifo.first; + + if (addr == req.addr || addr == 0) begin + $display($time, "ns SIM INFO @ mkDmaH2CPipe: Valid rdResp with Addr %d, value %d", req.addr, value); + end + else begin + $display($time, "ns SIM ERROR @ mkDmaH2CPipe: InValid rdResp with Addr %d, value %d and Expect Addr %d", addr, value, req.addr); + $finish(1); + end + + pendingFifo.deq; + let ccDescriptor = PcieCompleterCompleteDescriptor { + reserve0 : 0, + attributes : cqDescriptor.attributes, + trafficClass : cqDescriptor.trafficClass, + completerIdEn : False, + completerId : 0, + tag : cqDescriptor.tag, + requesterId : cqDescriptor.requesterId, + reserve1 : 0, + isPoisoned : False, + status : fromInteger(valueOf(DES_CC_STAUS_SUCCESS)), + dwordCnt : fromInteger(valueOf(IDEA_CC_CSR_DWORD_CNT)), + reserve2 : 0, + isLockedReadCmpl: False, + byteCnt : fromInteger(valueOf(IDEA_CC_CSR_BYTE_CNT)), + reserve3 : 0, + addrType : cqDescriptor.addrType, + reserve4 : 0, + lowerAddr : truncate(req.addr) + }; + Data data = zeroExtend(pack(ccDescriptor)); + data = data | (zeroExtend(value) << valueOf(DES_CC_DESCRIPTOR_WIDTH)); + let stream = DataStream { + data : data, + byteEn : convertBytePtr2ByteEn(csrCmplBytes), + isFirst : True, + isLast : True + }; + tlpOutFifo.enq(stream); + endrule + + // DMA Csr Ifc + interface csrReqFifoOut = convertFifoToFifoOut(reqOutFifo); + interface csrRespFifoIn = convertFifoToFifoIn(respInFifo); + // User Ifc + interface userReqFifoOut = convertFifoToFifoOut(userOutFifo); + interface userRespFifoIn = convertFifoToFifoIn(userInFifo); + // Pcie Adapter Ifc + interface tlpDataFifoIn = convertFifoToFifoIn(tlpInFifo); + interface tlpDataFifoOut = convertFifoToFifoOut(tlpOutFifo); +endmodule + + + diff --git a/src/XilBdmaDmaTypes.bsv 
b/src/XilBdmaDmaTypes.bsv new file mode 100755 index 0000000..56c97e0 --- /dev/null +++ b/src/XilBdmaDmaTypes.bsv @@ -0,0 +1,313 @@ +import Vector::*; +import FShow::*; +import SemiFifo::*; +import XilBdmaPcieTypes::*; +import XilBdmaPcieAxiStreamTypes::*; +import XilBdmaPcieDescriptorTypes::*; + +typedef PCIE_AXIS_DATA_WIDTH DATA_WIDTH; + +typedef 64 DMA_MEM_ADDR_WIDTH; +typedef 32 DMA_REQ_LEN_WIDTH; + +typedef 32 DMA_CSR_ADDR_WIDTH; +typedef 32 DMA_CSR_DATA_WIDTH; + +typedef Bit#(DMA_MEM_ADDR_WIDTH) DmaMemAddr; +typedef Bit#(DMA_REQ_LEN_WIDTH) DmaReqLen; +typedef Bit#(DMA_CSR_ADDR_WIDTH) DmaCsrAddr; +typedef Bit#(DMA_CSR_DATA_WIDTH) DmaCsrValue; + +typedef TLog#(BYTE_WIDTH) BYTE_WIDTH_WIDTH; +typedef 2 BYTE_DWORD_SHIFT_WIDTH; + +typedef Bit#(BYTE_WIDTH) Byte; +typedef Bit#(DWORD_WIDTH) DWord; +typedef Bit#(1) ByteParity; + +typedef 4096 BUS_BOUNDARY; +typedef TAdd#(1, TLog#(BUS_BOUNDARY)) BUS_BOUNDARY_WIDTH; + +typedef 128 DEFAULT_MPS; +typedef TLog#(DEFAULT_MPS) DEFAULT_MPS_WIDTH; +typedef 128 DEFAULT_MRRS; +typedef TLog#(DEFAULT_MRRS) DEFAULT_MRRS_WIDTH; + + +typedef 128 DEFAULT_TLP_SIZE; +typedef TLog#(DEFAULT_TLP_SIZE) DEFAULT_TLP_SIZE_WIDTH; +// Only TLPs up to 512 bytes are supported, to save resources (NOTE(review): MAX_TLP_SIZE below is 4096 -- confirm which value is intended) +typedef 4096 MAX_TLP_SIZE; +typedef TLog#(MAX_TLP_SIZE) MAX_TLP_SIZE_WIDTH; +typedef Bit#(BUS_BOUNDARY_WIDTH) TlpPayloadSize; +typedef Bit#(TLog#(BUS_BOUNDARY_WIDTH)) TlpPayloadSizeWidth; + +typedef struct { + TlpPayloadSize mps; // 13 + TlpPayloadSizeWidth mpsWidth; // 4 + TlpPayloadSize mrrs; // 13 + TlpPayloadSizeWidth mrrsWidth; // 4 +} TlpSizeCfg deriving(Bits, Eq, Bounded, FShow); + +typedef 2 CONCAT_STREAM_NUM; + +typedef TDiv#(DATA_WIDTH, BYTE_WIDTH) BYTE_EN_WIDTH; +typedef TDiv#(DATA_WIDTH, DWORD_WIDTH) DWORD_EN_WIDTH; + +typedef Bit#(DATA_WIDTH) Data; +typedef Bit#(BYTE_EN_WIDTH) ByteEn; +typedef Bit#(DWORD_BYTES) DWordByteEn; + +typedef Bit#(TAdd#(1, TLog#(DATA_WIDTH))) DataBitPtr; +typedef Bit#(TAdd#(1, TLog#(BYTE_EN_WIDTH))) DataBytePtr; +typedef
Bit#(TAdd#(1, TLog#(DWORD_EN_WIDTH))) DataDwordPtr; + +typedef Bit#(TAdd#(1, TLog#(DWORD_BYTES))) DWordBytePtr; +typedef Bit#(BYTE_DWORD_SHIFT_WIDTH) ByteModDWord; +typedef 2'b11 MaxByteModDword; + +typedef TSub#(BUS_BOUNDARY_WIDTH, DEFAULT_TLP_SIZE_WIDTH) READ_REQ_CNT_WIDTH; +typedef Bit#(READ_REQ_CNT_WIDTH) DmaReadReqCnt; + +typedef 2 TLP_PH_WIDTH; +typedef Bit#(TLP_PH_WIDTH) TlpPh; + +typedef struct { + Bool noSnoop; + Bool relaxedOrder; + Bool idBasedOrdering; + Bool isTlpHintsExist; + TlpPh tlpPh; +} DmaRequestAttr deriving(Eq, Bits, Bounded, DefaultValue, FShow); + +typedef struct { + DmaMemAddr startAddr; + DmaReqLen length; + Bool isWrite; + DmaRequestAttr attr; +} DmaRequest deriving(Bits, Bounded, Eq); + +typedef struct { + DmaMemAddr startAddr; + DmaMemAddr endAddr; + DmaReqLen length; + Tag tag; + DmaRequestAttr attr; +} DmaExtendRequest deriving(Bits, Bounded, Eq, FShow); + +typedef struct { + DmaCsrAddr addr; + DmaCsrValue value; + Bool isWrite; +} CsrRequest deriving(Bits, Bounded, Eq, FShow); + +typedef struct { + DmaCsrAddr addr; + DmaCsrValue value; +} CsrResponse deriving(Bits, Bounded, Eq, FShow); + +typedef enum { + DMA_RX, + DMA_TX +} TRXDirection deriving(Bits, Eq, FShow); + +typedef struct { + Data data; + ByteEn byteEn; + Bool isFirst; + Bool isLast; +} DataStream deriving(Bits, Bounded, Eq); + +typedef struct { + Bool th; + TlpPh ph; +} TphInfo deriving(Bits, Bounded, Eq, FShow); + +typedef Tuple3#( + DWordByteEn, + DWordByteEn, + TphInfo +) RqSideBandSignal; + +instance FShow#(DmaRequest); + function Fmt fshow(DmaRequest request); + return ($format("> valueOf(TLog#(BUS_BOUNDARY)); + let lowIdx = request.startAddr >> valueOf(TLog#(BUS_BOUNDARY)); + return (highIdx > lowIdx); + endfunction + + function Bool hasBoundary(DmaExtendRequest request); + let highIdx = request.endAddr >> tlpMaxSizeWidthReg; + let lowIdx = request.startAddr >> tlpMaxSizeWidthReg; + return (highIdx != lowIdx); + endfunction + + function DmaReqLen 
getOffset(DmaExtendRequest request); + // MPS - startAddr % MPS, MPS means MRRS when the module is set to RX mode + DmaReqLen remainderOfMps = zeroExtend(TlpPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); + DmaReqLen offsetOfMps = tlpMaxSizeReg - remainderOfMps; + return offsetOfMps; + endfunction + + // Pipeline stage 1, calculate the first chunkLen which may be smaller than MPS + rule getfirstChunkLen; + // If this is the first beat of a new request, compute firstChunkLen and feed the beat into the splitter + if (!isInProcReg) begin + let request = reqInFifo.first; + reqInFifo.deq; + let stream = dataInFifo.first; + dataInFifo.deq; + let offset = getOffset(request); + let firstChunkLen = tlpMaxSizeReg; + if (hasBoundary(request) || has4KBoundary(request)) begin + firstChunkLen = offset; + end + else begin + firstChunkLen = request.length; + end + // $display($time, "ns SIM INFO @ mkChunkSplit: get first chunkLen, offset %d, remainder %d", offset, TlpPayloadSize'(request.startAddr[tlpMaxSizeWidthReg-1:0])); + firstChunkSplitor.splitLocationFifoIn.enq(unpack(truncate(firstChunkLen))); + let firstReq = DmaRequest { + startAddr : request.startAddr, + length : firstChunkLen, + isWrite : True, + attr : request.attr + }; + firstReqPipeFifo.enq(firstReq); + firstChunkSplitor.inputStreamFifoIn.enq(stream); + inputReqPipeFifo.enq(request); + isInProcReg <= !stream.isLast; + end + // Otherwise this is one of the remaining beats of the request: keep feeding the splitter + else begin + let stream = dataInFifo.first; + dataInFifo.deq; + firstChunkSplitor.inputStreamFifoIn.enq(stream); + isInProcReg <= !stream.isLast; + end + endrule + + // Pipeline stage 2: use StreamUtils::StreamSplit to split the input datastream into the first chunk and the remaining chunks + // In StreamUtils::StreamSplit firstChunkSplitor + + // Pipeline stage 3, set isFirst/isLast according to MaxPayloadSize, i.e.
split the remain chunks + rule splitToMps; + let stream = firstChunkSplitor.outputStreamFifoOut.first; + firstChunkSplitor.outputStreamFifoOut.deq; + // End of a TLP, reset beatsReg and tag isLast=True + if (stream.isLast || beatsReg == tlpMaxBeatsReg - 1) begin + stream.isLast = True; + beatsReg <= 0; + end + else begin + beatsReg <= beatsReg + 1; + end + // Start of a TLP, get Req Infos and tag isFirst=True + if (beatsReg == 0) begin + stream.isFirst = True; + let nextStartAddr = nextStartAddrReg; + let remainLen = remainLenReg; + // The first TLP of chunks + if (firstReqPipeFifo.notEmpty && !isInSplitReg) begin + let chunkReq = firstReqPipeFifo.first; + let oriReq = inputReqPipeFifo.first; + firstReqPipeFifo.deq; + inputReqPipeFifo.deq; + reqAttrReg <= oriReq.attr; + if (chunkReq.length == oriReq.length) begin + nextStartAddr = 0; + remainLen = 0; + doneFifo.enq(True); + end + else begin + nextStartAddr = oriReq.startAddr + zeroExtend(chunkReq.length); + remainLen = oriReq.length - chunkReq.length; + end + reqOutFifo.enq(chunkReq); + end + // The following chunks + else begin + let chunkReq = DmaRequest { + startAddr: nextStartAddr, + length : tlpMaxSizeReg, + isWrite : True, + attr : reqAttrReg + }; + if (!isInSplitReg) begin + // Do nothing + end + else if (remainLen <= tlpMaxSizeReg) begin + chunkReq.length = remainLen; + reqOutFifo.enq(chunkReq); + nextStartAddr = 0; + remainLen = 0; + doneFifo.enq(True); + end + else begin + nextStartAddr = nextStartAddr + zeroExtend(tlpMaxSizeReg); + remainLen = remainLen - tlpMaxSizeReg; + reqOutFifo.enq(chunkReq); + end + end + // $display($time, "ns SIM INFO @ mkChunkSplit: output chunkReq."); + nextStartAddrReg <= nextStartAddr; + remainLenReg <= remainLen; + isInSplitReg <= (remainLen != 0); + end + chunkOutFifo.enq(stream); + endrule + + interface dataFifoIn = convertFifoToFifoIn(dataInFifo); + interface reqFifoIn = convertFifoToFifoIn(reqInFifo); + interface doneFifoOut = convertFifoToFifoOut(doneFifo); + + 
interface chunkDataFifoOut = convertFifoToFifoOut(chunkOutFifo); + interface chunkReqFifoOut = convertFifoToFifoOut(reqOutFifo); + + interface Put maxPayloadSize; + method Action put (Tuple2#(TlpPayloadSize, TlpPayloadSizeWidth) mpsCfg); + tlpMaxSizeReg <= zeroExtend(tpl_1(mpsCfg)); + tlpMaxSizeWidthReg <= tpl_2(mpsCfg); + // BeatsNum = MaxPayloadSize / BYTE_EN_WIDTH, computed as a right shift by log2(BYTE_EN_WIDTH); no descriptor size is added here + tlpMaxBeatsReg <= truncate(tpl_1(mpsCfg) >> valueOf(TLog#(BYTE_EN_WIDTH))); + endmethod + endinterface +endmodule + +// Generate the RequesterRequest descriptor stream and the first/last byte-enable sideband for each extended request +interface RqDescriptorGenerator; + interface FifoIn#(DmaExtendRequest) exReqFifoIn; + interface FifoOut#(DataStream) descFifoOut; + interface FifoOut#(RqSideBandSignal) byteEnFifoOut; +endinterface + +module mkRqDescriptorGenerator#(Bool isWrite)(RqDescriptorGenerator); + FIFOF#(DmaExtendRequest) exReqInFifo <- mkFIFOF; + FIFOF#(DataStream) descOutFifo <- mkFIFOF; + FIFOF#(RqSideBandSignal) byteEnOutFifo <- mkFIFOF; + + Probe#(DwordCount) tlpDwCountProbe <- mkProbe; + Probe#(DmaMemAddr) tlpStartAddrCountProbe <- mkProbe; + Probe#(DmaMemAddr) tlpEndAddrCountProbe <- mkProbe; + rule genRqDesc; + let exReq = exReqInFifo.first; + exReqInFifo.deq; + let endOffset = byteModDWord(exReq.endAddr); // NOTE(review): endOffset appears unused in this rule; endAddrOffset further down recomputes the same value + DwordCount dwCnt = truncate((exReq.endAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)) - (exReq.startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH))) + 1; + dwCnt = (exReq.length == 0) ? 1 : dwCnt; + tlpDwCountProbe <= dwCnt; + tlpStartAddrCountProbe <= exReq.startAddr; + tlpEndAddrCountProbe <= exReq.endAddr; + + DataBytePtr bytePtr = fromInteger(valueOf(TDiv#(DES_RQ_DESCRIPTOR_WIDTH, BYTE_WIDTH))); + let descriptor = PcieRequesterRequestDescriptor { + forceECRC : False, + attributes : {pack(exReq.attr.idBasedOrdering), pack(exReq.attr.relaxedOrder), pack(exReq.attr.noSnoop)}, + trafficClass : 0, + requesterIdEn : False, + completerId : 0, + tag : exReq.tag, + requesterId : 0, + isPoisoned : False, + reqType : isWrite ?
fromInteger(valueOf(MEM_WRITE_REQ)) : fromInteger(valueOf(MEM_READ_REQ)), + dwordCnt : dwCnt, + address : truncate(exReq.startAddr >> valueOf(BYTE_DWORD_SHIFT_WIDTH)), + addrType : fromInteger(valueOf(UNTRANSLATED_ADDR)) + }; + let stream = DataStream { + data : zeroExtend(pack(descriptor)), + byteEn : convertBytePtr2ByteEn(bytePtr), + isFirst : True, + isLast : True + }; + descOutFifo.enq(stream); + let startAddrOffset = byteModDWord(exReq.startAddr); + let endAddrOffset = byteModDWord(exReq.endAddr); + let firstByteEn = convertDWordOffset2FirstByteEn(startAddrOffset); + let lastByteEn = convertDWordOffset2LastByteEn(endAddrOffset); + // if startAddr and endAddr are in the same DWord + if ((exReq.startAddr >> valueOf(TLog#(DWORD_BYTES))) == (exReq.endAddr >> valueOf(TLog#(DWORD_BYTES)))) begin + firstByteEn = firstByteEn & lastByteEn; + lastByteEn = 0; + end + + let tphInfo = TphInfo{th:exReq.attr.isTlpHintsExist, ph:exReq.attr.tlpPh}; + byteEnOutFifo.enq(tuple3(firstByteEn, lastByteEn, tphInfo)); + $display($time, "ns SIM INFO @ mkRqDescriptorGenerator: generate desc, tag %d, dwcnt %d, start:%d, end:%d, byteCnt:%d ", exReq.tag, dwCnt, exReq.startAddr, exReq.endAddr, exReq.length); + endrule + + interface exReqFifoIn = convertFifoToFifoIn(exReqInFifo); + interface descFifoOut = convertFifoToFifoOut(descOutFifo); + interface byteEnFifoOut = convertFifoToFifoOut(byteEnOutFifo); +endmodule + diff --git a/src/XilBdmaDmaWrapper.bsv b/src/XilBdmaDmaWrapper.bsv new file mode 100755 index 0000000..69321d4 --- /dev/null +++ b/src/XilBdmaDmaWrapper.bsv @@ -0,0 +1,954 @@ +import FIFOF::*; +import FIFO::*; +import Vector::*; +import Connectable :: *; +import DReg::*; +import GetPut::*; +import BRAMFIFO::*; +import ClientServer::*; +import Probe :: *; + +import SemiFifo::*; +import BusConversion::*; +import AxiStreamTypes::*; +import XilBdmaPcieTypes::*; +import XilBdmaPcieConfigurator::*; +import XilBdmaPcieAxiStreamTypes::*; +import XilBdmaPcieAdapter::*; +import 
XilBdmaDmaTypes::*; +import XilBdmaDmaUtils::*; +import XilBdmaDmaC2HPipe::*; +import XilBdmaDmaH2CPipe::*; +import XilBdmaSimpleModeUtils::*; +import XilBdmaTestUtils::*; +import XilBdmaPrimUtils :: *; + +// // For Bsv User +// interface BdmaControllerBypassWrapper#(numeric type sz_csr_addr, numeric type sz_csr_data); +// // User Logic Ifc +// interface Server#(BdmaUserC2hWrReq, BdmaUserC2hWrResp) c2hWrSrvA; +// interface Server#(BdmaUserC2hRdReq, BdmaUserC2hRdResp) c2hRdSrvA; +// interface Server#(BdmaUserC2hWrReq, BdmaUserC2hWrResp) c2hWrSrvB; +// interface Server#(BdmaUserC2hRdReq, BdmaUserC2hRdResp) c2hRdSrvB; +// // User Csr Ifc +// interface Client#(BdmaUserH2cWrReq#(sz_csr_addr, sz_csr_data), BdmaUserH2cWrResp) csrWrClt; +// interface Client#(BdmaUserH2cRdReq#(sz_csr_addr), BdmaUserH2cRdResp#(sz_csr_data)) csrRdClt; + +// // Raw PCIe interfaces, connected to the Xilinx PCIe IP +// (* prefix = "" *)interface RawXilinxPcieIp rawPcie; +// endinterface + +// module mkBdmaControllerBypassWrapper(BdmaControllerBypassWrapper#(sz_csr_addr, sz_csr_data)) +// provisos( +// Add#(_a, sz_csr_addr, DMA_CSR_ADDR_WIDTH), +// Add#(_b, sz_csr_data, DMA_CSR_DATA_WIDTH) +// ); +// Wire#(Bool) linkUpWire <- mkWire; +// Reg#(Bool) linkUpReg <- mkReg(False); +// Reg#(Bool) cfgFlagReg <- mkDReg(False); +// Reg#(DWord) cfgReadDelayCounterReg <- mkReg(0); + +// BdmaC2HPipe c2hPipeA <- mkBdmaC2HPipe(0); +// BdmaC2HPipe c2hPipeB <- mkBdmaC2HPipe(1); +// BdmaH2CPipe#(sz_csr_addr, sz_csr_data) h2cPipe <- mkBdmaH2CPipe; + +// RequesterAxiStreamAdapter reqAdapter <- mkRequesterAxiStreamAdapter; +// CompleterAxiStreamAdapter cmplAdapter <- mkCompleterAxiStreamAdapter; + +// PcieConfigurator configurator <- mkPcieConfigurator; + +// mkConnection(c2hPipeA.tlpDataFifoOut, reqAdapter.dmaDataFifoIn[0]); +// mkConnection(c2hPipeA.tlpSideBandFifoOut, reqAdapter.dmaSideBandFifoIn[0]); +// mkConnection(reqAdapter.dmaDataFifoOut[0], c2hPipeA.tlpDataFifoIn); + +// 
mkConnection(c2hPipeB.tlpDataFifoOut, reqAdapter.dmaDataFifoIn[1]); +// mkConnection(c2hPipeB.tlpSideBandFifoOut, reqAdapter.dmaSideBandFifoIn[1]); +// mkConnection(reqAdapter.dmaDataFifoOut[1], c2hPipeB.tlpDataFifoIn); + +// mkConnection(cmplAdapter.dmaDataFifoOut, h2cPipe.tlpDataFifoIn); +// mkConnection(h2cPipe.tlpDataFifoOut, cmplAdapter.dmaDataFifoIn); + +// rule detectLink if (linkUpWire && !linkUpReg); +// cfgReadDelayCounterReg <= cfgReadDelayCounterReg + 1; + +// if (cfgReadDelayCounterReg > 2000) begin +// configurator.initCfg; +// cfgFlagReg <= True; +// linkUpReg <= True; +// // $display($time, "ns SIM INFO @ BLUE-DMAC: PCIe link is up!"); +// end +// endrule + +// rule setCfg if (cfgFlagReg); +// let tlpSizeCfg <- configurator.tlpSizeCfg.get; +// c2hPipeA.tlpSizeCfg.put(tlpSizeCfg); +// c2hPipeB.tlpSizeCfg.put(tlpSizeCfg); +// $display($time, "ns SIM INFO @ BLUE-DMAC: Get PCIe configurations, mps:%d, mrrs:%d", tlpSizeCfg.mps, tlpSizeCfg.mrrs); +// endrule + +// // User Logic Ifc +// interface c2hWrSrvA = c2hPipeA.writeSrv; +// interface c2hRdSrvA = c2hPipeA.readSrv; +// interface c2hWrSrvB = c2hPipeB.writeSrv; +// interface c2hRdSrvB = c2hPipeB.readSrv; +// interface csrWrClt = h2cPipe.writeClt; +// interface csrRdClt = h2cPipe.readClt; + +// // Raw PCIe Ifc +// interface RawXilinxPcieIp rawPcie; +// interface requesterRequest = reqAdapter.rawRequesterRequest; +// interface requesterComplete = reqAdapter.rawRequesterComplete; +// interface completerRequest = cmplAdapter.rawCompleterRequest; +// interface completerComplete = cmplAdapter.rawCompleterComplete; +// interface configuration = configurator.rawConfiguration; +// method Action linkUp(Bool isLinkUp); +// linkUpWire <= isLinkUp; +// endmethod +// endinterface +// endmodule + + +// Native Blue-DMA Interface, the addrs in the req should be pa +interface DmaController; + // User Logic Ifc + interface Vector#(DMA_PATH_NUM, FifoIn#(DataStream)) c2hDataFifoIn; + interface Vector#(DMA_PATH_NUM, 
FifoOut#(DataStream)) c2hDataFifoOut; + interface Vector#(DMA_PATH_NUM, FifoIn#(DmaRequest)) c2hReqFifoIn; + + interface FifoIn#(CsrResponse) h2cRespFifoIn; + interface FifoOut#(CsrRequest) h2cReqFifoOut; + + interface FifoIn#(CsrResponse) innerRespFifoIn; + interface FifoOut#(CsrRequest) innerReqFifoOut; + + // Raw PCIe interfaces, connected to the Xilinx PCIe IP + (* prefix = "" *)interface RawXilinxPcieIp rawPcie; + (* prefix = "" *)method TlpSizeCfg tlpSizeDebugPort; +endinterface + +// TODO : connect Configurator to other modules +(* synthesize *) +module mkDmaController(DmaController); + Vector#(DMA_PATH_NUM, DmaC2HPipe) c2hPipes = newVector; + Reg#(TlpSizeCfg) tlpSizeDebugPortReg <- mkReg(unpack(0)); + + Wire#(Bool) linkUpWire <- mkWire; + + + for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1) begin + c2hPipes[pathIdx] <- mkDmaC2HPipe(pathIdx); + end + DmaH2CPipe h2cPipe <- mkDmaH2CPipe; + + RequesterAxiStreamAdapter reqAdapter <- mkRequesterAxiStreamAdapter; + CompleterAxiStreamAdapter cmplAdapter <- mkCompleterAxiStreamAdapter; + + PcieConfigurator configurator <- mkPcieConfigurator; + + Vector#(DMA_PATH_NUM, FifoIn#(DataStream)) c2hDataInIfc = newVector; + Vector#(DMA_PATH_NUM, FifoOut#(DataStream)) c2hDataOutIfc = newVector; + Vector#(DMA_PATH_NUM, FifoIn#(DmaRequest)) c2hReqInIfc = newVector; + + for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1) begin + c2hDataInIfc[pathIdx] = c2hPipes[pathIdx].wrDataFifoIn; + c2hDataOutIfc[pathIdx] = c2hPipes[pathIdx].rdDataFifoOut; + c2hReqInIfc[pathIdx] = c2hPipes[pathIdx].reqFifoIn; + mkConnection(c2hPipes[pathIdx].tlpDataFifoOut, reqAdapter.dmaDataFifoIn[pathIdx]); + mkConnection(c2hPipes[pathIdx].tlpSideBandFifoOut, reqAdapter.dmaSideBandFifoIn[pathIdx]); + mkConnection(reqAdapter.dmaDataFifoOut[pathIdx], c2hPipes[pathIdx].tlpDataFifoIn); + rule doneFlag; //TODO: let verilog interface has done signal + 
c2hPipes[pathIdx].doneFifoOut.deq; + endrule + end + + mkConnection(cmplAdapter.dmaDataFifoOut, h2cPipe.tlpDataFifoIn); + mkConnection(h2cPipe.tlpDataFifoOut, cmplAdapter.dmaDataFifoIn); + + rule forwardConfig; + configurator.initCfg; + let tlpSizeCfg <- configurator.tlpSizeCfg.get; + for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1) begin + c2hPipes[pathIdx].tlpSizeCfg.put(tlpSizeCfg); + end + tlpSizeDebugPortReg <= tlpSizeCfg; + + // $display($time, "ns SIM INFO @ BLUE-DMAC: PCIe link is up!, tlpSizeCfg=", fshow(tlpSizeCfg)); + + endrule + + // User Logic Ifc + interface c2hDataFifoIn = c2hDataInIfc; + interface c2hDataFifoOut = c2hDataOutIfc; + interface c2hReqFifoIn = c2hReqInIfc; + interface h2cRespFifoIn = h2cPipe.userRespFifoIn; + interface h2cReqFifoOut = h2cPipe.userReqFifoOut; + interface innerRespFifoIn = h2cPipe.csrRespFifoIn; + interface innerReqFifoOut = h2cPipe.csrReqFifoOut; + + // Raw PCIe Ifc + interface RawXilinxPcieIp rawPcie; + interface requesterRequest = reqAdapter.rawRequesterRequest; + interface requesterComplete = reqAdapter.rawRequesterComplete; + interface completerRequest = cmplAdapter.rawCompleterRequest; + interface completerComplete = cmplAdapter.rawCompleterComplete; + interface configuration = configurator.rawConfiguration; + method Action linkUp(Bool isLinkUp); + linkUpWire <= isLinkUp; + endmethod + endinterface + + method tlpSizeDebugPort = tlpSizeDebugPortReg; +endmodule + +// For Verilog User + +(* always_ready, always_enabled *) +interface RawDmaReqSlave; + (* prefix = "" *) + method Action validReq( + (* port = "valid" *) Bool valid, + (* port = "start_addr" *) DmaMemAddr startAddr, + (* port = "byte_cnt" *) DmaReqLen length, + (* port = "is_write" *) Bool isWrite + ); + (* result = "ready" *) method Bool ready; +endinterface + +(* always_ready, always_enabled *) +interface RawDmaCsrMaster; + (* result = "address" *) method DmaCsrAddr address; + (* result = "value" *) method 
DmaCsrValue value; + (* result = "is_write" *) method Bool isWrite; + (* result = "valid" *) method Bool valid; + (* prefix = "" *) method Action ready((* port = "ready" *) Bool rdy); +endinterface + +(* always_ready, always_enabled *) +interface RawDmaCsrSlave; + (* prefix = "" *) + method Action validResp( + (* port = "valid" *) Bool valid, + (* port = "address" *) DmaCsrAddr address, + (* port = "value" *) DmaCsrValue value + ); + (* result = "ready" *) method Bool ready; +endinterface + +typedef TDiv#(DATA_WIDTH, BYTE_WIDTH) DMA_DATA_KEEP_WIDTH; +typedef 1 DMA_DATA_USER_WIDTH; +typedef RawAxiStreamSlave#(DMA_DATA_KEEP_WIDTH, DMA_DATA_USER_WIDTH) RawDmaDataSlave; +typedef RawAxiStreamMaster#(DMA_DATA_KEEP_WIDTH, DMA_DATA_USER_WIDTH) RawDmaDataMaster; +typedef AxiStream#(DMA_DATA_KEEP_WIDTH, DMA_DATA_USER_WIDTH) DmaAxiStream; + +// module mkFifoInToRawDmaDataSlave#(FifoIn#(DataStream) pipe)(RawDmaDataSlave); +// Reg#(Bool) isFirstReg <- mkReg(True); +// let rawBus <- mkFifoInToRawBusSlave(pipe); + +// method Bool tReady = rawBus.ready; +// method Action tValid( +// Bool valid, +// Bit#(DATA_WIDTH) tData, +// Bit#(DMA_DATA_KEEP_WIDTH) tKeep, +// Bool tLast, +// Bit#(DMA_DATA_USER_WIDTH) tUser +// ); +// if (valid && rawBus.ready) begin +// if (tLast) begin +// isFirstReg <= True; +// end +// else if (isFirstReg) begin +// isFirstReg <= False; +// end +// end +// let stream = DataStream { +// data : tData, +// byteEn : tKeep, +// isFirst : isFirstReg && valid, +// isLast : tLast +// }; +// rawBus.validData(valid, stream); +// endmethod +// endmodule + +// module mkFifoOutToRawDmaDataMaster#(FifoOut#(DataStream) pipe)(RawDmaDataMaster); +// let rawBus <- mkFifoOutToRawBusMaster(pipe); +// method Bool tValid = rawBus.valid; +// method Bit#(DATA_WIDTH) tData = rawBus.data.data; +// method Bit#(DMA_DATA_KEEP_WIDTH) tKeep = rawBus.data.byteEn; +// method Bool tLast = rawBus.data.isLast; +// method Bit#(DMA_DATA_USER_WIDTH) tUser = 0; +// method Action tReady(Bool rdy); 
+// rawBus.ready(rdy); +// endmethod +// endmodule + +// module mkFifoInToRawDmaReqSlave#(FifoIn#(DmaRequest) pipe)(RawDmaReqSlave); +// let rawBus <- mkFifoInToRawBusSlave(pipe); +// method Action validReq( +// Bool valid, +// DmaMemAddr startAddr, +// DmaReqLen length, +// Bool isWrite +// ); +// let request = DmaRequest { +// startAddr : startAddr, +// length : length, +// isWrite : isWrite +// }; +// rawBus.validData(valid, request); +// endmethod +// method Bool ready = rawBus.ready; +// endmodule + +module mkFifoOutToRawCsrMaster#(FifoOut#(CsrRequest) pipe)(RawDmaCsrMaster); + let rawBus <- mkFifoOutToRawBusMaster(pipe); + method DmaCsrAddr address = rawBus.data.addr; + method DmaCsrValue value = rawBus.data.value; + method Bool isWrite = rawBus.data.isWrite; + method Bool valid = rawBus.valid; + method Action ready(Bool rdy); + rawBus.ready(rdy); + endmethod +endmodule + +module mkFifoInToRawCsrClient#(FifoIn#(CsrResponse) pipe)(RawDmaCsrSlave); + let rawBus <- mkFifoInToRawBusSlave(pipe); + method Action validResp( + Bool valid, + DmaCsrAddr addr, + DmaCsrValue value + ); + let resp = CsrResponse { + addr : addr, + value : value + }; + rawBus.validData(valid, resp); + endmethod + method Bool ready = rawBus.ready; +endmodule + +// // Bypass Mode +// // Raw verilog Wrapper of Dma User Logic Ifc +// interface RawBypassDmaController; +// // User Logic Ifc +// (* prefix = "s_axis_c2h_0" *) interface RawDmaDataSlave dmaWrData0; +// (* prefix = "s_desc_c2h_0" *) interface RawDmaReqSlave dmaDesc0; +// (* prefix = "m_axis_c2h_0" *) interface RawDmaDataMaster dmaRdData0; + +// (* prefix = "s_axis_c2h_1" *) interface RawDmaDataSlave dmaWrData1; +// (* prefix = "s_desc_c2h_1" *) interface RawDmaReqSlave dmaDesc1; +// (* prefix = "m_axis_c2h_1" *) interface RawDmaDataMaster dmaRdData1; + +// (* prefix = "s_h2c_csr" *) interface RawDmaCsrSlave dmaCsrResp; +// (* prefix = "m_h2c_csr" *) interface RawDmaCsrMaster dmaCsrReq; + +// // Raw PCIe interfaces, connected to the 
Xilinx PCIe IP +// (* prefix = "" *) interface RawXilinxPcieIp rawPcie; +// method Bool sys_reset; +// endinterface + +// (* synthesize *) +// module mkRawBypassDmaController(RawBypassDmaController); +// Reg#(Bit#(32)) sysResetCounterReg <- mkReg(0); +// DmaController dmac <- mkDmaController; +// GenericCsr dummyCsr <- mkDummyCsr; + +// let dmaWrData0Ifc <- mkFifoInToRawDmaDataSlave(dmac.c2hDataFifoIn[0]); +// let dmaDesc0Ifc <- mkFifoInToRawDmaReqSlave(dmac.c2hReqFifoIn[0]); +// let dmaRdData0Ifc <- mkFifoOutToRawDmaDataMaster(dmac.c2hDataFifoOut[0]); + +// let dmaWrData1Ifc <- mkFifoInToRawDmaDataSlave(dmac.c2hDataFifoIn[1]); +// let dmaDesc1Ifc <- mkFifoInToRawDmaReqSlave(dmac.c2hReqFifoIn[1]); +// let dmaRdData1Ifc <- mkFifoOutToRawDmaDataMaster(dmac.c2hDataFifoOut[1]); + +// let csrRespIfc <- mkFifoInToRawCsrClient(dmac.h2cRespFifoIn); +// let csrReqIfc <- mkFifoOutToRawCsrMaster(dmac.h2cReqFifoOut); + +// mkConnection(dmac.innerReqFifoOut, dummyCsr.reqFifoIn); +// mkConnection(dummyCsr.respFifoOut, dmac.innerRespFifoIn); + +// rule sysResetHandler; +// if (sysResetCounterReg < 5000) begin +// sysResetCounterReg <= sysResetCounterReg + 1; +// end +// endrule + +// interface dmaWrData0 = dmaWrData0Ifc; +// interface dmaDesc0 = dmaDesc0Ifc; +// interface dmaRdData0 = dmaRdData0Ifc; +// interface dmaWrData1 = dmaWrData1Ifc; +// interface dmaDesc1 = dmaDesc1Ifc; +// interface dmaRdData1 = dmaRdData1Ifc; +// interface dmaCsrResp = csrRespIfc; +// interface dmaCsrReq = csrReqIfc; + + + +// interface rawPcie = dmac.rawPcie; + +// method Bool sys_reset = sysResetCounterReg == 5000; +// endmodule + +// interface RawSimpleDmaController; +// // User Logic Ifc +// (* prefix = "s_axis_c2h_0" *) interface RawDmaDataSlave dmaWrData0; +// (* prefix = "m_axis_c2h_0" *) interface RawDmaDataMaster dmaRdData0; + +// (* prefix = "s_axis_c2h_1" *) interface RawDmaDataSlave dmaWrData1; +// (* prefix = "m_axis_c2h_1" *) interface RawDmaDataMaster dmaRdData1; + +// (* prefix = 
"s_h2c_csr" *) interface RawDmaCsrSlave dmaCsrResp; +// (* prefix = "m_h2c_csr" *) interface RawDmaCsrMaster dmaCsrReq; + +// // Raw PCIe interfaces, connected to the Xilinx PCIe IP +// (* prefix = "" *) interface RawXilinxPcieIp rawPcie; +// endinterface + +// // Simple Mode For Read-Write Loop Testing, which has no external ports +// (* synthesize *) +// module mkRawSimpleDmaController(RawSimpleDmaController); +// DmaController dmac <- mkDmaController; +// DmaSimpleCore simpleCore <- mkDmaSimpleCore; + +// for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1 ) begin +// mkConnection(dmac.c2hReqFifoIn[pathIdx], simpleCore.c2hReqFifoOut[pathIdx]); +// end + +// let dmaWrData0Ifc <- mkFifoInToRawDmaDataSlave(dmac.c2hDataFifoIn[0]); +// let dmaRdData0Ifc <- mkFifoOutToRawDmaDataMaster(dmac.c2hDataFifoOut[0]); + +// let dmaWrData1Ifc <- mkFifoInToRawDmaDataSlave(dmac.c2hDataFifoIn[1]); +// let dmaRdData1Ifc <- mkFifoOutToRawDmaDataMaster(dmac.c2hDataFifoOut[1]); + +// let csrRespIfc <- mkFifoInToRawCsrClient(dmac.h2cRespFifoIn); +// let csrReqIfc <- mkFifoOutToRawCsrMaster(dmac.h2cReqFifoOut); + +// mkConnection(dmac.innerReqFifoOut, simpleCore.reqFifoIn); +// mkConnection(dmac.innerRespFifoIn, simpleCore.respFifoOut); + +// interface rawPcie = dmac.rawPcie; + +// interface dmaWrData0 = dmaWrData0Ifc; +// interface dmaRdData0 = dmaRdData0Ifc; +// interface dmaWrData1 = dmaWrData1Ifc; +// interface dmaRdData1 = dmaRdData1Ifc; +// interface dmaCsrResp = csrRespIfc; +// interface dmaCsrReq = csrReqIfc; +// endmodule + +interface RawLoopDmaController; + // User Logic Ifc + + // Raw PCIe interfaces, connected to the Xilinx PCIe IP + (* prefix = "" *) interface RawXilinxPcieIp rawPcie; + (* prefix = "" *) method TlpSizeCfg tlpSizeDebugPort; + method Bool sys_reset; +endinterface + +// (* synthesize *) +// module mkRawTestDmaController(RawLoopDmaController); +// Reg#(Bit#(32)) sysResetCounterReg <- mkReg(0); +// DmaController dmac 
<- mkDmaController; +// DmaSimpleCore simpleCore <- mkDmaSimpleCore; +// GenericCsr dummyCsr <- mkDummyCsr; +// Vector#(DMA_PATH_NUM, FIFOF#(DataStream)) dataFifo <- replicateM(mkSizedBRAMFIFOF(valueOf(BUS_BOUNDARY))); + +// for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1 ) begin +// mkConnection(dataFifo[pathIdx], dmac.c2hDataFifoIn[pathIdx]); +// mkConnection(dmac.c2hDataFifoOut[pathIdx], dataFifo[pathIdx]); +// mkConnection(dmac.c2hReqFifoIn[pathIdx], simpleCore.c2hReqFifoOut[pathIdx]); +// end + +// mkConnection(dmac.innerReqFifoOut, simpleCore.reqFifoIn); +// mkConnection(dmac.innerRespFifoIn, simpleCore.respFifoOut); + +// mkConnection(dmac.h2cReqFifoOut, dummyCsr.reqFifoIn); +// mkConnection(dmac.h2cRespFifoIn, dummyCsr.respFifoOut); + +// rule logRead; +// let stream = dmac.c2hDataFifoOut[0].first; +// $display($time, "ns SIM INFO @ mkRawTestDmaController: recv stream, isFirst %d, isLast %d, data %h", pack(stream.isFirst), pack(stream.isLast), stream.data); +// endrule + + + +// rule sysResetHandler; +// if (sysResetCounterReg < 1000) begin +// sysResetCounterReg <= sysResetCounterReg + 1; +// end +// endrule + +// interface rawPcie = dmac.rawPcie; + +// method Bool sys_reset = sysResetCounterReg == 1000; +// method tlpSizeDebugPort = dmac.tlpSizeDebugPort; + +// endmodule + + + + + + + + + + + + + + +(* synthesize *) +module mkRawTestDmaController(RawLoopDmaController); + Reg#(Bit#(32)) sysResetCounterReg <- mkReg(0); + DmaController dmac <- mkDmaController; + FIFOF#(DataStream) dataFifoA <- mkSizedFIFOF(512); + FIFOF#(DataStream) dataFifoB <- mkSizedFIFOF(512); + + mkConnection(dataFifoA, dmac.c2hDataFifoIn[1]); + mkConnection(dataFifoB, dmac.c2hDataFifoIn[0]); + // mkConnection(dmac.c2hDataFifoOut[0], dataFifo); + + + Reg#(Bit#(32)) srcAddrLowReg <- mkReg(0); + Reg#(Bit#(32)) srcAddrHighReg <- mkReg(0); + Reg#(Bit#(32)) dstAddrLowReg <- mkReg(0); + Reg#(Bit#(32)) dstAddrHighReg <- mkReg(0); + Reg#(Bit#(32)) 
lengthReg <- mkReg(0); + Reg#(Bit#(32)) modeReg <- mkReg(0); + Reg#(Bit#(32)) strideSizeReg <- mkReg(0); + Reg#(Bit#(32)) maxStrideCntReg <- mkReg(0); + + Reg#(Bit#(32)) curReadStrideCntAReg[2] <- mkCReg(2, 0); + Reg#(Bit#(32)) curReadStrideCntBReg[2] <- mkCReg(2, 0); + Reg#(Bit#(32)) curWriteStrideCntAReg[2] <- mkCReg(2, 0); + Reg#(Bit#(32)) curWriteStrideCntBReg[2] <- mkCReg(2, 0); + + Reg#(Bit#(32)) curSrcLowAddrAReg <- mkReg(0); + Reg#(Bit#(32)) curSrcLowAddrBReg <- mkReg(0); + Reg#(Bit#(32)) curDstLowAddrAReg <- mkReg(0); + Reg#(Bit#(32)) curDstLowAddrBReg <- mkReg(0); + + Reg#(Bit#(32)) batchTestCounterAReg[2] <- mkCReg(2, 0); + Reg#(Bit#(32)) batchTestCounterBReg[2] <- mkCReg(2, 0); + Reg#(Bit#(16)) batchWriteCounterAReg[2] <- mkCReg(2, 0); + Reg#(Bit#(16)) batchWriteCounterBReg[2] <- mkCReg(2, 0); + + // doubleChannelTestOffsetReg is also a switch, if its not 0, then enable two channel test + Reg#(Bit#(32)) doubleChannelTestOffsetReg <- mkReg(0); + + Reg#(Bit#(32)) testModeCtlReg <- mkReg(0); + + + + Probe#(Bool) readToWriteQueueNotFullProbeA <- mkProbe; + Probe#(Bool) readToWriteQueueNotFullProbeB <- mkProbe; + Probe#(Bool) readToWriteQueueNotEmptyProbeA <- mkProbe; + Probe#(Bool) readToWriteQueueNotEmptyProbeB <- mkProbe; + Probe#(Bool) dmaReadDescEnqProbe <- mkProbe; + Probe#(Bool) dmaWriteDescEnqProbeA <- mkProbe; + Probe#(Bool) dmaWriteDescEnqProbeB <- mkProbe; + + Probe#(Bool) writeDescQueueNotFullProbeA <- mkProbe; + Probe#(Bool) writeDescQueueNotFullProbeB <- mkProbe; + + Reg#(Bit#(32)) attrReg <- mkReg(0); + + Bool isReadOnlyTest = unpack(testModeCtlReg[0]); + Bool isWriteOnlyTest = unpack(testModeCtlReg[1]); + + + // rule debug; + // if (!dmac.c2hDataFifoOut[0].notEmpty) $display("dmac.c2hDataFifoOut[0] Empty"); + // if (!dataFifo.notFull) $display("dataFifo Full"); + // endrule + + rule debug; + readToWriteQueueNotFullProbeA <= dataFifoA.notFull; + readToWriteQueueNotEmptyProbeA <= dataFifoA.notEmpty; + writeDescQueueNotFullProbeA <= 
dmac.c2hReqFifoIn[1].notFull; + + readToWriteQueueNotFullProbeB <= dataFifoB.notFull; + readToWriteQueueNotEmptyProbeB <= dataFifoB.notEmpty; + writeDescQueueNotFullProbeB <= dmac.c2hReqFifoIn[0].notFull; + endrule + + rule forwardDataA if (!isWriteOnlyTest); + dmac.c2hDataFifoOut[0].deq; + if (!isReadOnlyTest) begin + dataFifoA.enq(dmac.c2hDataFifoOut[0].first); + if (dmac.c2hDataFifoOut[0].first.isLast) begin + batchWriteCounterAReg[0] <= batchWriteCounterAReg[0] + 1; + end + end + $display($time, "ns SIM INFO @ mkRawTestDmaController: forwardData data 0 =", fshow(dmac.c2hDataFifoOut[0].first)); + endrule + + rule forwardDataB if (!isWriteOnlyTest); + dmac.c2hDataFifoOut[1].deq; + if (!isReadOnlyTest) begin + dataFifoB.enq(dmac.c2hDataFifoOut[1].first); + if (dmac.c2hDataFifoOut[1].first.isLast) begin + batchWriteCounterBReg[0] <= batchWriteCounterBReg[0] + 1; + end + end + $display($time, "ns SIM INFO @ mkRawTestDmaController: forwardData data 1 =", fshow(dmac.c2hDataFifoOut[1].first)); + endrule + + rule handleCsrAccess; + let req = dmac.h2cReqFifoOut.first; + dmac.h2cReqFifoOut.deq; + // $display($time, "ns SIM INFO @ mkRawTestDmaController: handleCsrAccess req=", fshow(req)); + if (req.isWrite) begin + case (req.addr) + 'h0004: begin + srcAddrLowReg <= unpack(pack(req.value)); + end + 'h0008: begin + srcAddrHighReg <= unpack(pack(req.value)); + end + 'h000c: begin + dstAddrLowReg <= unpack(pack(req.value)); + end + 'h0010: begin + dstAddrHighReg <= unpack(pack(req.value)); + end + 'h0014: begin + lengthReg <= unpack(pack(req.value)); + end + 'h0018: begin + batchTestCounterAReg[1] <= unpack(pack(req.value)); + batchTestCounterBReg[1] <= unpack(pack(req.value)); + curReadStrideCntAReg[1] <= 0; + curReadStrideCntBReg[1] <= 0; + curWriteStrideCntAReg[1] <= 0; + curWriteStrideCntBReg[1] <= 0; + end + 'h001c: begin + strideSizeReg <= unpack(pack(req.value)); + end + 'h0020: begin + maxStrideCntReg <= unpack(pack(req.value)); + end + 'h0024: begin + attrReg <= 
unpack(pack(req.value)); + end + 'h0028: begin + doubleChannelTestOffsetReg <= unpack(pack(req.value)); + end + 'h002C: begin + testModeCtlReg <= unpack(pack(req.value)); + end + 'h0030: begin + batchTestCounterBReg[1] <= unpack(pack(req.value)); + end + + + + + + endcase + end + else begin + let resp = CsrResponse { + addr: req.addr, + value: unpack(0) // default: a read of an address not listed in the case below returns zero instead of an unspecified value + }; + case (req.addr) + 'h0004: begin + resp.value = unpack(pack(srcAddrLowReg)); + end + 'h0008: begin + resp.value = unpack(pack(srcAddrHighReg)); + end + 'h000c: begin + resp.value = unpack(pack(dstAddrLowReg)); + end + 'h0010: begin + resp.value = unpack(pack(dstAddrHighReg)); + end + 'h0014: begin + resp.value = unpack(pack(lengthReg)); + end + 'h0018: begin + resp.value = unpack(pack(batchTestCounterAReg[1])); + end + 'h001c: begin + resp.value = unpack(pack(strideSizeReg)); + end + 'h0020: begin + resp.value = unpack(pack(maxStrideCntReg)); + end + 'h0024: begin + resp.value = unpack(pack(attrReg)); + end + 'h0028: begin + resp.value = unpack(pack(doubleChannelTestOffsetReg)); + end + 'h002C: begin + resp.value = unpack(pack(testModeCtlReg)); + end + 'h0030: begin + resp.value = unpack(pack(batchTestCounterBReg[1])); + end + + endcase + dmac.h2cRespFifoIn.enq(resp); + end + endrule + + + + (* descending_urgency = "batchWriteRequestB, batchReadRequestA" *) + rule batchReadRequestA if (batchTestCounterAReg[0] != 0 && !isWriteOnlyTest); + batchTestCounterAReg[0] <= batchTestCounterAReg[0] - 1; + + let curSrcLowAddr = curReadStrideCntAReg[0] == 0 ?
srcAddrLowReg : curSrcLowAddrAReg; + + dmac.c2hReqFifoIn[0].enq(DmaRequest{ + startAddr:unpack({srcAddrHighReg, curSrcLowAddr}), + length: unpack(lengthReg), + isWrite: False, + attr: unpack(truncate(attrReg)) + }); + + if (curReadStrideCntAReg[0] + 1 == maxStrideCntReg) begin + curReadStrideCntAReg[0] <= 0; + end + else begin + curReadStrideCntAReg[0] <= curReadStrideCntAReg[0] + 1; + end + + curSrcLowAddrAReg <= curSrcLowAddr + strideSizeReg; + + // $display($time, "ns SIM INFO @ mkRawTestDmaController: batchRequest batchTestCounterAReg=", fshow(batchTestCounterAReg[0])); + dmaReadDescEnqProbe <= True; + endrule + + + (* descending_urgency = "batchWriteRequestA, batchReadRequestB" *) + rule batchReadRequestB if (batchTestCounterBReg[0] != 0 && doubleChannelTestOffsetReg != 0 && !isWriteOnlyTest); + batchTestCounterBReg[0] <= batchTestCounterBReg[0] - 1; + + let curSrcLowAddr = curReadStrideCntBReg[0] == 0 ? srcAddrLowReg : curSrcLowAddrBReg; + + dmac.c2hReqFifoIn[1].enq(DmaRequest{ + startAddr:unpack({srcAddrHighReg, curSrcLowAddr + doubleChannelTestOffsetReg}), + length: unpack(lengthReg), + isWrite: False, + attr: unpack(truncate(attrReg)) + }); + + if (curReadStrideCntBReg[0] + 1 == maxStrideCntReg) begin + curReadStrideCntBReg[0] <= 0; + end + else begin + curReadStrideCntBReg[0] <= curReadStrideCntBReg[0] + 1; + end + + curSrcLowAddrBReg <= curSrcLowAddr + strideSizeReg; + + // $display($time, "ns SIM INFO @ mkRawTestDmaController: batchRequest batchTestCounterBReg=", fshow(batchTestCounterBReg[0])); + dmaReadDescEnqProbe <= True; + endrule + + rule batchWriteRequestA if (batchWriteCounterAReg[1] != 0 && !isWriteOnlyTest); + batchWriteCounterAReg[1] <= batchWriteCounterAReg[1] - 1; + + let curDstLowAddr = curWriteStrideCntAReg[0] == 0 ? 
dstAddrLowReg : curDstLowAddrAReg; + dmac.c2hReqFifoIn[1].enq(DmaRequest{ + startAddr:unpack({dstAddrHighReg, curDstLowAddr}), + length: unpack(lengthReg), + isWrite: True, + attr: unpack(truncate(attrReg)) + }); + + + if (curWriteStrideCntAReg[0] + 1 == maxStrideCntReg) begin + curWriteStrideCntAReg[0] <= 0; + end + else begin + curWriteStrideCntAReg[0] <= curWriteStrideCntAReg[0] + 1; + end + + curDstLowAddrAReg <= curDstLowAddr + strideSizeReg; + + dmaWriteDescEnqProbeA <= True; + // $display($time, "ns SIM INFO @ mkRawTestDmaController: batchRequest batchWriteCounterAReg=", fshow(batchWriteCounterAReg[1])); + + endrule + + + + rule batchWriteRequestB if (batchWriteCounterBReg[1] != 0 && !isWriteOnlyTest); + batchWriteCounterBReg[1] <= batchWriteCounterBReg[1] - 1; + + let curDstLowAddr = curWriteStrideCntBReg[0] == 0 ? dstAddrLowReg : curDstLowAddrBReg; + dmac.c2hReqFifoIn[0].enq(DmaRequest{ + startAddr:unpack({dstAddrHighReg, curDstLowAddr + doubleChannelTestOffsetReg}), + length: unpack(lengthReg), + isWrite: True, + attr: unpack(truncate(attrReg)) + }); + + + if (curWriteStrideCntBReg[0] + 1 == maxStrideCntReg) begin + curWriteStrideCntBReg[0] <= 0; + end + else begin + curWriteStrideCntBReg[0] <= curWriteStrideCntBReg[0] + 1; + end + + curDstLowAddrBReg <= curDstLowAddr + strideSizeReg; + + dmaWriteDescEnqProbeB <= True; + // $display($time, "ns SIM INFO @ mkRawTestDmaController: batchRequest batchWriteCounterBReg=", fshow(batchWriteCounterBReg[1])); + + endrule + + FIFOF#(Bit#(32)) writeOnlyPayloadGenReqAQueue <- mkSizedFIFOF(32); + Reg#(Bit#(32)) writeOnlyPayloadGenCounterAReg <- mkReg(0); + Reg#(Bool) writeOnlyPayloadGenIsFirstAReg <- mkReg(True); + + FIFOF#(Bit#(32)) writeOnlyPayloadGenReqBQueue <- mkSizedFIFOF(32); + Reg#(Bit#(32)) writeOnlyPayloadGenCounterBReg <- mkReg(0); + Reg#(Bool) writeOnlyPayloadGenIsFirstBReg <- mkReg(True); + + rule writeOnlyTestA if (batchTestCounterAReg[0] != 0 && isWriteOnlyTest); + batchTestCounterAReg[0] <= 
batchTestCounterAReg[0] - 1; + + let curDstLowAddr = curWriteStrideCntAReg[0] == 0 ? dstAddrLowReg : curDstLowAddrAReg; + dmac.c2hReqFifoIn[1].enq(DmaRequest{ + startAddr:unpack({dstAddrHighReg, curDstLowAddr}), + length: unpack(lengthReg), + isWrite: True, + attr: unpack(truncate(attrReg)) + }); + + writeOnlyPayloadGenReqAQueue.enq(lengthReg); + + + if (curWriteStrideCntAReg[0] + 1 == maxStrideCntReg) begin + curWriteStrideCntAReg[0] <= 0; + end + else begin + curWriteStrideCntAReg[0] <= curWriteStrideCntAReg[0] + 1; + end + + curDstLowAddrAReg <= curDstLowAddr + strideSizeReg; + endrule + + + rule genWriteOnlyStreamA if (isWriteOnlyTest); + let ds = ?; + if (writeOnlyPayloadGenIsFirstAReg) begin + let req = writeOnlyPayloadGenReqAQueue.first; + writeOnlyPayloadGenReqAQueue.deq; + + let isLast = req <= fromInteger(valueOf(BYTE_EN_WIDTH)); + ds = DataStream { + data : ?, + byteEn : convertBytePtr2ByteEn(isLast ? truncate(req) : fromInteger(valueOf(BYTE_EN_WIDTH))), + isFirst: True, + isLast: isLast + }; + writeOnlyPayloadGenIsFirstAReg <= isLast; + writeOnlyPayloadGenCounterAReg <= req - fromInteger(valueOf(BYTE_EN_WIDTH)); + end + else begin + let isLast = writeOnlyPayloadGenCounterAReg <= fromInteger(valueOf(BYTE_EN_WIDTH)); + + ds = DataStream { + data : ?, + byteEn : convertBytePtr2ByteEn(isLast ? truncate(writeOnlyPayloadGenCounterAReg) : fromInteger(valueOf(BYTE_EN_WIDTH))), + isFirst: False, + isLast: isLast + }; + writeOnlyPayloadGenIsFirstAReg <= isLast; + + writeOnlyPayloadGenCounterAReg <= writeOnlyPayloadGenCounterAReg - fromInteger(valueOf(BYTE_EN_WIDTH)); + end + + dataFifoA.enq(ds); + endrule + + + + + rule writeOnlyTestB if (batchTestCounterBReg[0] != 0 && isWriteOnlyTest && doubleChannelTestOffsetReg != 0); + batchTestCounterBReg[0] <= batchTestCounterBReg[0] - 1; + + let curDstLowAddr = curWriteStrideCntBReg[0] == 0 ? 
dstAddrLowReg : curDstLowAddrBReg; + dmac.c2hReqFifoIn[0].enq(DmaRequest{ + startAddr:unpack({dstAddrHighReg, curDstLowAddr}), + length: unpack(lengthReg), + isWrite: True, + attr: unpack(truncate(attrReg)) + }); + + writeOnlyPayloadGenReqBQueue.enq(lengthReg); + + + if (curWriteStrideCntBReg[0] + 1 == maxStrideCntReg) begin + curWriteStrideCntBReg[0] <= 0; + end + else begin + curWriteStrideCntBReg[0] <= curWriteStrideCntBReg[0] + 1; + end + + curDstLowAddrBReg <= curDstLowAddr + strideSizeReg; + endrule + + + rule genWriteOnlyStreamB if (isWriteOnlyTest && doubleChannelTestOffsetReg != 0); + let ds = ?; + if (writeOnlyPayloadGenIsFirstBReg) begin + let req = writeOnlyPayloadGenReqBQueue.first; + writeOnlyPayloadGenReqBQueue.deq; + + let isLast = req <= fromInteger(valueOf(BYTE_EN_WIDTH)); + ds = DataStream { + data : ?, + byteEn : convertBytePtr2ByteEn(isLast ? truncate(req) : fromInteger(valueOf(BYTE_EN_WIDTH))), + isFirst: True, + isLast: isLast + }; + writeOnlyPayloadGenIsFirstBReg <= isLast; + writeOnlyPayloadGenCounterBReg <= req - fromInteger(valueOf(BYTE_EN_WIDTH)); + end + else begin + let isLast = writeOnlyPayloadGenCounterBReg <= fromInteger(valueOf(BYTE_EN_WIDTH)); + + ds = DataStream { + data : ?, + byteEn : convertBytePtr2ByteEn(isLast ? 
truncate(writeOnlyPayloadGenCounterBReg) : fromInteger(valueOf(BYTE_EN_WIDTH))), + isFirst: False, + isLast: isLast + }; + writeOnlyPayloadGenIsFirstBReg <= isLast; + + writeOnlyPayloadGenCounterBReg <= writeOnlyPayloadGenCounterBReg - fromInteger(valueOf(BYTE_EN_WIDTH)); + end + + dataFifoB.enq(ds); + endrule + + // rule logRead; + // // let stream = dmac.c2hDataFifoOut[0].first; + // // $display($time, "ns SIM INFO @ mkRawTestDmaController: recv stream, isFirst %d, isLast %d, data %h", pack(stream.isFirst), pack(stream.isLast), stream.data); + // endrule + + + + rule sysResetHandler; + if (sysResetCounterReg < 1000) begin + sysResetCounterReg <= sysResetCounterReg + 1; + end + endrule + + interface rawPcie = dmac.rawPcie; + + method tlpSizeDebugPort = dmac.tlpSizeDebugPort; + method Bool sys_reset = sysResetCounterReg == 1000; + +endmodule diff --git a/src/XilBdmaPcieAdapter.bsv b/src/XilBdmaPcieAdapter.bsv new file mode 100644 index 0000000..7b09cc8 --- /dev/null +++ b/src/XilBdmaPcieAdapter.bsv @@ -0,0 +1,1120 @@ +import FIFOF::*; +import GetPut :: *; +import Vector::*; +import Probe :: *; + +import SemiFifo::*; +import XilBdmaPcieTypes::*; +import XilBdmaDmaTypes::*; +import XilBdmaPcieAxiStreamTypes::*; +import XilBdmaPrimUtils::*; +import XilBdmaStreamUtils::*; +import XilBdmaPcieDescriptorTypes::*; +import XilBdmaCompletionFifo::*; + +typedef 64 CMPL_NPREQ_INFLIGHT_NUM; +typedef 20 CMPL_NPREQ_WAITING_CLKS; +typedef 2'b11 NP_CREDIT_INCREMENT; +typedef 2'b00 NP_CREDIT_NOCHANGE; + +typedef 3 BYTEEN_INFIFO_DEPTH; + +typedef 'h1F IDEA_CQ_TKEEP_OF_CSR; +typedef 'hF IDEA_CC_TKEEP_OF_CSR; + +// Support Straddle in RQ/RC +interface RequesterAxiStreamAdapter; + // Dma To Adapter DataStreams + interface Vector#(DMA_PATH_NUM, FifoIn#(DataStream)) dmaDataFifoIn; + interface Vector#(DMA_PATH_NUM, FifoIn#(RqSideBandSignal)) dmaSideBandFifoIn; + // Adapter To Dma StraddleStreams, which may contains 2 TLP + interface Vector#(DMA_PATH_NUM, FifoOut#(StraddleStream)) 
dmaDataFifoOut; + // C2H RQ AxiStream Master + (* prefix = "" *) interface RawPcieRequesterRequest rawRequesterRequest; + // C2H RC AxiStream Slave + (* prefix = "" *) interface RawPcieRequesterComplete rawRequesterComplete; +endinterface + +// TODO: optimize fully-pipeline performance +(* synthesize *) +module mkRequesterAxiStreamAdapter(RequesterAxiStreamAdapter); + ConvertDataStreamsToStraddleAxis dmaToAxisConverter <- mkConvertDataStreamsToStraddleAxis; + ConvertStraddleAxisToDataStream axisToDmaConverter <- mkConvertStraddleAxisToDataStream; + + Vector#(DMA_PATH_NUM, FifoIn#(DataStream)) dmaDataFifoInIfc = newVector; + Vector#(DMA_PATH_NUM, FifoIn#(RqSideBandSignal)) dmaSideBandFifoInIfc = newVector; + Vector#(DMA_PATH_NUM, FifoOut#(StraddleStream)) dmaFifoOutIfc = newVector; + + let rawAxiStreamSlaveIfc <- mkFifoInToRawPcieAxiStreamSlave(axisToDmaConverter.axiStreamFifoIn); + let rawAxiStreamMasterIfc <- mkFifoOutToRawPcieAxiStreamMaster(dmaToAxisConverter.axiStreamFifoOut); + + for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1) begin + dmaDataFifoInIfc[pathIdx] = dmaToAxisConverter.dataFifoIn[pathIdx]; + dmaSideBandFifoInIfc[pathIdx] = dmaToAxisConverter.byteEnFifoIn[pathIdx]; + dmaFifoOutIfc[pathIdx] = axisToDmaConverter.dataFifoOut[pathIdx]; + end + + interface dmaDataFifoIn = dmaDataFifoInIfc; + interface dmaSideBandFifoIn = dmaSideBandFifoInIfc; + interface dmaDataFifoOut = dmaFifoOutIfc; + + interface RawPcieRequesterRequest rawRequesterRequest; + interface rawAxiStreamMaster = rawAxiStreamMasterIfc; + method Action pcieProgressTrack( + Bool tagValid0, + Bool tagValid1, + PcieRqTag tag0, + PcieRqTag tag1, + Bool seqNumValid0, + Bool seqNumValid1, + PcieRqSeqNum seqNum0, + PcieRqSeqNum seqNum1 + ); + // Not support progress track now + endmethod + endinterface + + interface RawPcieRequesterComplete rawRequesterComplete; + interface rawAxiStreamSlave = rawAxiStreamSlaveIfc; + endinterface +endmodule + +// Do 
not support straddle in CQ/CC +interface CompleterAxiStreamAdapter; + // Adapter To Dma DataStream + interface FifoOut#(DataStream) dmaDataFifoOut; + // Dma To Adapter DataStreams + interface FifoIn#(DataStream) dmaDataFifoIn; + // H2C CQ AxiStream Slave + (* prefix = "" *) interface RawPcieCompleterRequest rawCompleterRequest; + // H2C CC AxiStream Master + (* prefix = "" *) interface RawPcieCompleterComplete rawCompleterComplete; +endinterface + +// The completer only receives and transmits one-beat TLPs, in which isFirst = isLast = True +(* synthesize *) +module mkCompleterAxiStreamAdapter(CompleterAxiStreamAdapter); + FIFOF#(DataStream) inFifo <- mkLFIFOF; + FIFOF#(DataStream) outFifo <- mkFIFOF; + FIFOF#(CmplReqAxiStream) reqInFifo <- mkFIFOF; + FIFOF#(CmplCmplAxiStream) cmplOutFifo <- mkFIFOF; + + Reg#(Bool) isInPacketReg <- mkReg(False); + + let rawAxiStreamSlaveIfc <- mkFifoInToRawPcieAxiStreamSlave(convertFifoToFifoIn(reqInFifo)); + let rawAxiStreamMasterIfc <- mkFifoOutToRawPcieAxiStreamMaster(convertFifoToFifoOut(cmplOutFifo)); + + rule genAxis; + // Straddle mode is disabled for the completer. + // Every input stream beat is dequeued, but only a single-beat stream (isFirst && isLast) + // is converted to a CC AXI-Stream frame; any other beat is dropped. + let stream = inFifo.first; + inFifo.deq; + if (stream.isFirst && stream.isLast) begin + let isSop = PcieTlpCtlIsSopCommon { + isSopPtrs : replicate(0), + isSop : 1 + }; + let isEop = PcieTlpCtlIsEopCommon { + isEopPtrs : replicate(0), + isEop : 1 + }; + isEop.isEopPtrs[0] = truncate(convertByteEn2DwordPtr(stream.byteEn)); + // Do not enable parity check in the core + let sideBand = PcieCompleterCompleteSideBandFrame { + parity : 0, + discontinue : False, + isSop : isSop, + isEop : isEop + }; + let axiStream = CmplCmplAxiStream { + tData : stream.data, + tKeep : fromInteger(valueOf(IDEA_CC_TKEEP_OF_CSR)), + tLast : True, + tUser : pack(sideBand) + }; + cmplOutFifo.enq(axiStream); + end + endrule + + rule parseAxis; + let axiStream = reqInFifo.first; + reqInFifo.deq; + isInPacketReg <= !axiStream.tLast; + // Only forward a request that is both the first and the last beat (single-beat packet) + if (!isInPacketReg && axiStream.tLast) begin + 
PcieCompleterRequestSideBandFrame sideBand = unpack(axiStream.tUser); + let stream = DataStream { + data : axiStream.tData, + byteEn : sideBand.dataByteEn, + isFirst : True, + isLast : True + }; + outFifo.enq(stream); + end + endrule + + interface dmaDataFifoOut = convertFifoToFifoOut(outFifo); + interface dmaDataFifoIn = convertFifoToFifoIn(inFifo); + + interface RawPcieCompleterRequest rawCompleterRequest; + interface rawAxiStreamSlave = rawAxiStreamSlaveIfc; + method PcieNonPostedRequst nonPostedReqCreditIncrement = fromInteger(valueOf(NP_CREDIT_INCREMENT)); + method Action nonPostedReqCreditCnt(PcieNonPostedRequstCount nonPostedpReqCount); + endmethod + endinterface + + interface RawPcieCompleterComplete rawCompleterComplete; + interface rawAxiStreamMaster = rawAxiStreamMasterIfc; + endinterface + +endmodule + +// Convert 2 DataStream inputs to 1 PcieAxiStream output +// - The AXI-Stream is in straddle mode, which means tKeep and tLast are ignored +// - The core uses isSop and isEop to locate TLPs and allows 2 TLPs in one beat +// - The input DataStream should already have its descriptor prepended and be DW-aligned +interface ConvertDataStreamsToStraddleAxis; + interface Vector#(DMA_PATH_NUM, FifoIn#(DataStream)) dataFifoIn; + interface Vector#(DMA_PATH_NUM, FifoIn#(RqSideBandSignal)) byteEnFifoIn; + interface FifoOut#(ReqReqAxiStream) axiStreamFifoOut; +endinterface + +typedef Bit#(2) StraddleState; +typedef 2'b00 S_IDLE; +typedef 2'b01 S_SINGLE; +typedef 2'b10 S_DOUBLE; + +// Handle passed through the straddle arbiter: +// - valid : the handle carries a beat +// - isSd : a beat of a second stream has been merged into this beat (set in arbitStraddleTwoWay) +// - stream: the beat itself +// - id : path number the beat was taken from +// - subId : path number of the merged-in stream when isSd is set +typedef struct { + Bool valid; + Bool isSd; + DataStream stream; + DmaPathNo id; + DmaPathNo subId; +} ArbitHandle deriving(Bits, Eq, Bounded, FShow); + +// Returns an invalid handle with an empty stream and both ids set to 0 +function ArbitHandle getEmptyArbitHandle(); + return ArbitHandle { + valid : False, + isSd : False, + stream : getEmptyStream, + id : 0, + subId : 0 + }; +endfunction + +// True when byteEn bit STRADDLE_THRESH_BYTE_WIDTH is clear +// (presumably: the valid bytes end below the straddle threshold, assuming a contiguous byteEn - TODO confirm) +function Bool hasStraddleSpace(DataStream stream); + return !unpack(stream.byteEn[valueOf(STRADDLE_THRESH_BYTE_WIDTH)]); +endfunction + +module 
mkConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); + FIFOF#(DataStream) dataAFifo <- mkFIFOF; + FIFOF#(DataStream) dataBFifo <- mkFIFOF; + FIFOF#(ReqReqAxiStream) axiStreamOutFifo <- mkFIFOF; + + FIFOF#(RqSideBandSignal) byteEnAFifo <- mkSizedFIFOF(valueOf(BYTEEN_INFIFO_DEPTH)); + FIFOF#(RqSideBandSignal) byteEnBFifo <- mkSizedFIFOF(valueOf(BYTEEN_INFIFO_DEPTH)); + + FIFOF#(ArbitHandle) arbitFifo <- mkFIFOF; + + Reg#(ArbitHandle) cacheReg <- mkReg(getEmptyArbitHandle); + Wire#(ArbitHandle) way0Wire <- mkDWire(getEmptyArbitHandle); + Wire#(ArbitHandle) way1Wire <- mkDWire(getEmptyArbitHandle); + Probe#(Bit#(4)) txStraddleProbe <- mkProbe; + + function Tuple2#(DataStream, DataStream) conductStraddle(DataStream first, DataStream second); + let sum = first; + let carry = second; + sum.data = first.data | (second.data << valueOf(STRADDLE_THRESH_BIT_WIDTH)); + sum.byteEn = first.byteEn | (second.byteEn << valueOf(STRADDLE_THRESH_BYTE_WIDTH)); + carry.data = second.data >> valueOf(STRADDLE_THRESH_BIT_WIDTH); + carry.byteEn = second.byteEn >> valueOf(STRADDLE_THRESH_BYTE_WIDTH); + sum.isLast = isByteEnZero(carry.byteEn); // If carry is empty, then sum is the last frame + carry.isFirst = False; + return tuple2(carry, sum); + endfunction + + // Generate a straddle-mode datastream from the cached handle and two separate input datastreams (way0, way1) + // return : tuple4(wb, result, way0dq, way1dq) + // - wb : handle to write back into the cache + // - result : handle to output in this cycle (nothing is output when result.valid is False) + // - way0dq / way1dq : whether the way0 / way1 input beat was consumed and must be dequeued + // warning: the module should save the returned wb and pass it in as cache in the next cycle + function Tuple4#(ArbitHandle, ArbitHandle, Bool, Bool) arbitStraddleTwoWay(ArbitHandle cache, ArbitHandle way0, ArbitHandle way1); + let result = getEmptyArbitHandle; + let wb = getEmptyArbitHandle; + Bool way0dq = False; + Bool way1dq = False; + case(tuple3(cache.valid, way0.valid, way1.valid)) + // Only the cache is valid: output it directly if isLast, otherwise keep waiting for subsequent beats + tuple3(True, False, False): begin + if (cache.stream.isLast) begin + result = cache; + wb.id = cache.id; + wb.stream.isLast = result.stream.isLast; + end + else begin + 
wb = cache; + end + end + // Combine cache and way0, if cache isLast high, it's straddle combine, or is normal stream combine + tuple3(True, True, False): begin + if (cache.id == 0) begin // Normal inner-stream combine + result = cache; + if (!cache.stream.isLast) begin + let {carry, sum} = conductStraddle(cache.stream, way0.stream); + result.stream = sum; + wb.stream = carry; + wb.valid = !isByteEnZero(wb.stream.byteEn); + wb.id = 0; + way0dq = True; + end + else begin + wb.id = cache.id; + end + end + else begin // bypass or Straddle combine + if (cache.stream.isLast) begin + result = cache; + if (hasStraddleSpace(cache.stream)) begin + let {carry, sum} = conductStraddle(cache.stream, way0.stream); + result.stream = sum; + wb.stream = carry; + wb.valid = !isByteEnZero(wb.stream.byteEn); + wb.id = 0; + result.subId = 0; + result.isSd = True; + way0dq = True; + end + end + else begin + wb = cache; + end + end + wb.stream.isLast = wb.valid ? wb.stream.isLast : result.stream.isLast; + end + // Combine cache and way1, if cache isLast high, it's straddle combine, or is normal stream combine + tuple3(True, False, True): begin + if (cache.id == 1) begin // Normal inner-stream combine + result = cache; + if (!cache.stream.isLast) begin + let {carry, sum} = conductStraddle(cache.stream, way1.stream); + result.stream = sum; + wb.stream = carry; + wb.valid = !isByteEnZero(wb.stream.byteEn); + wb.id = 1; + way1dq = True; + end + else begin + wb.id = cache.id; + end + end + else begin // bypass or Straddle combine + if (cache.stream.isLast) begin + result = cache; + if (hasStraddleSpace(cache.stream)) begin + let {carry, sum} = conductStraddle(cache.stream, way1.stream); + result.stream = sum; + wb.stream = carry; + wb.valid = !isByteEnZero(wb.stream.byteEn); + wb.id = 1; + result.subId = 1; + result.isSd = True; + way1dq = True; + end + end + else begin + wb = cache; + end + end + wb.stream.isLast = wb.valid ? 
wb.stream.isLast : result.stream.isLast; + end + // Both streams and the cache have data + tuple3(True, True, True): begin + result = cache; + // cache's stream is not over yet, combine cache and way(x) first + if (!cache.stream.isLast) begin + if (cache.id == 0) begin + let {carry, sum} = conductStraddle(cache.stream, way0.stream); + result.stream = sum; + wb.stream = carry; + way0dq = True; + end + else begin + let {carry, sum} = conductStraddle(cache.stream, way1.stream); + result.stream = sum; + wb.stream = carry; + way1dq = True; + end + wb.id = cache.id; + wb.valid = !isByteEnZero(wb.stream.byteEn); + end + // assert whether it isLast and has straddle space, combine the other stream + else begin + if(hasStraddleSpace(cache.stream)) begin + result.isSd = True; + if (cache.id == 0) begin + let {carry, sum} = conductStraddle(cache.stream, way1.stream); + result.stream = sum; + wb.stream = carry; + result.subId = 1; + wb.id = 1; + way1dq = True; + end + else begin + let {carry, sum} = conductStraddle(cache.stream, way0.stream); + result.stream = sum; + wb.stream = carry; + result.subId = 0; + wb.id = 0; + way0dq = True; + end + wb.valid = !isByteEnZero(wb.stream.byteEn); + end + else begin + wb.id = cache.id; + end + end + wb.stream.isLast = wb.valid ? 
wb.stream.isLast : result.stream.isLast; + end + // Only way0 + tuple3(False, True, False): begin + // Last trans is over + if (cache.id == 0 || cache.stream.isLast) begin + result = way0; + wb.stream.isLast = result.stream.isLast; + wb.id = 0; + way0dq = True; + end + // waiting the other channel + else begin + wb = cache; + end + end + // Only way1 + tuple3(False, False, True): begin + // Last trans is over + if (cache.id == 1 || cache.stream.isLast) begin + result = way1; + wb.stream.isLast = result.stream.isLast; + wb.id = 1; + way1dq = True; + end + // waiting the other channel + else begin + wb = cache; + end + end + // Bypass + tuple3(False, False, False): begin + wb = cache; + end + // Both path have data, arbitrate the stream, and conbine the other if have spaces + tuple3(False, True, True): begin + // If no stream tranferring + if (cache.stream.isLast) begin + if (cache.id == 0) begin + result = way1; + way1dq = True; + end + else begin + result = way0; + way0dq = True; + end + end + // Continue the tranferring one + else begin + if (cache.id == 0) begin + result = way0; + way0dq = True; + end + else begin + result = way1; + way1dq = True; + end + end + wb.id = result.id; + // If the result is the last + if (hasStraddleSpace(result.stream) && result.stream.isLast) begin + result.isSd = True; + if (result.id == 0) begin + let {carry, sum} = conductStraddle(result.stream, way1.stream); + result.stream = sum; + wb.stream = carry; + result.subId = 1; + way1dq = True; + end + else begin + let {carry, sum} = conductStraddle(result.stream, way0.stream); + result.stream = sum; + wb.stream = carry; + result.subId = 0; + way0dq = True; + end + wb.valid = !isByteEnZero(wb.stream.byteEn); + wb.id = result.subId; + end + wb.stream.isLast = wb.valid ? 
wb.stream.isLast : result.stream.isLast; + end + endcase + return tuple4(wb, result, way0dq, way1dq); + endfunction + + // Generate isSop and isEop from ArbitHandle, byteEnA should be the sideband signal of the lsb straddle frame + function PcieRequesterRequestSideBandFrame genRQSideBand (ArbitHandle hdl, RqSideBandSignal byteEnA, RqSideBandSignal byteEnB); + // generate isSop and isEop first + let isSop = PcieTlpCtlIsSopCommon { + isSopPtrs : replicate(0), + isSop : 0 + }; + let isEop = PcieTlpCtlIsEopCommon { + isEopPtrs : replicate(0), + isEop : 0 + }; + if (!hdl.isSd) begin + if (hdl.stream.isFirst) begin + isSop.isSop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); + isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_0)); + end + if (hdl.stream.isLast) begin + isEop.isEop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); + isEop.isEopPtrs[0] = truncate(convertByteEn2DwordPtr(hdl.stream.byteEn)); + end + end + else if (hdl.isSd) begin + if (hdl.stream.isFirst) begin + isSop.isSop = fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT)); + isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_0)); + isSop.isSopPtrs[1] = fromInteger(valueOf(ISSOP_LANE_32)); + end + else begin + isSop.isSop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); + isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_32)); + end + Bit#(STRADDLE_THRESH_BYTE_WIDTH) lsbByteEn = truncate(hdl.stream.byteEn); + if (hdl.stream.isLast) begin + isEop.isEop = fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT)); + isEop.isEopPtrs[0] = truncate(convertByteEn2DwordPtr(zeroExtend(lsbByteEn))); + isEop.isEopPtrs[1] = truncate(convertByteEn2DwordPtr(hdl.stream.byteEn)); + end + else begin + isEop.isEop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); + isEop.isEopPtrs[0] = truncate(convertByteEn2DwordPtr(zeroExtend(lsbByteEn))); + end + end + // generate the full sideband frame + let {firstByteEnA, lastByteEnA, tphInfoA} = byteEnA; + let {firstByteEnB, lastByteEnB, tphInfoB} = byteEnB; + let sideBand = 
PcieRequesterRequestSideBandFrame { + // Do not use parity check in the core + parity : 0, + // Do not support progress track + seqNum1 : 0, + seqNum0 : 0, + //TODO: Do not support Transaction Processing Hint now, maybe we need TPH for better performance + tphSteeringTag : 0, + tphIndirectTagEn : 0, + tphType : {pack(tphInfoB.ph), pack(tphInfoA.ph)}, + tphPresent : {pack(tphInfoB.th), pack(tphInfoA.th)}, + // Do not support discontinue + discontinue : False, + // Indicates end of the tlp + isEop : isEop, + // Indicates starts of a new tlp + isSop : isSop, + // Disable when use DWord-aligned Mode + addrOffset : 0, + // Indicates byte enable in the first/last DWord + lastByteEn : {pack(lastByteEnB), pack(lastByteEnA)}, + firstByteEn : {pack(firstByteEnB), pack(firstByteEnA)} + }; + return sideBand; + endfunction + + rule getHandle; + if (dataAFifo.notEmpty) begin + way0Wire <= ArbitHandle { + valid : True, + isSd : False, + stream: dataAFifo.first, + id : 0, + subId : 0 + }; + end + if (dataBFifo.notEmpty) begin + way1Wire <= ArbitHandle { + valid : True, + isSd : False, + stream: dataBFifo.first, + id : 1, + subId : 0 + }; + end + endrule + + rule arbitrate; + // if (way0Wire.valid) + // $display($time, "ns SIM INFO @ arbit sim: input: id: %d, isFirst: %d, isLast: %d, data %h", way0Wire.id, pack(way0Wire.stream.isFirst), pack(way0Wire.stream.isLast), way0Wire.stream.data); + // if (way1Wire.valid) + // $display($time, "ns SIM INFO @ arbit sim: input: id: %d, isFirst: %d, isLast: %d, data %h", way1Wire.id, pack(way1Wire.stream.isFirst), pack(way1Wire.stream.isLast), way1Wire.stream.data); + let resultHdl = getEmptyArbitHandle; + let writebackHdl = getEmptyArbitHandle; + Bool way0dq = False; + Bool way1dq = False; + {writebackHdl, resultHdl, way0dq, way1dq} = arbitStraddleTwoWay(cacheReg, way0Wire, way1Wire); + cacheReg <= writebackHdl; + if (resultHdl.valid) begin + if (way0dq) begin + dataAFifo.deq; + end + if (way1dq) begin + dataBFifo.deq; + end + 
arbitFifo.enq(resultHdl); + + txStraddleProbe <= zeroExtend({pack(way1dq), pack(way0dq)}); + // $display($time, "ns SIM INFO @ arbit sim: input: cache.valid:%d way0.valid:%d, way1.valid:%d", cacheReg.valid, way0Wire.valid, way1Wire.valid); + // $display($time, "ns SIM INFO @ arbit sim: result: id:%d, isSd:%d, subId:%d, data %h", resultHdl.id, resultHdl.isSd, resultHdl.subId, resultHdl.stream.data); + // if (writebackHdl.valid) $display($time, "ns SIM INFO @ arbit sim: wb: id:%d, isSd:%d, subId:%d, data %h", writebackHdl.id, writebackHdl.isSd, writebackHdl.subId, writebackHdl.stream.data); + end + endrule + + // Pop one arbitrated beat, fetch the sideband entry of every TLP that starts in it, + // and emit the resulting RQ AXI-Stream frame. + rule genStraddle; + let hdl = arbitFifo.first; + arbitFifo.deq; + let sideBandBE0 = tuple3(0,0,unpack(0)); + let sideBandBE1 = tuple3(0,0,unpack(0)); + // Two TLPs start in this beat: consume one sideband entry per path; BE0 belongs to path hdl.id + if (hdl.isSd && hdl.stream.isFirst) begin + byteEnAFifo.deq; + byteEnBFifo.deq; + if (hdl.id == 0) begin + sideBandBE0 = byteEnAFifo.first; + sideBandBE1 = byteEnBFifo.first; + end + else begin + sideBandBE0 = byteEnBFifo.first; + sideBandBE1 = byteEnAFifo.first; + end + end + // Straddled beat that is not the first beat of hdl.id's stream: only the merged-in stream (hdl.subId) starts here + else if (hdl.isSd) begin + if (hdl.subId == 0) begin + sideBandBE0 = byteEnAFifo.first; + byteEnAFifo.deq; + end + else begin + sideBandBE0 = byteEnBFifo.first; + byteEnBFifo.deq; + end + end + // Non-straddled beat that starts a TLP of path hdl.id + else if (!hdl.isSd && hdl.stream.isFirst) begin + if (hdl.id == 0) begin + sideBandBE0 = byteEnAFifo.first; + byteEnAFifo.deq; + end + else begin + sideBandBE0 = byteEnBFifo.first; + byteEnBFifo.deq; + end + end + // Any other beat starts no TLP, so both sideband entries keep their zero defaults + let sideBand = genRQSideBand(hdl, sideBandBE0, sideBandBE1); + let axiStream = ReqReqAxiStream { + tData : hdl.stream.data, + tKeep : -1, + tLast : True, + tUser : pack(sideBand) + }; + axiStreamOutFifo.enq(axiStream); + // $display($time, "ns SIM INFO @ mkDataStreamToAxis: tx a AXIS frame, isSop:%d, isSopPtr:%d/%d, isEop:%d, isEopPtr:%d/%d, BE0:%b/%b, BE1:%b/%b, tData:%h", + // sideBand.isSop.isSop, sideBand.isSop.isSopPtrs[0], sideBand.isSop.isSopPtrs[1], sideBand.isEop.isEop, sideBand.isEop.isEopPtrs[0], sideBand.isEop.isEopPtrs[1], + 
// tpl_1(sideBandBE0), tpl_2(sideBandBE0), tpl_1(sideBandBE1), tpl_2(sideBandBE1), axiStream.tData); + endrule + + Vector#(DMA_PATH_NUM, FifoIn#(DataStream)) dataFifoInIfc = newVector; + Vector#(DMA_PATH_NUM, FifoIn#(RqSideBandSignal)) byteEnFifoInIfc = newVector; + dataFifoInIfc[0] = convertFifoToFifoIn(dataAFifo); + dataFifoInIfc[1] = convertFifoToFifoIn(dataBFifo); + byteEnFifoInIfc[0] = convertFifoToFifoIn(byteEnAFifo); + byteEnFifoInIfc[1] = convertFifoToFifoIn(byteEnBFifo); + interface dataFifoIn = dataFifoInIfc; + interface byteEnFifoIn = byteEnFifoInIfc; + interface axiStreamFifoOut = convertFifoToFifoOut(axiStreamOutFifo); +endmodule + +// module mkOldConvertDataStreamsToStraddleAxis(ConvertDataStreamsToStraddleAxis); +// FIFOF#(RqSideBandSignal) byteEnAFifo <- mkSizedFIFOF(valueOf(BYTEEN_INFIFO_DEPTH)); +// FIFOF#(RqSideBandSignal) byteEnBFifo <- mkSizedFIFOF(valueOf(BYTEEN_INFIFO_DEPTH)); + +// StreamShiftComplex shiftA <- mkStreamShiftComplex(fromInteger(valueOf(STRADDLE_THRESH_BYTE_WIDTH))); +// StreamShiftComplex shiftB <- mkStreamShiftComplex(fromInteger(valueOf(STRADDLE_THRESH_BYTE_WIDTH))); + +// FIFOF#(ReqReqAxiStream) axiStreamOutFifo <- mkFIFOF; + +// Reg#(Bool) isInStreamAReg <- mkReg(False); +// Reg#(Bool) isInStreamBReg <- mkReg(False); +// Reg#(Bool) isInShiftAReg <- mkReg(False); +// Reg#(Bool) isInShiftBReg <- mkReg(False); +// Reg#(Bool) roundRobinReg <- mkReg(False); + +// function Bool hasStraddleSpace(DataStream sdStream); +// return !unpack(sdStream.byteEn[valueOf(STRADDLE_THRESH_BYTE_WIDTH)]); +// endfunction + +// function Bool isValidShiftStream(DataStream shiftStream); +// Bool valid = !unpack(shiftStream.byteEn[0]) && unpack(shiftStream.byteEn[valueOf(STRADDLE_THRESH_BYTE_WIDTH)]); +// return valid; +// endfunction + +// function PcieRequesterRequestSideBandFrame genRQSideBand( +// PcieTlpCtlIsEopCommon isEop, PcieTlpCtlIsSopCommon isSop, RqSideBandSignal byteEnA, RqSideBandSignal byteEnB +// ); +// let {firstByteEnA, 
lastByteEnA} = byteEnA; +// let {firstByteEnB, lastByteEnB} = byteEnB; +// let sideBand = PcieRequesterRequestSideBandFrame { +// // Do not use parity check in the core +// parity : 0, +// // Do not support progress track +// seqNum1 : 0, +// seqNum0 : 0, +// //TODO: Do not support Transaction Processing Hint now, maybe we need TPH for better performance +// tphSteeringTag : 0, +// tphIndirectTagEn : 0, +// tphType : 0, +// tphPresent : 0, +// // Do not support discontinue +// discontinue : False, +// // Indicates end of the tlp +// isEop : isEop, +// // Indicates starts of a new tlp +// isSop : isSop, +// // Disable when use DWord-aligned Mode +// addrOffset : 0, +// // Indicates byte enable in the first/last DWord +// lastByteEn : {pack(lastByteEnB), pack(lastByteEnA)}, +// firstByteEn : {pack(firstByteEnB), pack(firstByteEnA)} +// }; +// return sideBand; +// endfunction + +// // Pipeline stage 1: get the shift datastream + +// // Pipeline Stage 2: get the axiStream data +// rule genStraddlePcie; +// DataStream sendingStream = getEmptyStream; +// DataStream pendingStream = getEmptyStream; +// Bool isSendingA = True; + +// // In streamA sending epoch, waiting streamA until isLast +// if (isInStreamAReg) begin +// let {oriStreamA, shiftStreamA} = shiftA.streamFifoOut.first; +// sendingStream = isInShiftAReg ? shiftStreamA : oriStreamA; +// shiftA.streamFifoOut.deq; +// isSendingA = True; +// if (shiftB.streamFifoOut.notEmpty && sendingStream.isLast && hasStraddleSpace(sendingStream)) begin +// let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; +// shiftB.streamFifoOut.deq; +// if (isValidShiftStream(shiftStreamB)) begin +// pendingStream = shiftStreamB; +// end +// end +// end +// // In streamB sending epoch, waiting streamB until isLast +// else if (isInStreamBReg) begin +// let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; +// sendingStream = isInShiftBReg ? 
shiftStreamB : oriStreamB; +// shiftB.streamFifoOut.deq; +// isSendingA = False; +// if (shiftA.streamFifoOut.notEmpty && sendingStream.isLast && hasStraddleSpace(sendingStream)) begin +// let {oriStreamA, shiftStreamA} = shiftA.streamFifoOut.first; +// shiftA.streamFifoOut.deq; +// if (isValidShiftStream(shiftStreamA)) begin +// pendingStream = shiftStreamA; +// end +// end +// end +// // In Idle, choose one stream to enter new epoch +// else begin +// if (shiftA.streamFifoOut.notEmpty && shiftB.streamFifoOut.notEmpty) begin +// roundRobinReg <= !roundRobinReg; +// if (roundRobinReg) begin +// let {oriStreamA, shiftStreamA} = shiftA.streamFifoOut.first; +// sendingStream = oriStreamA; +// shiftA.streamFifoOut.deq; +// isSendingA = True; +// if (sendingStream.isLast && hasStraddleSpace(sendingStream)) begin +// let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; +// shiftB.streamFifoOut.deq; +// if (isValidShiftStream(shiftStreamB)) begin +// pendingStream = shiftStreamB; +// end +// end +// end +// else begin +// let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; +// sendingStream = oriStreamB; +// shiftB.streamFifoOut.deq; +// isSendingA = False; +// if (sendingStream.isLast && hasStraddleSpace(sendingStream)) begin +// let {oriStreamA, shiftStreamA} = shiftA.streamFifoOut.first; +// shiftA.streamFifoOut.deq; +// if (isValidShiftStream(shiftStreamA)) begin +// pendingStream = shiftStreamA; +// end +// end +// end +// end +// else if (shiftA.streamFifoOut.notEmpty) begin +// let {oriStreamA, shiftStreamA} = shiftA.streamFifoOut.first; +// sendingStream = oriStreamA; +// shiftA.streamFifoOut.deq; +// isSendingA = True; +// roundRobinReg <= False; +// end +// else if (shiftB.streamFifoOut.notEmpty) begin +// let {oriStreamB, shiftStreamB} = shiftB.streamFifoOut.first; +// sendingStream = oriStreamB; +// shiftB.streamFifoOut.deq; +// isSendingA = False; +// roundRobinReg <= True; +// end +// else begin +// // Do nothing +// end +// end + +// if 
(!isByteEnZero(sendingStream.byteEn)) begin +// // Change the registers and generate PcieAxiStream +// let rqSideBandSignal0 = tuple3(0, 0, unpack(0)); +// let rqSideBandSignal1 = tuple3(0, 0, unpack(0)); +// if (isSendingA) begin +// isInStreamAReg <= !sendingStream.isLast; +// isInShiftAReg <= sendingStream.isLast ? False : isInShiftAReg; +// // Only A sop +// if (sendingStream.isFirst && !pendingStream.isFirst) begin +// rqSideBandSignal0 = byteEnAFifo.first; +// byteEnAFifo.deq; +// end +// // A sop and B sop +// else if (sendingStream.isFirst && hasStraddleSpace(sendingStream) && pendingStream.isFirst) begin +// isInStreamBReg <= !pendingStream.isLast; +// isInShiftBReg <= !pendingStream.isLast; +// rqSideBandSignal0 = byteEnAFifo.first; +// byteEnAFifo.deq; +// rqSideBandSignal1 = byteEnBFifo.first; +// byteEnBFifo.deq; +// end +// // Only B sop +// else if (sendingStream.isLast && hasStraddleSpace(sendingStream) && pendingStream.isFirst) begin +// isInStreamBReg <= !pendingStream.isLast; +// isInShiftBReg <= !pendingStream.isLast; +// rqSideBandSignal0 = byteEnBFifo.first; +// byteEnBFifo.deq; +// end +// end +// else begin +// isInStreamBReg <= !sendingStream.isLast; +// isInShiftBReg <= sendingStream.isLast ? 
False : isInShiftBReg; +// // Only B sop +// if (sendingStream.isFirst && !pendingStream.isFirst) begin +// rqSideBandSignal0 = byteEnBFifo.first; +// byteEnBFifo.deq; +// end +// // B sop and A sop +// else if (sendingStream.isFirst && hasStraddleSpace(sendingStream) && pendingStream.isFirst) begin +// isInStreamAReg <= !pendingStream.isLast; +// isInShiftAReg <= !pendingStream.isLast; +// rqSideBandSignal0 = byteEnBFifo.first; +// byteEnBFifo.deq; +// rqSideBandSignal1 = byteEnAFifo.first; +// byteEnAFifo.deq; +// end +// else if (sendingStream.isLast && hasStraddleSpace(sendingStream) && pendingStream.isFirst) begin +// isInStreamAReg <= !pendingStream.isLast; +// isInShiftAReg <= !pendingStream.isLast; +// rqSideBandSignal0 = byteEnAFifo.first; +// byteEnAFifo.deq; +// end +// end + +// let isSop = PcieTlpCtlIsSopCommon { +// isSopPtrs : replicate(0), +// isSop : 0 +// }; +// let isEop = PcieTlpCtlIsEopCommon { +// isEopPtrs : replicate(0), +// isEop : 0 +// }; + +// if (sendingStream.isFirst && pendingStream.isFirst) begin +// isSop.isSop = fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT)); +// isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_0)); +// isSop.isSopPtrs[1] = fromInteger(valueOf(ISSOP_LANE_32)); +// end +// else if (sendingStream.isFirst) begin +// isSop.isSop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); +// isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_0)); +// end +// else if (pendingStream.isFirst) begin +// isSop.isSop = fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)); +// isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_32)); +// end +// if (pendingStream.isLast && isValidShiftStream(pendingStream)) begin +// isEop.isEop = fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT)); +// isEop.isEopPtrs[0] = truncate(convertByteEn2DwordPtr(sendingStream.byteEn)); +// isEop.isEopPtrs[1] = truncate(convertByteEn2DwordPtr(pendingStream.byteEn)); +// end +// else if (sendingStream.isLast) begin +// isEop.isEop = 
fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT));
+// isEop.isEopPtrs[0] = truncate(convertByteEn2DwordPtr(sendingStream.byteEn));
+// end
+
+// let sideBand = genRQSideBand(isEop, isSop, rqSideBandSignal0, rqSideBandSignal1);
+// let axiStream = ReqReqAxiStream {
+// tData : sendingStream.data | pendingStream.data,
+// tKeep : -1,
+// tLast : True,
+// tUser : pack(sideBand)
+// };
+// axiStreamOutFifo.enq(axiStream);
+// // $display($time, "ns SIM INFO @ mkDataStreamToAxis: tx a AXIS frame, isSop:%d, isSopPtr:%d/%d, isEop:%d, isEopPtr:%d/%d, BE0:%b/%b, BE1:%b/%b, tData:%h",
+// // isSop.isSop, isSop.isSopPtrs[0], isSop.isSopPtrs[1], isEop.isEop, isEop.isEopPtrs[0], isEop.isEopPtrs[1], tpl_1(rqSideBandSignal0), tpl_2(rqSideBandSignal0), tpl_1(rqSideBandSignal1), tpl_2(rqSideBandSignal1), axiStream.tData);
+// if (isEop.isEop >= fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT)) && isEop.isEopPtrs[0] == 0) begin
+// $display($time, "ns SIM Warning @ mkDataStreamToAxis: sendingstream byteEn %b", sendingStream.byteEn);
+// end
+// else if (isEop.isEop == fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT)) && isEop.isEopPtrs[1] == 0) begin
+// $display($time, "ns SIM Warning @ mkDataStreamToAxis: pendingstream byteEn %b", pendingStream.byteEn);
+// end
+// end
+// endrule
+
+// Vector#(DMA_PATH_NUM, FifoIn#(DataStream)) dataFifoInIfc = newVector;
+// Vector#(DMA_PATH_NUM, FifoIn#(RqSideBandSignal)) byteEnFifoInIfc = newVector;
+// dataFifoInIfc[0] = shiftA.streamFifoIn;
+// dataFifoInIfc[1] = shiftB.streamFifoIn;
+// byteEnFifoInIfc[0] = convertFifoToFifoIn(byteEnAFifo);
+// byteEnFifoInIfc[1] = convertFifoToFifoIn(byteEnBFifo);
+// interface dataFifoIn = dataFifoInIfc;
+// interface byteEnFifoIn = byteEnFifoInIfc;
+// interface axiStreamFifoOut = convertFifoToFifoOut(axiStreamOutFifo);
+// endmodule
+
+interface ConvertStraddleAxisToDataStream; // One ReqCmplAxiStream input, DMA_PATH_NUM StraddleStream outputs.
+    interface FifoIn#(ReqCmplAxiStream) axiStreamFifoIn; // Input: AXI-Stream beats to be parsed.
+    interface Vector#(DMA_PATH_NUM, FifoOut#(StraddleStream)) dataFifoOut; // Output: one StraddleStream FIFO per DMA path.
+endinterface
+// mkConvertStraddleAxisToDataStream: buffers incoming ReqCmplAxiStream beats and, once recvDelayReg is set, parses one beat per rule firing, enqueueing to each DMA path's output FIFO the TLP data that passes isMyValidTlp for that path or that continues a TLP already in progress on that path.
+module mkConvertStraddleAxisToDataStream(ConvertStraddleAxisToDataStream);
+    FIFOF#(ReqCmplAxiStream) axiStreamInFifo <- mkSizedFIFOF(16); // 16-entry input buffer: it seems that if we deassert AXI's ready signal, the IP core will also deassert its valid signal some beats later
+    Vector#(DMA_PATH_NUM, FIFOF#(StraddleStream)) outFifos <- replicateM(mkFIFOF); // one output FIFO per DMA path
+
+    // Per-path state carried across beats while a TLP is in progress: isInTlpRegs = the path's current TLP has not yet seen its last beat; isCompleted / tagReg = the isRequestCompleted flag and tag most recently recorded for the path, reused to label continuation beats.
+    Vector#(DMA_PATH_NUM, Reg#(Bool)) isInTlpRegs <- replicateM(mkReg(False));
+    Vector#(DMA_PATH_NUM, Reg#(Bool)) isCompleted <- replicateM(mkReg(False));
+    Vector#(DMA_PATH_NUM, Reg#(SlotToken)) tagReg <- replicateM(mkReg(0));
+
+    function PcieRequesterCompleteDescriptor getDescriptorFromData(PcieTlpCtlIsSopPtr isSopPtr, Data data); // Returns the completion descriptor carried in the beat at the position named by isSopPtr.
+        if (isSopPtr == fromInteger(valueOf(ISSOP_LANE_0))) begin
+            return unpack(truncate(data)); // TLP starts at lane 0: descriptor is in the low bits of data
+        end
+        else begin
+            return unpack(truncate(data >> valueOf(STRADDLE_THRESH_BIT_WIDTH))); // otherwise: descriptor sits STRADDLE_THRESH_BIT_WIDTH bits up
+        end
+    endfunction
+
+    function Bool isMyValidTlp(DmaPathNo path, PcieRequesterCompleteDescriptor desc); // True when the completion is successful, not poisoned, and tagged for `path`.
+        Bool valid = (desc.status == fromInteger(valueOf(SUCCESSFUL_CMPL))) && (!desc.isPoisoned);
+        Bool pathMatch = (truncate(path) == desc.tag[valueOf(DES_NONEXTENDED_TAG_WIDTH) - 1]); // the owning path is read from tag bit (DES_NONEXTENDED_TAG_WIDTH - 1)
+        return valid && pathMatch;
+    endfunction
+
+    Reg#(Bool) recvDelayReg <- mkReg(False); // guard of parseAxiStream; only ever written to True (by rule flip)
+    Reg#(Bit#(32)) recvDelayCounterReg <- mkReg(0); // incremented on every firing of rule flip
+
+    Vector#(2, Probe#(Bit#(4))) rxStraddleProbeVec <- replicateM(mkProbe); // debug probes: per path, the number (1..8) of the parseAxiStream branch that handled the beat. NOTE(review): sized with literal 2 but indexed by pathIdx below - presumably DMA_PATH_NUM == 2; confirm.
+    rule flip; // Start-up delay: sets recvDelayReg once the counter has exceeded 3000. NOTE(review): the reason for the delay is not stated in this file section - confirm.
+        recvDelayCounterReg <= recvDelayCounterReg + 1;
+        if (recvDelayCounterReg > 3000) begin
+            recvDelayReg <= True;
+        end
+    endrule
+
+    // rule debug;
+    //     if (!outFifos[0].notFull) begin
+    //         $display("time=%0t, outFifos[0] FULL", $time);
+    //     end
+    //     if (!outFifos[1].notFull) begin
+    //         $display("time=%0t, outFifos[1] FULL", $time);
+    //     end
+    //     if (!axiStreamInFifo.notFull) begin
+    //         $display("time=%0t, axiStreamInFifo FULL", $time);
+    //     end
+    // endrule
+
+    rule parseAxiStream if (recvDelayReg); // Dequeues one beat and, for every path, classifies it by how many TLPs start in it (per the sideband in tUser).
+        let axiStream = axiStreamInFifo.first;
+        axiStreamInFifo.deq;
+        PcieRequesterCompleteSideBandFrame sideBand = unpack(axiStream.tUser);
+        let isEop = sideBand.isEop;
+        let isSop = sideBand.isSop;
+        // $display($time, "ns SIM INFO @ mkAxisToDataStream: rx a AXIS frame, isSop:%h, isEop:%d, tData:%h", isSop.isSop, isEop.isEop, axiStream.tData);
+        for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1) begin
+            let sdStream = getEmptyStraddleStream; // start from the empty stream; only the fields relevant to the matching branch are filled in
+            // Two TLPs start in this beat (descriptors located via isSopPtrs[0] and isSopPtrs[1])
+            if (isSop.isSop == fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT))) begin
+                let desc0 = getDescriptorFromData(isSop.isSopPtrs[0], axiStream.tData);
+                let desc1 = getDescriptorFromData(isSop.isSopPtrs[1], axiStream.tData);
+                // Both belong to this path: forward the whole beat as a double frame; the first TLP is marked last, the second is last only if isEop.isEop[1] is set
+                if (isMyValidTlp(pathIdx, desc0) && isMyValidTlp(pathIdx, desc1)) begin
+                    sdStream.data = axiStream.tData;
+                    sdStream.byteEn = sideBand.dataByteEn;
+                    sdStream.isDoubleFrame = True;
+                    sdStream.isFirst = replicate(True);
+                    sdStream.isLast[0] = True;
+                    sdStream.isLast[1] = unpack(isEop.isEop[1]);
+                    sdStream.tag[0] = truncate(desc0.tag);
+                    sdStream.tag[1] = truncate(desc1.tag);
+                    sdStream.isCompleted[0] = desc0.isRequestCompleted;
+                    sdStream.isCompleted[1] = desc1.isRequestCompleted;
+                    outFifos[pathIdx].enq(sdStream);
+                    tagReg[pathIdx] <= sdStream.tag[1]; // remember the second TLP: it is the one that may continue into later beats
+                    isInTlpRegs[pathIdx] <= !sdStream.isLast[1];
+                    isCompleted[pathIdx] <= desc1.isRequestCompleted;
+                    // $display($time, "ns SIM INFO @ mkAxisToDataStream case 0: outputStraddleStream [%d], sdStream=", pathIdx, fshow(sdStream), ", sideBand=", fshow(sideBand));
+                    rxStraddleProbeVec[pathIdx] <= 1;
+                end
+                // Only the second TLP belongs to this path: forward what getStraddleData / getStraddleByteEn select for isSopPtrs[1] (presumably just that TLP's part of the beat - helpers are defined elsewhere)
+                else if (isMyValidTlp(pathIdx, desc1)) begin
+                    let isSopPtr = isSop.isSopPtrs[1];
+                    sdStream.data = getStraddleData(isSopPtr, axiStream.tData);
+                    sdStream.byteEn = getStraddleByteEn(isSopPtr, sideBand.dataByteEn);
+                    sdStream.isDoubleFrame = False;
+                    sdStream.isFirst[0] = True;
+                    sdStream.isLast[0] = unpack(isEop.isEop[1]);
+                    sdStream.tag[0] = truncate(desc1.tag);
+                    sdStream.isCompleted[0] = desc1.isRequestCompleted;
+                    outFifos[pathIdx].enq(sdStream);
+                    tagReg[pathIdx] <= sdStream.tag[0];
+                    isInTlpRegs[pathIdx] <= !sdStream.isLast[0];
+                    isCompleted[pathIdx] <= desc1.isRequestCompleted;
+                    // $display($time, "ns SIM INFO @ mkAxisToDataStream case 1: outputStraddleStream [%d], sdStream=", pathIdx, fshow(sdStream), ", sideBand=", fshow(sideBand));
+                    rxStraddleProbeVec[pathIdx] <= 2;
+                end
+                // Only the first TLP belongs to this path: it is marked both first and last, so no in-progress state is left behind
+                else if (isMyValidTlp(pathIdx, desc0)) begin
+                    let isSopPtr = isSop.isSopPtrs[0];
+                    sdStream.data = getStraddleData(isSopPtr, axiStream.tData);
+                    sdStream.byteEn = getStraddleByteEn(isSopPtr, sideBand.dataByteEn);
+                    sdStream.isDoubleFrame = False;
+                    sdStream.isFirst[0] = True;
+                    sdStream.isLast[0] = True;
+                    sdStream.tag[0] = truncate(desc0.tag);
+                    sdStream.isCompleted[0] = desc0.isRequestCompleted;
+                    outFifos[pathIdx].enq(sdStream);
+                    tagReg[pathIdx] <= sdStream.tag[0];
+                    isInTlpRegs[pathIdx] <= False;
+                    isCompleted[pathIdx] <= False;
+                    // $display($time, "ns SIM INFO @ mkAxisToDataStream case 2: outputStraddleStream [%d], sdStream=", pathIdx, fshow(sdStream), ", sideBand=", fshow(sideBand));
+                    rxStraddleProbeVec[pathIdx] <= 3;
+                end
+            end
+            // Exactly one TLP starts in this beat
+            else if (isSop.isSop == fromInteger(valueOf(SINGLE_TLP_IN_THIS_BEAT))) begin
+                let isSopPtr = isSop.isSopPtrs[0];
+                let desc = getDescriptorFromData(isSopPtr, axiStream.tData);
+                // The new TLP starts in Lane0: the whole beat is forwarded if the TLP belongs to this path
+                if (isSopPtr == fromInteger(valueOf(ISSOP_LANE_0))) begin
+                    if (isMyValidTlp(pathIdx, desc)) begin
+                        sdStream.data = axiStream.tData;
+                        sdStream.byteEn = sideBand.dataByteEn;
+                        sdStream.isDoubleFrame = False;
+                        sdStream.isFirst[0] = True;
+                        sdStream.isLast[0] = unpack(isEop.isEop[0]);
+                        sdStream.tag[0] = truncate(desc.tag);
+                        sdStream.isCompleted[0] = desc.isRequestCompleted;
+                        outFifos[pathIdx].enq(sdStream);
+                        tagReg[pathIdx] <= sdStream.tag[0];
+                        isInTlpRegs[pathIdx] <= !sdStream.isLast[0];
+                        isCompleted[pathIdx] <= desc.isRequestCompleted;
+                        // $display($time, "ns SIM INFO @ mkAxisToDataStream case 3: outputStraddleStream [%d], sdStream=", pathIdx, fshow(sdStream), ", sideBand=", fshow(sideBand));
+                        rxStraddleProbeVec[pathIdx] <= 4;
+                    end
+                end
+                // The new TLP starts in Lane32: three sub-cases depending on who owns the new TLP and whether this path has a TLP in progress
+                else if (isSopPtr == fromInteger(valueOf(ISSOP_LANE_32))) begin
+                    if (isMyValidTlp(pathIdx, desc) && isInTlpRegs[pathIdx]) begin // this path's TLP in progress is marked last and its next TLP starts in the same beat: double frame
+                        sdStream.data = axiStream.tData;
+                        sdStream.byteEn = sideBand.dataByteEn;
+                        sdStream.isDoubleFrame = True;
+                        sdStream.isFirst[0] = False;
+                        sdStream.isLast[0] = True;
+                        sdStream.isFirst[1] = True;
+                        sdStream.isLast[1] = unpack(isEop.isEop[1]);
+                        sdStream.tag[0] = tagReg[pathIdx];
+                        sdStream.tag[1] = truncate(desc.tag);
+                        sdStream.isCompleted[0] = isCompleted[pathIdx];
+                        sdStream.isCompleted[1] = desc.isRequestCompleted;
+                        outFifos[pathIdx].enq(sdStream);
+                        tagReg[pathIdx] <= sdStream.tag[1];
+                        isInTlpRegs[pathIdx] <= !sdStream.isLast[1];
+                        isCompleted[pathIdx] <= desc.isRequestCompleted;
+                        // $display($time, "ns SIM INFO @ mkAxisToDataStream case 4: outputStraddleStream [%d], sdStream=", pathIdx, fshow(sdStream), ", sideBand=", fshow(sideBand));
+                        rxStraddleProbeVec[pathIdx] <= 5;
+                    end
+                    else if (isMyValidTlp(pathIdx, desc)) begin // new TLP is for this path and nothing is in progress on it: single frame built from the isSopPtr selection
+                        sdStream.data = getStraddleData(isSopPtr, axiStream.tData);
+                        sdStream.byteEn = getStraddleByteEn(isSopPtr, sideBand.dataByteEn);
+                        sdStream.isDoubleFrame = False;
+                        sdStream.isFirst[0] = True;
+                        sdStream.isLast[0] = unpack(isEop.isEop[1]);
+                        sdStream.tag[0] = truncate(desc.tag);
+                        sdStream.isCompleted[0] = desc.isRequestCompleted;
+                        outFifos[pathIdx].enq(sdStream);
+                        tagReg[pathIdx] <= sdStream.tag[0];
+                        isInTlpRegs[pathIdx] <= !sdStream.isLast[0];
+                        isCompleted[pathIdx] <= desc.isRequestCompleted;
+                        // $display($time, "ns SIM INFO @ mkAxisToDataStream case 5: outputStraddleStream [%d], sdStream=", pathIdx, fshow(sdStream), ", sideBand=", fshow(sideBand));
+                        rxStraddleProbeVec[pathIdx] <= 6;
+                    end
+                    else if (isInTlpRegs[pathIdx]) begin // new TLP is not accepted for this path, but this path's TLP in progress gets its final beat (selection 0) and the in-progress state is cleared
+                        sdStream.data = getStraddleData(0, axiStream.tData);
+                        sdStream.byteEn = getStraddleByteEn(0, sideBand.dataByteEn);
+                        sdStream.isDoubleFrame = False;
+                        sdStream.isFirst[0] = False;
+                        sdStream.isLast[0] = True;
+                        sdStream.tag[0] = tagReg[pathIdx];
+                        sdStream.isCompleted[0] = isCompleted[pathIdx];
+                        outFifos[pathIdx].enq(sdStream);
+                        tagReg[pathIdx] <= sdStream.tag[0];
+                        isInTlpRegs[pathIdx] <= False;
+                        isCompleted[pathIdx] <= False;
+                        // $display($time, "ns SIM INFO @ mkAxisToDataStream case 6: outputStraddleStream [%d], sdStream=", pathIdx, fshow(sdStream), ", sideBand=", fshow(sideBand));
+                        rxStraddleProbeVec[pathIdx] <= 7;
+                    end
+                end
+            end
+            // No TLP starts in this beat: it is forwarded only to a path that has a TLP in progress, labelled with that path's stored tag and isCompleted flag
+            else begin
+                if (isInTlpRegs[pathIdx]) begin
+                    sdStream.data = axiStream.tData;
+                    sdStream.byteEn = sideBand.dataByteEn;
+                    sdStream.isDoubleFrame = False;
+                    sdStream.isFirst[0] = False;
+                    sdStream.isLast[0] = unpack(isEop.isEop[0]);
+                    sdStream.tag[0] = tagReg[pathIdx];
+                    sdStream.isCompleted[0] = isCompleted[pathIdx];
+                    outFifos[pathIdx].enq(sdStream);
+                    tagReg[pathIdx] <= sdStream.tag[0];
+                    isInTlpRegs[pathIdx] <= !sdStream.isLast[0];
+                    // $display($time, "ns SIM INFO @ mkAxisToDataStream case 7: outputStraddleStream [%d], sdStream=", pathIdx, fshow(sdStream), ", sideBand=", fshow(sideBand));
+                    rxStraddleProbeVec[pathIdx] <= 8;
+                end
+            end
+
+        end
+    endrule
+
+    Vector#(DMA_PATH_NUM, FifoOut#(StraddleStream)) outIfcs = newVector; // FifoOut views of the per-path output FIFOs, filled in by the loop below
+    for (Integer pathIdx = 0; pathIdx < valueOf(DMA_PATH_NUM); pathIdx = pathIdx + 1) begin
+        outIfcs[pathIdx] = convertFifoToFifoOut(outFifos[pathIdx]);
+    end
+    interface axiStreamFifoIn = convertFifoToFifoIn(axiStreamInFifo);
+    interface dataFifoOut = outIfcs;
+endmodule
+
+
+
+
diff --git a/src/XilBdmaPcieAxiStreamTypes.bsv b/src/XilBdmaPcieAxiStreamTypes.bsv
new file mode 100644
index 0000000..bc833bc
--- /dev/null
+++ b/src/XilBdmaPcieAxiStreamTypes.bsv
@@ -0,0 +1,99 @@
+import FIFOF :: *;
+import GetPut :: *;
+import PAClib :: *;
+
+import BusConversion :: *;
+import SemiFifo :: *;
+
+typedef 8 BYTE_WIDTH;
+typedef 2 WORD_BYTES;
+typedef 4 DWORD_BYTES; +typedef TMul#(WORD_BYTES, BYTE_WIDTH) WORD_WIDTH; +typedef TMul#(DWORD_BYTES, BYTE_WIDTH) DWORD_WIDTH; + +typedef 512 PCIE_AXIS_DATA_WIDTH; +typedef TDiv#(PCIE_AXIS_DATA_WIDTH, DWORD_WIDTH) PCIE_AXIS_KEEP_WIDTH; + +typedef struct { + Bit#(PCIE_AXIS_DATA_WIDTH) tData; + Bit#(PCIE_AXIS_KEEP_WIDTH) tKeep; + Bool tLast; + Bit#(usrWidth) tUser; +} PcieAxiStream#(numeric type usrWidth) deriving(Bits, FShow, Eq, Bounded); + +(*always_ready, always_enabled*) +interface RawPcieAxiStreamMaster#(numeric type usrWidth); + (* result = "tvalid" *) method Bool tValid; + (* result = "tdata" *) method Bit#(PCIE_AXIS_DATA_WIDTH) tData; + (* result = "tkeep" *) method Bit#(PCIE_AXIS_KEEP_WIDTH) tKeep; + (* result = "tlast" *) method Bool tLast; + (* result = "tuser" *) method Bit#(usrWidth) tUser; + (* always_enabled, prefix = "" *) method Action tReady((* port="tready" *) Bool ready); +endinterface + +(* always_ready, always_enabled *) +interface RawPcieAxiStreamSlave#(numeric type usrWidth); + (* prefix = "" *) + method Action tValid ( + (* port="tvalid" *) Bool tValid, + (* port="tdata" *) Bit#(PCIE_AXIS_DATA_WIDTH) tData, + (* port="tkeep" *) Bit#(PCIE_AXIS_KEEP_WIDTH) tKeep, + (* port="tlast" *) Bool tLast, + (* port="tuser" *) Bit#(usrWidth) tUser + ); + (* result="tready" *) method Bool tReady; +endinterface + +module mkFifoOutToRawPcieAxiStreamMaster#(FifoOut#(PcieAxiStream#(usrWidth)) pipe + )(RawPcieAxiStreamMaster#(usrWidth)); + let rawBus <- mkFifoOutToRawBusMaster(pipe); + return convertRawBusToRawPcieAxiStreamMaster(rawBus); +endmodule + +module mkFifoInToRawPcieAxiStreamSlave#(FifoIn#(PcieAxiStream#(usrWidth)) pipe + )(RawPcieAxiStreamSlave#(usrWidth)); + let rawBus <- mkFifoInToRawBusSlave(pipe); + return convertRawBusToRawPcieAxiStreamSlave(rawBus); +endmodule + +function RawPcieAxiStreamMaster#(usrWidth) convertRawBusToRawPcieAxiStreamMaster( + RawBusMaster#(PcieAxiStream#(usrWidth)) rawBus +); + return ( + interface RawPcieAxiStreamMaster; 
+ method Bool tValid = rawBus.valid; + method Bit#(PCIE_AXIS_DATA_WIDTH) tData = rawBus.data.tData; + method Bit#(PCIE_AXIS_KEEP_WIDTH) tKeep = rawBus.data.tKeep; + method Bool tLast = rawBus.data.tLast; + method Bit#(usrWidth) tUser = rawBus.data.tUser; + method Action tReady(Bool rdy); + rawBus.ready(rdy); + endmethod + endinterface + ); +endfunction + +function RawPcieAxiStreamSlave#(usrWidth) convertRawBusToRawPcieAxiStreamSlave( + RawBusSlave#(PcieAxiStream#(usrWidth)) rawBus + ); + return ( + interface RawPcieAxiStreamSlave; + method Bool tReady = rawBus.ready; + method Action tValid( + Bool valid, + Bit#(PCIE_AXIS_DATA_WIDTH) tData, + Bit#(PCIE_AXIS_KEEP_WIDTH) tKeep, + Bool tLast, + Bit#(usrWidth) tUser + ); + PcieAxiStream#(usrWidth) axiStream = PcieAxiStream { + tData: tData, + tKeep: tKeep, + tLast: tLast, + tUser: tUser + }; + rawBus.validData(valid, axiStream); + endmethod + endinterface + ); +endfunction \ No newline at end of file diff --git a/src/XilBdmaPcieConfigurator.bsv b/src/XilBdmaPcieConfigurator.bsv new file mode 100644 index 0000000..50b7bb9 --- /dev/null +++ b/src/XilBdmaPcieConfigurator.bsv @@ -0,0 +1,421 @@ +import GetPut::*; +import DReg::*; +import XilBdmaPcieTypes::*; +import XilBdmaPcieAxiStreamTypes::*; +import XilBdmaDmaTypes::*; + +typedef 256 PCIE_CFG_VF_FLR_INPROC_EXTEND_WIDTH; + +interface PcieConfigurator; + interface RawPcieConfiguration rawConfiguration; + // TODO: translate the raw interfaces into Bluespec-style Get interfaces + interface Get#(TlpSizeCfg) tlpSizeCfg; + method Action initCfg; +endinterface + +module mkPcieConfigurator(PcieConfigurator); + // TODO: powerStateChangeAck must wait for completion (Done) before being asserted + Reg#(Bool) powerStateChangeIntrReg <- mkReg(False); + + // status wires + Wire#(PcieCfgMaxPayloadSize) mpsSettingWire <- mkWire; + Wire#(PCieCfgMaxReadReqSize) mrrsSettingWire <- mkWire; + Wire#(PCieCfgCurrentSpeed) speedSettingWire <- mkWire; + Wire#(PcieCfgNegotiatedWidth) linkWidthSettingWire <- mkWire; + + // Cfg Regs +
Reg#(TlpSizeCfg) tlpSizeCfgReg <- mkReg(TlpSizeCfg { + mps : fromInteger(valueOf(DEFAULT_MPS)), + mpsWidth : fromInteger(valueOf(DEFAULT_MPS_WIDTH)), + mrrs : fromInteger(valueOf(DEFAULT_MRRS)), + mrrsWidth : fromInteger(valueOf(DEFAULT_MRRS_WIDTH)) + }); + + + // FLR handling uses a 2-stage register pipeline, following the Xilinx PCIe example design + // Reg0 is stage 0 and Reg1 is stage 1 + Reg#(PcieCfgFlrDone) cfgFlrDoneReg0 <- mkReg(0); + Reg#(PcieCfgFlrDone) cfgFlrDoneReg1 <- mkReg(0); + Reg#(PcieCfgVFFlrFuncNum) cfgVFFlrFuncNumReg <- mkReg(0); + Reg#(PcieCfgVFFlrFuncNum) cfgVFFlrFuncNumReg1 <- mkReg(0); + Reg#(Bool) cfgVFFlrDoneReg1 <- mkReg(False); + Reg#(Bit#(PCIE_CFG_VF_FLR_INPROC_EXTEND_WIDTH)) cfgVfFlrInprocReg0 <- mkReg(0); + Reg#(PcieCfgFlowControlSel) flowControlSelReg <- mkReg(0); + + rule functionLevelRst; // unguarded rule: steps the VF function number and copies the stage-0 values into stage 1 + cfgVFFlrFuncNumReg <= cfgVFFlrFuncNumReg + 1; + cfgFlrDoneReg1 <= cfgFlrDoneReg0; + cfgVFFlrDoneReg1 <= unpack(cfgVfFlrInprocReg0[cfgVFFlrFuncNumReg]); + cfgVFFlrFuncNumReg1 <= cfgVFFlrFuncNumReg; + endrule + + rule updateFlowControlSelReg; // walks flowControlSelReg through 0 -> 2 -> 4 -> 5 -> 6 -> 0; any other value is left unchanged + case (flowControlSelReg) + 0: flowControlSelReg <= 2; + 2: flowControlSelReg <= 4; + 4: flowControlSelReg <= 5; + 5: flowControlSelReg <= 6; + 6: flowControlSelReg <= 0; + endcase + endrule + + method Action initCfg; // recomputes tlpSizeCfgReg from DEFAULT_TLP_SIZE shifted by the mps/mrrs setting wires + TlpPayloadSize defaultTlpMaxSize = fromInteger(valueOf(DEFAULT_TLP_SIZE)); + TlpPayloadSizeWidth defaultTlpMaxSizeWidth = fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH)); + let mps = defaultTlpMaxSize << mpsSettingWire; + let mpsWidth = defaultTlpMaxSizeWidth + zeroExtend(mpsSettingWire); + let mrrs = defaultTlpMaxSize << mrrsSettingWire; + let mrrsWidth = defaultTlpMaxSizeWidth + zeroExtend(mrrsSettingWire); + tlpSizeCfgReg <= TlpSizeCfg { + mps : mps, + mpsWidth : mpsWidth, + mrrs : mrrs, + mrrsWidth : mrrsWidth + }; + endmethod + + interface Get tlpSizeCfg; + method ActionValue#(TlpSizeCfg) get(); + return tlpSizeCfgReg; + endmethod + endinterface + + interface RawPcieConfiguration
rawConfiguration; + + // not use mgmt + interface RawPcieCfgMgmt mgmt; + method PcieCfgMgmtAddr addr; + return 0; + endmethod + + method PcieCfgMgmtByteEn byteEn; + return 0; + endmethod + + method Bool debugAccess; + return False; + endmethod + + method PcieCfgMgmtFuncNum funcNum; + return 0; + endmethod + + method Bool read; + return False; + endmethod + + method PCieCfgMgmtData writeData; + return 0; + endmethod + + method Bool write; + return False; + endmethod + + method Action getResp( + PCieCfgMgmtData cfgMgmtRdData, + Bool cfgMgmtRdWrDone); + endmethod + endinterface + + // assign to 0 + interface RawPcieCfgPm pm; + method Bool aspmL1EntryReject; + return False; + endmethod + method Bool aspmL0EntryDisable; + return False; + endmethod + endinterface + + // Doesn't support msi now + interface RawPcieCfgMsi msi; + method PcieCfgMsiInt msiInt; + return 0; + endmethod + + method PcieCfgMsiFuncNum funcNum; + return 0; + endmethod + + method PcieCfgMsiPendingStatus pendingStatus; + return 0; + endmethod + + method PcieCfgMsiPendingStatusFuncNum pendingStatusFuncNum; + return 0; + endmethod + + method Bool pendingStatusDataEn; + return False; + endmethod + + method PcieCfgMsiSel sel; + return 0; + endmethod + + method PcieCfgMsiAttr attr; + return 0; + endmethod + + method Bool tphPresent; + return False; + endmethod + + method PcieCfgMsiTphType tphType; + return 0; + endmethod + + method PcieCfgMsiTphStTag tphStTag; + return 0; + endmethod + + method Action getMsiSignals( + PcieCfgMsiEn msiEn, + Bool msiSent, + Bool msiFail, + PcieCfgMsiMmEn msiMmEn, + Bool maskUpdate, + PcieCfgMsiData data); + endmethod + endinterface + + // Only for Legacy Devices + interface RawPcieCfgInterrupt interrupt; + method PcieCfgIntrInt intrInt; + return 0; + endmethod + + method PcieCfgIntrPending intrPending; + return 0; + endmethod + + method Action isIntrSent(Bool isSent); + endmethod + endinterface + + interface RawPcieCfgControl control; + method Bool hotResetOut; + return 
False; + endmethod + + method Action hotResetIn(Bool hotReset); + endmethod + + method Bool cfgSpaceEn; + return True; + endmethod + + method PcieCfgDsn deviceSerialNum; + return 0; + endmethod + + method PcieCfgDsBusNum downStreamBusNum; + return 0; + endmethod + + method PcieCfgDsDeviceNum downStreamDeviceNum; + return 0; + endmethod + + method PcieCfgDsFuncNum downStreamFuncNum; + return 0; + endmethod + + // TODO: powerStateChangeAck must wait for completion (Done) before being asserted + method Bool powerStateChangeAck; + return powerStateChangeIntrReg; + endmethod + + method Action powerStateChangeIntr(Bool powerStateChangeIntrrupt); + powerStateChangeIntrReg <= powerStateChangeIntrrupt; + endmethod + + method PcieCfgDsPortNum downStreamPortNum; + return 0; + endmethod + + method Bool errorCorrectableOut; + return False; + endmethod + + method Action getError( + Bool errorCorrectable, + Bool errorFatal, + Bool errorNonFatal); + endmethod + + method Bool errorUncorrectable; + return False; + endmethod + + method PcieCfgFlrDone funcLevelRstDone; // bits 0 and 1 are high only when the stage-0 bit is set and the stage-1 bit is clear; other bits stay 0 + PcieCfgFlrDone cfgFlrDone = 0; + cfgFlrDone[0] = (~cfgFlrDoneReg1[0]) & cfgFlrDoneReg0[0]; + cfgFlrDone[1] = (~cfgFlrDoneReg1[1]) & cfgFlrDoneReg0[1]; + return cfgFlrDone; + endmethod + + method Bool vfFuncLevelRstDone; + return cfgVFFlrDoneReg1; + endmethod + + method PcieCfgVFFlrFuncNum vfFlrFuncNum; + return cfgVFFlrFuncNumReg1; + endmethod + + method Action getInproc( + PcieCfgFlrInProc flrInProcess, + PcieCfgVFFlrInProc vfFlrInProcess + ); // stores both in-process inputs in the Reg0 registers; the VF vector is zero-extended to the register width + cfgFlrDoneReg0 <= flrInProcess; + cfgVfFlrInprocReg0 <= zeroExtend(vfFlrInProcess); + endmethod + + method Bool reqPmTransL23Ready; + return False; + endmethod + + method Bool linkTrainEn; + return True; + endmethod + + method Action busNumber(PcieCfgBusNum busNum); + endmethod + + method PcieCfgVendId vendId; + return 0; + endmethod + + method PcieCfgVendId subsysVendId; + return 0; + endmethod + + method PcieCfgDevId devIdPf0; + return 0; + endmethod + + method PcieCfgDevId devIdPf1; +
return 0; + endmethod + + method PcieCfgDevId devIdPf2; + return 0; + endmethod + + method PcieCfgDevId devIdPf3; + return 0; + endmethod + + method PcieCfgRevId revIdPf0; + return 0; + endmethod + + method PcieCfgRevId revIdPf1; + return 0; + endmethod + + method PcieCfgRevId revIdPf2; + return 0; + endmethod + + method PcieCfgRevId revIdPf3; + return 0; + endmethod + + method PcieCfgSubsysId subsysIdPf0; + return 0; + endmethod + + method PcieCfgSubsysId subsysIdPf1; + return 0; + endmethod + + method PcieCfgSubsysId subsysIdPf2; + return 0; + endmethod + + method PcieCfgSubsysId subsysIdPf3; + return 0; + endmethod + endinterface + + interface RawPcieCfgFC flowControl; + method Action flowControl( + PcieCfgFlowControlHeaderCredit postedHeaderCredit, + PcieCfgFlowControlHeaderCredit nonPostedHeaderCredit, + PcieCfgFlowControlHeaderCredit cmplHeaderCredit, + PcieCfgFlowControlDataCredit postedDataCredit, + PcieCfgFlowControlDataCredit nonPostedDataCredit, + PcieCfgFlowControlDataCredit cmplDataCredit); // credit inputs are accepted but not used: the method body is empty + endmethod + + method PcieCfgFlowControlSel flowControlSel; + return flowControlSelReg; + endmethod + endinterface + + // Sending messages is not supported: the transmit outputs below are tied to False/0 + interface RawPcieCfgMsgTx msgTx; + method Bool msegTransmit; + return False; + endmethod + + method PcieCfgMsgTransType msegTransmitType; + return 0; + endmethod + + method PcieCfgMsgTransData msegTransmitData; + return 0; + endmethod + + method Action msegTransmitDone(Bool isDone); + endmethod + endinterface + + interface RawPcieCfgMsgRx msgRx; + method Action receiveMsg( + Bool isMsgReceived, + PcieCfgMsgRecvData recvData, + PcieCfgMsgRecvType recvType + ); + endmethod + endinterface + + interface RawPcieCfgStatus status; + method Action getStatus ( + PcieCfgPhyLinkDown phyLinkDown, + PcieCfgPhyLinkStatus phyLinkStatus, + PcieCfgNegotiatedWidth negotiatedWidth, + PCieCfgCurrentSpeed currentSpeed, + PcieCfgMaxPayloadSize maxPayloadSize, + PCieCfgMaxReadReqSize maxReadReqSize, + PcieCfgFunctionStatus functionStatus,
+ PcieCfgVirtualFuncStatus virtualFuncStatus, + PcieCfgFuncPowerState functionPowerState, + PcieCfgVFPowerState virtualFuncPowerState, + PcieCfgLinkPowerState linkPowerState, + PcieCfgLocalError localError, + Bool localErrorValid, + PcieCfgRxPmState rxPmState, + PcieCfgTxPmState txPmState, + PcieCfgLtssmState ltssmState, + PcieCfgRcbStatus rcbStatus, + PcieCfgDpaSubstageChange dpaSubstageChange, + PcieCfgObffEn obffEnable + ); + mpsSettingWire <= maxPayloadSize; + mrrsSettingWire <= maxReadReqSize; + endmethod + endinterface + + interface RawPcieCfgTransmitFC txFlowControl; + method Action getTransCredit( + PcieCfgTfcNphAv nphAvailable, + PcieCfgTfcNpdAv npdAvailable); + endmethod + endinterface + + endinterface + +endmodule diff --git a/src/XilBdmaPcieDescriptorTypes.bsv b/src/XilBdmaPcieDescriptorTypes.bsv new file mode 100644 index 0000000..ab3fd50 --- /dev/null +++ b/src/XilBdmaPcieDescriptorTypes.bsv @@ -0,0 +1,166 @@ + +import XilBdmaPcieAxiStreamTypes::*; + +typedef 64 RQ_DESCRIPTOR_WIDTH; +typedef TDiv#(TSub#(PCIE_AXIS_DATA_WIDTH, RQ_DESCRIPTOR_WIDTH), DWORD_WIDTH) MAX_DWORD_CNT_OF_FIRST; + +typedef Bit#(1) ReserveBit1; +typedef Bit#(2) ReserveBit2; +typedef Bit#(6) ReserveBit6; + +typedef 128 DES_CQ_DESCRIPTOR_WIDTH; +typedef 3 DES_ATTR_WIDTH; +typedef 3 DES_TC_WIDTH; +typedef 6 DES_BAR_APERTURE_WIDTH; +typedef 3 DES_BAR_ID_WIDTH; +typedef 8 DES_TARGET_FUNCTION_WIDTH; +typedef 8 DES_TAG_WIDTH; +typedef 8 DES_NONEXTENDED_TAG_WIDTH; +typedef 16 DES_BDF_WIDTH; +typedef 4 DES_REQ_TYPE_WIDTH; +typedef 11 DES_DWORD_COUNT_WIDTH; +typedef 62 DES_ADDR_WIDTH; +typedef 2 DES_ADDR_TYPE_WIDTH; + +typedef Bit#(DES_ATTR_WIDTH) Attributes; +typedef Bit#(DES_TC_WIDTH) TrafficClass; +typedef Bit#(DES_BAR_APERTURE_WIDTH) BarAperture; +typedef Bit#(DES_BAR_ID_WIDTH) BarId; +typedef Bit#(DES_TARGET_FUNCTION_WIDTH) TargetFunction; +typedef Bit#(DES_TAG_WIDTH) Tag; +typedef Bit#(DES_BDF_WIDTH) BusDeviceFunc; +typedef Bit#(DES_REQ_TYPE_WIDTH) ReqType; +typedef 
Bit#(DES_DWORD_COUNT_WIDTH) DwordCount; +typedef Bit#(DES_ADDR_WIDTH) Address; +typedef Bit#(DES_ADDR_TYPE_WIDTH) AddrType; + +typedef 3'b000 ATTR_NO_SNOOP; + +// 16bytes Completer Request Descriptor Format for Memory, I/O, and Atomic Options +typedef struct { + // DW + 3 + ReserveBit1 reserve0; + Attributes attributes; + TrafficClass trafficClass; + BarAperture barAperture; + BarId barId; + TargetFunction targetFunction; + Tag tag; + // DW + 2 + BusDeviceFunc requesterId; + ReserveBit1 reserve1; + ReqType reqType; + DwordCount dwordCnt; + // DW + 1 & DW + 0 + Address address; + AddrType addrType; +} PcieCompleterRequestDescriptor deriving(Bits, Eq, Bounded, FShow); + +typedef 96 DES_CC_DESCRIPTOR_WIDTH; +typedef 3 DES_CMPL_STATUS_WIDTH; +typedef 13 DES_CMPL_BYTE_CNT_WIDTH; +typedef 7 DES_CC_LOWER_ADDR_WIDTH; +typedef Bit#(DES_CMPL_STATUS_WIDTH) CmplStatus; +typedef Bit#(DES_CMPL_BYTE_CNT_WIDTH) CmplByteCnt; +typedef Bit#(DES_CC_LOWER_ADDR_WIDTH) CCLowerAddr; + +typedef 0 DES_CC_STAUS_SUCCESS; +typedef 1 DES_CC_STATUS_UPSUPPORT; +typedef 4 DES_CC_STATUS_ABORT; + +typedef struct { + // DW + 2 + ReserveBit1 reserve0; + Attributes attributes; + TrafficClass trafficClass; + Bool completerIdEn; + BusDeviceFunc completerId; + Tag tag; + // DW + 1 + BusDeviceFunc requesterId; + ReserveBit1 reserve1; + Bool isPoisoned; + CmplStatus status; + DwordCount dwordCnt; + // DW + 0 + ReserveBit2 reserve2; + Bool isLockedReadCmpl; + CmplByteCnt byteCnt; + ReserveBit6 reserve3; + AddrType addrType; + ReserveBit1 reserve4; + CCLowerAddr lowerAddr; +} PcieCompleterCompleteDescriptor deriving(Bits, Eq, Bounded, FShow); + +typedef 128 DES_RQ_DESCRIPTOR_WIDTH; + +typedef struct { + // DW + 3 + Bool forceECRC; + Attributes attributes; + TrafficClass trafficClass; + Bool requesterIdEn; + BusDeviceFunc completerId; + Tag tag; + // DW + 2 + BusDeviceFunc requesterId; + Bool isPoisoned; + ReqType reqType; + DwordCount dwordCnt; + // DW + 1 & DW + 0 + Address address; + AddrType addrType; +} 
PcieRequesterRequestDescriptor deriving(Bits, Eq, Bounded, FShow); + +typedef 96 DES_RC_DESCRIPTOR_WIDTH; +typedef 4 DES_ERROR_CODE_WIDTH; +typedef 12 DES_RC_LOWER_ADDR_WIDTH; + +typedef Bit#(DES_ERROR_CODE_WIDTH) ErrorCode; +typedef Bit#(DES_RC_LOWER_ADDR_WIDTH) RCLowerAddr; + +typedef struct { + // DW + 2 + ReserveBit1 reserve0; + Attributes attributes; + TrafficClass trafficClass; + ReserveBit1 reserve1; + BusDeviceFunc completerId; + Tag tag; + // DW + 1 + BusDeviceFunc requesterId; + ReserveBit1 reserve2; + Bool isPoisoned; + CmplStatus status; + DwordCount dwordCnt; + ReserveBit1 reserve3; + Bool isRequestCompleted; + Bool isLockedReadCmpl; + CmplByteCnt byteCnt; + ErrorCode errorcode; + RCLowerAddr lowerAddr; +} PcieRequesterCompleteDescriptor deriving(Bits, Eq, Bounded, FShow); + +// Pcie Tlp types of descriptor +typedef 4'b0000 MEM_READ_REQ; +typedef 4'b0001 MEM_WRITE_REQ; +typedef 4'b0010 IO_READ_REQ; +typedef 4'b0011 IO_WRITE_REQ; +typedef 4'b0100 MEM_FETCHADD_REQ; +typedef 4'b0101 MEM_UNCOND_SWAP_REQ; +typedef 4'b0110 MEM_COMP_SWAP_REQ; +typedef 4'b0111 LOCK_READ_REQ; // allowed only in legacy devices +typedef 4'b1100 COMMON_MESG; +typedef 4'b1101 VENDOR_DEF_MESG; +typedef 4'b1110 ATS_MESG; + +// Pcie Addr Types +typedef 2'b00 UNTRANSLATED_ADDR; +typedef 2'b01 TRANSLATION_REQ; +typedef 2'b10 TRANSLATED_ADDR; + +//Cmpl Status +typedef 3'b000 SUCCESSFUL_CMPL; +typedef 3'b001 UNSUPPORTED_REQ; +typedef 3'b010 CFG_REQ_RETRY_STATUS; +typedef 3'b100 COMPLETER_ABORT; diff --git a/src/XilBdmaPcieTypes.bsv b/src/XilBdmaPcieTypes.bsv new file mode 100755 index 0000000..946d66d --- /dev/null +++ b/src/XilBdmaPcieTypes.bsv @@ -0,0 +1,523 @@ +import Vector::*; + +import XilBdmaPcieAxiStreamTypes::*; + +typedef 512 PCIE_TLP_BYTES; +typedef TLog#(PCIE_TLP_BYTES) PCIE_TLP_BYTES_WIDTH; + +typedef 2 PCIE_STRADDLE_NUM; // set straddle of RC and RQ same in the Xilinx IP GUI + +typedef 512 PCIE_TDATA_WIDTH; +typedef 64 PCIE_TDATA_BYTES; +typedef 16 PCIE_TDATA_DWORDS; +// 
Indicate DWORD valid of tDATA +typedef PCIE_TDATA_DWORDS PCIE_TKEEP_WIDTH; + +// tUser width vary among RR, RC, CR and CC +typedef 183 PCIE_COMPLETER_REQUEST_TUSER_WIDTH; +typedef 81 PCIE_COMPLETER_COMPLETE_TUSER_WIDTH; +typedef 137 PCIE_REQUESTER_REQUEST_TUSER_WIDTH; +typedef 161 PCIE_REQUESTER_COMPLETE_TUSER_WIDTH; + +typedef PcieAxiStream#(PCIE_COMPLETER_REQUEST_TUSER_WIDTH) CmplReqAxiStream; +typedef PcieAxiStream#(PCIE_COMPLETER_COMPLETE_TUSER_WIDTH) CmplCmplAxiStream; +typedef PcieAxiStream#(PCIE_REQUESTER_REQUEST_TUSER_WIDTH) ReqReqAxiStream; +typedef PcieAxiStream#(PCIE_REQUESTER_COMPLETE_TUSER_WIDTH) ReqCmplAxiStream; + +// PcieTlpCtl**: SideBand Signals delivered in tUser defined by PG213 +typedef 8 PCIE_TLP_FIRST_BE_WIDTH; +typedef 8 PCIE_TLP_LAST_BE_WIDTH; +typedef Bit#(PCIE_TLP_FIRST_BE_WIDTH) PcieTlpCtlFirstByteEn; +typedef Bit#(PCIE_TLP_LAST_BE_WIDTH) PcieTlpCtlLastByteEn; + +typedef PCIE_TDATA_BYTES PCIE_TLP_BYTE_EN_WIDTH; +typedef Bit#(PCIE_TLP_BYTE_EN_WIDTH) PcieTlpCtlByteEn; + +typedef 2 PCIE_TLP_ISSOP_WIDTH; +typedef 2 PCIE_TLP_ISSOP_PTR_WIDTH; +typedef Bit#(PCIE_TLP_ISSOP_WIDTH) PcieTlpCtlIsSop; +typedef Bit#(PCIE_TLP_ISSOP_PTR_WIDTH) PcieTlpCtlIsSopPtr; + +typedef 2 PCIE_TLP_ISEOP_WIDTH; +typedef 4 PCIE_TLP_ISEOP_PTR_WIDTH; +typedef Bit#(PCIE_TLP_ISEOP_WIDTH) PcieTlpCtlIsEop; +typedef Bit#(PCIE_TLP_ISEOP_PTR_WIDTH) PcieTlpCtlIsEopPtr; + +typedef 2 PCIE_TPH_PRESENT_WIDTH; +typedef 4 PCIE_TPH_TYPE_WIDTH; +typedef 16 PCIE_TPH_STTAG; +typedef 2 PCIE_TPH_INDIRECT_TAGEN_WIDTH; +typedef Bit#(PCIE_TPH_PRESENT_WIDTH) PcieTlpCtlTphPresent; +typedef Bit#(PCIE_TPH_TYPE_WIDTH) PcieTlpCtlTphType; +typedef Bit#(PCIE_TPH_STTAG) PcieTlpCtlTphSteeringTag; +typedef Bit#(PCIE_TPH_INDIRECT_TAGEN_WIDTH) PcieTlpCtlTphIndirectTagEn; + +typedef 64 PCIE_TLP_PARITY; +typedef Bit#(PCIE_TLP_PARITY) PcieTlpCtlParity; + +typedef 4 PCIE_TLP_ADDR_OFFSET_WIDTH; +typedef Bit#(PCIE_TLP_ADDR_OFFSET_WIDTH) PcieTlpCtlAddrOffset; + +typedef 6 PCIE_TLP_SEQ_NUM_WIDTH; +typedef 
Bit#(PCIE_TLP_SEQ_NUM_WIDTH) PcieTlpCtlSeqNum; + +typedef 4 PCIE_TLP_RC_ISSOP_WIDTH; +typedef Bit#(PCIE_TLP_RC_ISSOP_WIDTH) PcieTlpCtlIsSopRC; + +typedef 4 PCIE_TLP_RC_ISEOP_WIDTH; +typedef Bit#(PCIE_TLP_RC_ISEOP_WIDTH) PcieTlpCtlIsEopRC; + +// Signals the start of a new TLP, 6 bit. +typedef struct { + Vector#(PCIE_TLP_ISSOP_WIDTH, PcieTlpCtlIsSopPtr) isSopPtrs; + PcieTlpCtlIsSop isSop; +} PcieTlpCtlIsSopCommon deriving(Bits, Bounded, Eq, FShow); + +// Signals the start of a new TLP, 12 bit. +typedef struct { + Vector#(PCIE_TLP_RC_ISSOP_WIDTH, PcieTlpCtlIsSopPtr) isSopPtrs; + PcieTlpCtlIsSopRC isSop; +} PcieTlpCtlIsSopReqCpl deriving(Bits, Bounded, Eq, FShow); + +// Indicates a TLP is ending in this beat, 10bit. +typedef struct { + Vector#(PCIE_TLP_ISEOP_WIDTH, PcieTlpCtlIsEopPtr) isEopPtrs; + PcieTlpCtlIsEop isEop; +} PcieTlpCtlIsEopCommon deriving(Bits, Bounded, Eq, FShow); + +// Indicates a TLP is ending in this beat, 20bit. +typedef struct { + Vector#(PCIE_TLP_RC_ISEOP_WIDTH, PcieTlpCtlIsEopPtr) isEopPtrs; + PcieTlpCtlIsEopRC isEop; +} PcieTlpCtlIsEopReqCpl deriving(Bits, Bounded, Eq, FShow); + +// Only support at most 2 TLP straddle mode on RQ&RC +typedef 2'b00 NO_TLP_IN_THIS_BEAT; +typedef 2'b01 SINGLE_TLP_IN_THIS_BEAT; +typedef 2'b11 DOUBLE_TLP_IN_THIS_BEAT; + +typedef 2'b00 ISSOP_LANE_0; +typedef 2'b10 ISSOP_LANE_32; + +// 183bit tUser of PcieCompleterRequeste AXIS-slave +typedef struct { + PcieTlpCtlParity parity; + PcieTlpCtlTphSteeringTag tphSteeringTag; + PcieTlpCtlTphType tphType; + PcieTlpCtlTphPresent tphPresent; + Bool discontinue; + PcieTlpCtlIsEopCommon isEop; + PcieTlpCtlIsSopCommon isSop; + PcieTlpCtlByteEn dataByteEn; + PcieTlpCtlLastByteEn lastByteEn; + PcieTlpCtlFirstByteEn firstByteEn; +} PcieCompleterRequestSideBandFrame deriving(Bits, Bounded, Eq, FShow); + +// 81bit tUser of PcieCompleterComplete AXIS-master +typedef struct { + PcieTlpCtlParity parity; + Bool discontinue; + PcieTlpCtlIsEopCommon isEop; + PcieTlpCtlIsSopCommon isSop; +} 
PcieCompleterCompleteSideBandFrame deriving(Bits, Bounded, Eq, FShow); + +// 137bit tUser of PcieRequesterRequeste AXIS-master +typedef struct { + PcieTlpCtlParity parity; + PcieTlpCtlSeqNum seqNum1; + PcieTlpCtlSeqNum seqNum0; + PcieTlpCtlTphSteeringTag tphSteeringTag; + PcieTlpCtlTphIndirectTagEn tphIndirectTagEn; + PcieTlpCtlTphType tphType; + PcieTlpCtlTphPresent tphPresent; + Bool discontinue; + PcieTlpCtlIsEopCommon isEop; + PcieTlpCtlIsSopCommon isSop; + PcieTlpCtlAddrOffset addrOffset; + PcieTlpCtlLastByteEn lastByteEn; + PcieTlpCtlFirstByteEn firstByteEn; +} PcieRequesterRequestSideBandFrame deriving(Bits, Bounded, Eq, FShow); + +// 161bit tUser of PcieRequesterComplete AXIS-slave +typedef struct { +PcieTlpCtlParity parity; +Bool discontinue; +PcieTlpCtlIsEopReqCpl isEop; +PcieTlpCtlIsSopReqCpl isSop; +PcieTlpCtlByteEn dataByteEn; +} PcieRequesterCompleteSideBandFrame deriving(Bits, Bounded, Eq, FShow); + + +// PCIe raw interfaces +typedef 2 PCIE_CR_NP_REQ_WIDTH; +typedef 6 PCIE_CR_NP_REQ_COUNT_WIDTH; +typedef Bit#(PCIE_CR_NP_REQ_WIDTH) PcieNonPostedRequst; +typedef Bit#(PCIE_CR_NP_REQ_COUNT_WIDTH) PcieNonPostedRequstCount; + +// Interface to PCIe IP Completer Interface +(* always_ready, always_enabled *) +interface RawPcieCompleterRequest; + (* prefix = "s_axis_cq" *) interface RawPcieAxiStreamSlave#(PCIE_COMPLETER_REQUEST_TUSER_WIDTH) rawAxiStreamSlave; + (* result = "pcie_cq_np_req" *) method PcieNonPostedRequst nonPostedReqCreditIncrement; + (* prefix = "" *) method Action nonPostedReqCreditCnt( + (* port = "pcie_cq_np_req_count" *) PcieNonPostedRequstCount nonPostedpReqCount ); +endinterface + +(* always_ready, always_enabled *) +interface RawPcieCompleterComplete; + (* prefix = "m_axis_cc" *) interface RawPcieAxiStreamMaster#(PCIE_COMPLETER_COMPLETE_TUSER_WIDTH) rawAxiStreamMaster; +endinterface + +typedef 8 PCIE_RQ_TAG_WIDTH; +typedef Bit#(PCIE_RQ_TAG_WIDTH) PcieRqTag; +typedef PcieTlpCtlSeqNum PcieRqSeqNum; + +// Interface to PCIe IP Requester 
Interface +(* always_ready, always_enabled *) +interface RawPcieRequesterRequest; + (* prefix = "m_axis_rq" *) interface RawPcieAxiStreamMaster#(PCIE_REQUESTER_REQUEST_TUSER_WIDTH) rawAxiStreamMaster; + (* prefix = "pcie_rq" *) method Action pcieProgressTrack( + (* port = "tag_vld0" *) Bool tagValid0, + (* port = "tag_vld1" *) Bool tagValid1, + (* port = "tag0" *) PcieRqTag tag0, + (* port = "tag1" *) PcieRqTag tag1, + (* port = "seq_num_vld0" *) Bool seqNumValid0, + (* port = "seq_num_vld1" *) Bool seqNumValid1, + (* port = "seq_num0" *) PcieRqSeqNum seqNum0, + (* port = "seq_num1" *) PcieRqSeqNum seqNum1 + ); +endinterface + +(* always_ready, always_enabled *) +interface RawPcieRequesterComplete; + (* prefix = "s_axis_rc" *) interface RawPcieAxiStreamSlave#(PCIE_REQUESTER_COMPLETE_TUSER_WIDTH) rawAxiStreamSlave; +endinterface + +// Pcie Configuration Interfaces +typedef 10 PCIE_CFG_MGMT_ADDR_WIDTH; +typedef 4 PCIE_CFG_MGMT_BE_WIDTH; +typedef 8 PCIE_CFG_MGMT_FUNC_NUM_WIDTH; +typedef 32 PCIE_CFG_MGMT_DATA_WIDTH; + +typedef Bit#(PCIE_CFG_MGMT_ADDR_WIDTH) PcieCfgMgmtAddr; +typedef Bit#(PCIE_CFG_MGMT_BE_WIDTH) PcieCfgMgmtByteEn; +typedef Bit#(PCIE_CFG_MGMT_FUNC_NUM_WIDTH) PcieCfgMgmtFuncNum; +typedef Bit#(PCIE_CFG_MGMT_DATA_WIDTH) PCieCfgMgmtData; + +(* always_ready, always_enabled *) +interface RawPcieCfgMgmt; + (* result = "addr" *) method PcieCfgMgmtAddr addr; + (* result = "byte_enable" *) method PcieCfgMgmtByteEn byteEn; + (* result = "debug_access" *) method Bool debugAccess; + (* result = "function_number" *) method PcieCfgMgmtFuncNum funcNum; + (* result = "read" *) method Bool read; + (* result = "write_data" *) method PCieCfgMgmtData writeData; + (* result = "write" *) method Bool write; + (* prefix = "" *) method Action getResp( + (* port = "read_data" *) PCieCfgMgmtData cfgMgmtRdData, + (* port = "read_write_done" *) Bool cfgMgmtRdWrDone); +endinterface + +(* always_ready, always_enabled *) +interface RawPcieCfgPm; + (* result = "aspm_l1_entry_reject" *) 
method Bool aspmL1EntryReject; + (* result = "aspm_tx_l0s_entry_disable" *) method Bool aspmL0EntryDisable; +endinterface + +typedef 4 PCIE_CFG_MSI_ENABLE_WIDTH; +typedef 32 PCIE_CFG_MSI_INT_WIDTH; +typedef 8 PCIE_CFG_MSI_FUNC_NUM_WIDTH; +typedef 12 PCIE_CFG_MSI_MMENABLE_WIDTH; +typedef 32 PCIE_CFG_MSI_PENDING_STATUS_WIDTH; +typedef 2 PCIE_CFG_MSI_PENDING_STATUS_FUNC_NUM_WIDTH; +typedef 2 PCIE_CFG_MSI_SELECT_WIDTH; +typedef 32 PCIE_CFG_MSI_DATA; +typedef 3 PCIE_CFG_MSI_ATTR; +typedef 2 PCIE_CFG_MSI_TPH_TYPE_WIDTH; +typedef 8 PCIE_CFG_MSI_TPH_ST_TAG_WIDTH; + +typedef Bit#(PCIE_CFG_MSI_ENABLE_WIDTH) PcieCfgMsiEn; +typedef Bit#(PCIE_CFG_MSI_INT_WIDTH) PcieCfgMsiInt; +typedef Bit#(PCIE_CFG_MSI_FUNC_NUM_WIDTH) PcieCfgMsiFuncNum; +typedef Bit#(PCIE_CFG_MSI_MMENABLE_WIDTH) PcieCfgMsiMmEn; +typedef Bit#(PCIE_CFG_MSI_PENDING_STATUS_WIDTH) PcieCfgMsiPendingStatus; +typedef Bit#(PCIE_CFG_MSI_PENDING_STATUS_FUNC_NUM_WIDTH) PcieCfgMsiPendingStatusFuncNum; +typedef Bit#(PCIE_CFG_MSI_SELECT_WIDTH) PcieCfgMsiSel; +typedef Bit#(PCIE_CFG_MSI_DATA) PcieCfgMsiData; +typedef Bit#(PCIE_CFG_MSI_ATTR) PcieCfgMsiAttr; +typedef Bit#(PCIE_CFG_MSI_TPH_TYPE_WIDTH) PcieCfgMsiTphType; +typedef Bit#(PCIE_CFG_MSI_TPH_ST_TAG_WIDTH) PcieCfgMsiTphStTag; + +(* always_ready, always_enabled *) +interface RawPcieCfgMsi; + (* result = "int" *) method PcieCfgMsiInt msiInt; + (* result = "function_number" *) method PcieCfgMsiFuncNum funcNum; + (* result = "pending_status" *) method PcieCfgMsiPendingStatus pendingStatus; + (* result = "pending_status_function_num" *) method PcieCfgMsiPendingStatusFuncNum pendingStatusFuncNum; + (* result = "pending_status_data_enable" *) method Bool pendingStatusDataEn; + (* result = "select" *) method PcieCfgMsiSel sel; + (* result = "attr" *) method PcieCfgMsiAttr attr; + (* result = "tph_present" *) method Bool tphPresent; + (* result = "tph_type" *) method PcieCfgMsiTphType tphType; + (* result = "tph_st_tag" *) method PcieCfgMsiTphStTag tphStTag; + (* prefix = "" *) 
method Action getMsiSignals( + (* port = "enable" *) PcieCfgMsiEn msiEn, + (* port = "sent" *) Bool msiSent, + (* port = "fail" *) Bool msiFail, + (* port = "mmenable" *) PcieCfgMsiMmEn msiMmEn, + (* port = "mask_update" *) Bool maskUpdate, + (* port = "data" *) PcieCfgMsiData data + ); +endinterface + +typedef 4 PCIE_CFG_INTR_INT_WIDTH; +typedef 4 PCIE_CFG_INTR_PENDING_WIDTH; +typedef Bit#(PCIE_CFG_INTR_INT_WIDTH) PcieCfgIntrInt; +typedef Bit#(PCIE_CFG_INTR_PENDING_WIDTH) PcieCfgIntrPending; + +(* always_ready, always_enabled *) +interface RawPcieCfgInterrupt; + (* result = "int" *) method PcieCfgIntrInt intrInt; + (* result = "pending" *) method PcieCfgIntrPending intrPending; + (* prefix = "" *) method Action isIntrSent( + (* port = "sent" *) Bool isSent); +endinterface + +typedef 64 PCIE_CFG_DSN_WIDTH; +typedef Bit#(PCIE_CFG_DSN_WIDTH) PcieCfgDsn; + +typedef 8 PCIE_CFG_DS_BUS_NUM_WIDTH; +typedef 5 PCIE_CFG_DS_DEVICE_NUM_WIDTH; +typedef 3 PCIE_CFG_DS_FUNC_NUM_WIDTH; +typedef 8 PCIE_CFG_DS_PORT_NUM_WIDTH; +typedef Bit#(PCIE_CFG_DS_BUS_NUM_WIDTH) PcieCfgDsBusNum; +typedef Bit#(PCIE_CFG_DS_DEVICE_NUM_WIDTH) PcieCfgDsDeviceNum; +typedef Bit#(PCIE_CFG_DS_FUNC_NUM_WIDTH) PcieCfgDsFuncNum; +typedef Bit#(PCIE_CFG_DS_PORT_NUM_WIDTH) PcieCfgDsPortNum; + +typedef 4 PCIE_CFG_FLR_DONE_WIDTH; +typedef 8 PCIE_CFG_VF_FLR_FUNCNUM_WIDTH; +typedef 4 PCIE_CFG_FLR_INPROC_WIDTH; +typedef 252 PCIE_CFG_VF_FLR_INPROC_WIDTH; +typedef Bit#(PCIE_CFG_FLR_DONE_WIDTH) PcieCfgFlrDone; +typedef Bit#(PCIE_CFG_VF_FLR_FUNCNUM_WIDTH) PcieCfgVFFlrFuncNum; +typedef Bit#(PCIE_CFG_FLR_INPROC_WIDTH) PcieCfgFlrInProc; // sized by its own in-process width constant (currently equal to the done width) +typedef Bit#(PCIE_CFG_VF_FLR_INPROC_WIDTH) PcieCfgVFFlrInProc; + +typedef 8 PCIE_CFG_BUS_NUM_WIDTH; +typedef 16 PCIE_CFG_VEND_ID_WIDTH; +typedef 16 PCIE_CFG_DEV_ID_WIDTH; +typedef 8 PCIE_CFG_REV_ID_WIDTH; +typedef 16 PCIE_CFG_SUBSYS_ID_WIDTH; +typedef Bit#(PCIE_CFG_BUS_NUM_WIDTH) PcieCfgBusNum; +typedef Bit#(PCIE_CFG_VEND_ID_WIDTH) PcieCfgVendId; +typedef Bit#(PCIE_CFG_DEV_ID_WIDTH)
PcieCfgDevId; +typedef Bit#(PCIE_CFG_REV_ID_WIDTH) PcieCfgRevId; +typedef Bit#(PCIE_CFG_SUBSYS_ID_WIDTH) PcieCfgSubsysId; + +(* always_ready, always_enabled *) +interface RawPcieCfgControl; + (* result = "hot_reset_out" *) method Bool hotResetOut; + (* prefix = "" *) method Action hotResetIn( + (* port = "hot_reset_in" *) Bool hotReset); + (* result = "config_space_enable" *) method Bool cfgSpaceEn; + (* result = "dsn" *) method PcieCfgDsn deviceSerialNum; + (* result = "ds_bus_number" *) method PcieCfgDsBusNum downStreamBusNum; + (* result = "ds_device_number" *) method PcieCfgDsDeviceNum downStreamDeviceNum; + (* result = "ds_function_number" *) method PcieCfgDsFuncNum downStreamFuncNum; + (* result = "power_state_change_ack" *) method Bool powerStateChangeAck; + (* prefix = "" *) method Action powerStateChangeIntr( + (* port = "power_state_change_interrupt" *) Bool powerStateChangeIntrrupt); + (* result = "ds_port_number" *) method PcieCfgDsPortNum downStreamPortNum; + (* result = "err_cor_in" *) method Bool errorCorrectableOut; + (* prefix = "" *) method Action getError( + (* port = "err_cor_out" *) Bool errorCorrectable, + (* port = "err_fatal_out" *) Bool errorFatal, + (* port = "err_nonfatal_out" *) Bool errorNonFatal); + (* result = "err_uncor_in" *) method Bool errorUncorrectable; + (* result = "flr_done" *) method PcieCfgFlrDone funcLevelRstDone; + (* result = "vf_flr_done" *) method Bool vfFuncLevelRstDone; + (* result = "vf_flr_func_num" *) method PcieCfgVFFlrFuncNum vfFlrFuncNum; + (* prefix = "" *) method Action getInproc( + (* port = "flr_in_process" *) PcieCfgFlrInProc flrInProcess, + (* port = "vf_flr_in_process" *) PcieCfgVFFlrInProc vfFlrInProcess); + (* result = "req_pm_transition_l23_ready" *) method Bool reqPmTransL23Ready; + (* result = "link_training_enable" *) method Bool linkTrainEn; + (* prefix = "" *) method Action busNumber( + (* port = "bus_number" *) PcieCfgBusNum busNum); + (* result = "vend_id" *) method PcieCfgVendId vendId; + (* 
result = "subsys_vend_id" *) method PcieCfgVendId subsysVendId; + (* result = "dev_id_pf0" *) method PcieCfgDevId devIdPf0; + (* result = "dev_id_pf1" *) method PcieCfgDevId devIdPf1; + (* result = "dev_id_pf2" *) method PcieCfgDevId devIdPf2; + (* result = "dev_id_pf3" *) method PcieCfgDevId devIdPf3; + (* result = "rev_id_pf0" *) method PcieCfgRevId revIdPf0; + (* result = "rev_id_pf1" *) method PcieCfgRevId revIdPf1; + (* result = "rev_id_pf2" *) method PcieCfgRevId revIdPf2; + (* result = "rev_id_pf3" *) method PcieCfgRevId revIdPf3; + (* result = "subsys_id_pf0" *) method PcieCfgSubsysId subsysIdPf0; + (* result = "subsys_id_pf1" *) method PcieCfgSubsysId subsysIdPf1; + (* result = "subsys_id_pf2" *) method PcieCfgSubsysId subsysIdPf2; + (* result = "subsys_id_pf3" *) method PcieCfgSubsysId subsysIdPf3; +endinterface + +typedef 8 PCIE_CFG_FC_HEADER_WIDTH; +typedef 12 PCIE_CFG_FC_DATA_WIDTH; +typedef 3 PCIE_CFG_FC_SEL_WIDTH; +typedef Bit#(PCIE_CFG_FC_HEADER_WIDTH) PcieCfgFlowControlHeaderCredit; +typedef Bit#(PCIE_CFG_FC_DATA_WIDTH) PcieCfgFlowControlDataCredit; +typedef Bit#(PCIE_CFG_FC_SEL_WIDTH) PcieCfgFlowControlSel; + +(* always_ready, always_enabled *) +interface RawPcieCfgFC; + (* prefix = "" *) method Action flowControl( + (* port = "ph" *) PcieCfgFlowControlHeaderCredit postedHeaderCredit, + (* port = "nph" *) PcieCfgFlowControlHeaderCredit nonPostedHeaderCredit, + (* port = "cplh" *) PcieCfgFlowControlHeaderCredit cmplHeaderCredit, + (* port = "pd" *) PcieCfgFlowControlDataCredit postedDataCredit, + (* port = "npd" *) PcieCfgFlowControlDataCredit nonPostedDataCredit, + (* port = "cpld" *) PcieCfgFlowControlDataCredit cmplDataCredit + ); + (* result = "sel" *) method PcieCfgFlowControlSel flowControlSel; +endinterface + +typedef 3 PCIE_CFG_MSG_TXTYPE_WIDTH; +typedef 32 PCIE_CFG_MSG_TXDATA_WIDTH; +typedef Bit#(PCIE_CFG_MSG_TXTYPE_WIDTH) PcieCfgMsgTransType; +typedef Bit#(PCIE_CFG_MSG_TXDATA_WIDTH) PcieCfgMsgTransData; +(* always_ready, always_enabled *) 
+interface RawPcieCfgMsgTx; + (* result = "transmit" *) method Bool msegTransmit; + (* result = "transmit_type" *) method PcieCfgMsgTransType msegTransmitType; + (* result = "transmit_data" *) method PcieCfgMsgTransData msegTransmitData; + (* prefix = "" *) method Action msegTransmitDone( + (* port = "transmit_done" *) Bool isDone); +endinterface + +typedef 8 PCIE_CFG_MSG_RXDATA_WIDTH; +typedef 5 PCIE_CFG_MSG_RXTYPE_WIDTH; +typedef Bit#(PCIE_CFG_MSG_RXTYPE_WIDTH) PcieCfgMsgRecvType; +typedef Bit#(PCIE_CFG_MSG_RXDATA_WIDTH) PcieCfgMsgRecvData; + +(* always_ready, always_enabled *) +interface RawPcieCfgMsgRx; + (* prefix = "" *) method Action receiveMsg( + (* port = "received" *) Bool isMsgReceived, + (* port = "received_data" *) PcieCfgMsgRecvData recvData, + (* port = "received_type" *) PcieCfgMsgRecvType recvType + ); +endinterface + +typedef 1 PCIE_CFG_PHY_LINK_DOWN_WIDTH; +typedef 2 PCIE_CFG_PHY_LINK_STATUS_WIDTH; +typedef Bit#(PCIE_CFG_PHY_LINK_DOWN_WIDTH) PcieCfgPhyLinkDown; +typedef Bit#(PCIE_CFG_PHY_LINK_STATUS_WIDTH) PcieCfgPhyLinkStatus; + +typedef 3 PCIE_CFG_NEGOTIATED_WIDTH_WIDTH; +typedef 2 PCIE_CFG_CURRENT_SPEED_WIDTH; +typedef 2 PCIE_CFG_MAX_PAYLOAD_WIDTH; +typedef 3 PCIE_CFG_MAX_READ_REQ_WIDTH; +typedef Bit#(PCIE_CFG_NEGOTIATED_WIDTH_WIDTH) PcieCfgNegotiatedWidth; +typedef Bit#(PCIE_CFG_CURRENT_SPEED_WIDTH) PCieCfgCurrentSpeed; +typedef Bit#(PCIE_CFG_MAX_PAYLOAD_WIDTH) PcieCfgMaxPayloadSize; +typedef Bit#(PCIE_CFG_MAX_READ_REQ_WIDTH) PCieCfgMaxReadReqSize; + +typedef 16 PCIE_CFG_FUNCTIONS_STATUS_WIDTH; +typedef 504 PCIE_CFG_VIRTUAL_FUNCTIONS_STATUS_WIDTH; +typedef 12 PCIE_CFG_FUNCTIONS_POWER_STATE_WIDTH; +typedef 756 PCIE_CFG_VIRTUAL_FUNC_POWER_STATE_WIDTH; +typedef 2 PCIE_CFG_LINK_POWER_STATE_WIDTH; +typedef Bit#(PCIE_CFG_FUNCTIONS_STATUS_WIDTH) PcieCfgFunctionStatus; +typedef Bit#(PCIE_CFG_VIRTUAL_FUNCTIONS_STATUS_WIDTH) PcieCfgVirtualFuncStatus; +typedef Bit#(PCIE_CFG_FUNCTIONS_POWER_STATE_WIDTH) PcieCfgFuncPowerState; +typedef 
Bit#(PCIE_CFG_VIRTUAL_FUNC_POWER_STATE_WIDTH) PcieCfgVFPowerState; +typedef Bit#(PCIE_CFG_LINK_POWER_STATE_WIDTH) PcieCfgLinkPowerState; + +typedef 5 PCIE_CFG_LOCAL_ERROR_WIDTH; +typedef Bit#(PCIE_CFG_LOCAL_ERROR_WIDTH) PcieCfgLocalError; + +typedef 2 PCIE_CFG_RX_PM_STATE_WIDTH; +typedef 2 PCIE_CFG_TX_PM_STATE_WIDTH; +typedef Bit#(PCIE_CFG_RX_PM_STATE_WIDTH) PcieCfgRxPmState; +typedef Bit#(PCIE_CFG_TX_PM_STATE_WIDTH) PcieCfgTxPmState; + +typedef 6 PCIE_CFG_LTSSM_STATE_WIDTH; +typedef Bit#(PCIE_CFG_LTSSM_STATE_WIDTH) PcieCfgLtssmState; + +typedef 4 PCIE_CFG_RCB_STATUS; +typedef 4 PCIE_CFG_DPA_SUBSTAGE_CHANGE_WIDTH; +typedef 2 PCIE_CFG_OBFF_ENABLE_WIDTH; +typedef Bit#(PCIE_CFG_RCB_STATUS) PcieCfgRcbStatus; +typedef Bit#(PCIE_CFG_DPA_SUBSTAGE_CHANGE_WIDTH) PcieCfgDpaSubstageChange; +typedef Bit#(PCIE_CFG_OBFF_ENABLE_WIDTH) PcieCfgObffEn; + + +(* always_ready, always_enabled *) +interface RawPcieCfgStatus; + (* prefix = "" *) method Action getStatus ( + (* port = "phy_link_down" *) PcieCfgPhyLinkDown phyLinkDown, + (* port = "phy_link_status" *) PcieCfgPhyLinkStatus phyLinkStatus, + (* port = "negotiated_width" *) PcieCfgNegotiatedWidth negotiatedWidth, + (* port = "current_speed" *) PCieCfgCurrentSpeed currentSpeed, + (* port = "max_payload" *) PcieCfgMaxPayloadSize maxPayloadSize, + (* port = "max_read_req" *) PCieCfgMaxReadReqSize maxReadReqSize, + (* port = "function_status" *) PcieCfgFunctionStatus functionStatus, + (* port = "vf_status" *) PcieCfgVirtualFuncStatus virtualFuncStatus, + (* port = "function_power_state" *) PcieCfgFuncPowerState functionPowerState, + (* port = "vf_power_state" *) PcieCfgVFPowerState virtualFuncPowerState, + (* port = "link_power_state" *) PcieCfgLinkPowerState linkPowerState, + (* port = "local_error_out" *) PcieCfgLocalError localError, + (* port = "local_error_valid" *) Bool localErrorValid, + (* port = "rx_pm_state" *) PcieCfgRxPmState rxPmState, + (* port = "tx_pm_state" *) PcieCfgTxPmState txPmState, + (* port = "ltssm_state" *) 
PcieCfgLtssmState ltssmState, + (* port = "rcb_status" *) PcieCfgRcbStatus rcbStatus, + (* port = "dpa_substage_change" *) PcieCfgDpaSubstageChange dpaSubstageChange, + (* port = "obff_enable" *) PcieCfgObffEn obffEnable + ); +endinterface + +typedef 4 PCIE_CFG_TFC_NPH_WIDTH; +typedef 4 PCIE_CFG_TFC_NPD_WIDTH; +typedef Bit#(PCIE_CFG_TFC_NPH_WIDTH) PcieCfgTfcNphAv; +typedef Bit#(PCIE_CFG_TFC_NPD_WIDTH) PcieCfgTfcNpdAv; + +(* always_ready, always_enabled *) +interface RawPcieCfgTransmitFC; + (* prefix = "" *) method Action getTransCredit( + (* port = "nph_av" *) PcieCfgTfcNphAv nphAvailable, + (* port = "npd_av" *) PcieCfgTfcNpdAv npdAvailable + ); +endinterface + +(* always_ready, always_enabled *) +interface RawPcieConfiguration; + (* prefix = "cfg_mgmt" *) interface RawPcieCfgMgmt mgmt; + (* prefix = "cfg_pm" *) interface RawPcieCfgPm pm; + (* prefix = "cfg_interrupt_msi" *) interface RawPcieCfgMsi msi; + (* prefix = "cfg_interrupt" *) interface RawPcieCfgInterrupt interrupt; + (* prefix = "cfg" *) interface RawPcieCfgControl control; + (* prefix = "cfg_fc" *) interface RawPcieCfgFC flowControl; + (* prefix = "cfg_msg" *) interface RawPcieCfgMsgTx msgTx; + (* prefix = "cfg_msg" *) interface RawPcieCfgMsgRx msgRx; + (* prefix = "cfg" *) interface RawPcieCfgStatus status; + (* prefix = "pcie_tfc" *) interface RawPcieCfgTransmitFC txFlowControl; +endinterface + +(* always_ready, always_enabled *) +interface RawXilinxPcieIp; + // Raw PCIe interfaces, connected to the Xilinx PCIe IP + (* prefix = "" *) interface RawPcieRequesterRequest requesterRequest; + (* prefix = "" *) interface RawPcieRequesterComplete requesterComplete; + (* prefix = "" *) interface RawPcieCompleterRequest completerRequest; + (* prefix = "" *) interface RawPcieCompleterComplete completerComplete; + (* prefix = "" *) interface RawPcieConfiguration configuration; + (* prefix = "" *) method Action linkUp( + (* port = "user_lnk_up" *) Bool isLinkUp); +endinterface + +(* always_ready, always_enabled 
*) +interface RawXilinxPcieIpCompleter; + (* prefix = "" *) interface RawPcieCompleterRequest completerRequest; + (* prefix = "" *) interface RawPcieCompleterComplete completerComplete; +endinterface diff --git a/src/XilBdmaPrimUtils.bsv b/src/XilBdmaPrimUtils.bsv new file mode 100644 index 0000000..68fec4f --- /dev/null +++ b/src/XilBdmaPrimUtils.bsv @@ -0,0 +1,480 @@ +import FIFOF::*; +import Vector::*; + +import XilBdmaPcieAxiStreamTypes::*; +import XilBdmaDmaTypes::*; + +function Action immAssert(Bool condition, String assertName, Fmt assertFmtMsg); + action + let pos = printPosition(getStringPosition(assertName)); + // let pos = printPosition(getEvalPosition(condition)); + if (!condition) begin + $error( + "ImmAssert failed in %m @time=%0t: %s-- %s: ", + $time, pos, assertName, assertFmtMsg + ); + $finish(1); + end + endaction +endfunction + +function ByteModDWord byteModDWord(Bit#(tSz) bytes) provisos(Add#(_a, BYTE_DWORD_SHIFT_WIDTH, tSz)); + return truncate(bytes); +endfunction + +function DataBytePtr convertByteEn2BytePtr (ByteEn byteEn); + DataBytePtr ptr = 0; + case(byteEn) matches + 'b0000000000000000000000000000000000000000000000000000000000000001: ptr = 1; + 'b000000000000000000000000000000000000000000000000000000000000001?: ptr = 2; + 'b00000000000000000000000000000000000000000000000000000000000001??: ptr = 3; + 'b0000000000000000000000000000000000000000000000000000000000001???: ptr = 4; + 'b000000000000000000000000000000000000000000000000000000000001????: ptr = 5; + 'b00000000000000000000000000000000000000000000000000000000001?????: ptr = 6; + 'b0000000000000000000000000000000000000000000000000000000001??????: ptr = 7; + 'b000000000000000000000000000000000000000000000000000000001???????: ptr = 8; + 'b00000000000000000000000000000000000000000000000000000001????????: ptr = 9; + 'b0000000000000000000000000000000000000000000000000000001?????????: ptr = 10; + 'b000000000000000000000000000000000000000000000000000001??????????: ptr = 11; + 
'b00000000000000000000000000000000000000000000000000001???????????: ptr = 12; + 'b0000000000000000000000000000000000000000000000000001????????????: ptr = 13; + 'b000000000000000000000000000000000000000000000000001?????????????: ptr = 14; + 'b00000000000000000000000000000000000000000000000001??????????????: ptr = 15; + 'b0000000000000000000000000000000000000000000000001???????????????: ptr = 16; + 'b000000000000000000000000000000000000000000000001????????????????: ptr = 17; + 'b00000000000000000000000000000000000000000000001?????????????????: ptr = 18; + 'b0000000000000000000000000000000000000000000001??????????????????: ptr = 19; + 'b000000000000000000000000000000000000000000001???????????????????: ptr = 20; + 'b00000000000000000000000000000000000000000001????????????????????: ptr = 21; + 'b0000000000000000000000000000000000000000001?????????????????????: ptr = 22; + 'b000000000000000000000000000000000000000001??????????????????????: ptr = 23; + 'b00000000000000000000000000000000000000001???????????????????????: ptr = 24; + 'b0000000000000000000000000000000000000001????????????????????????: ptr = 25; + 'b000000000000000000000000000000000000001?????????????????????????: ptr = 26; + 'b00000000000000000000000000000000000001??????????????????????????: ptr = 27; + 'b0000000000000000000000000000000000001???????????????????????????: ptr = 28; + 'b000000000000000000000000000000000001????????????????????????????: ptr = 29; + 'b00000000000000000000000000000000001?????????????????????????????: ptr = 30; + 'b0000000000000000000000000000000001??????????????????????????????: ptr = 31; + 'b000000000000000000000000000000001???????????????????????????????: ptr = 32; + 'b00000000000000000000000000000001????????????????????????????????: ptr = 33; + 'b0000000000000000000000000000001?????????????????????????????????: ptr = 34; + 'b000000000000000000000000000001??????????????????????????????????: ptr = 35; + 'b00000000000000000000000000001???????????????????????????????????: ptr = 36; + 
'b0000000000000000000000000001????????????????????????????????????: ptr = 37; + 'b000000000000000000000000001?????????????????????????????????????: ptr = 38; + 'b00000000000000000000000001??????????????????????????????????????: ptr = 39; + 'b0000000000000000000000001???????????????????????????????????????: ptr = 40; + 'b000000000000000000000001????????????????????????????????????????: ptr = 41; + 'b00000000000000000000001?????????????????????????????????????????: ptr = 42; + 'b0000000000000000000001??????????????????????????????????????????: ptr = 43; + 'b000000000000000000001???????????????????????????????????????????: ptr = 44; + 'b00000000000000000001????????????????????????????????????????????: ptr = 45; + 'b0000000000000000001?????????????????????????????????????????????: ptr = 46; + 'b000000000000000001??????????????????????????????????????????????: ptr = 47; + 'b00000000000000001???????????????????????????????????????????????: ptr = 48; + 'b0000000000000001????????????????????????????????????????????????: ptr = 49; + 'b000000000000001?????????????????????????????????????????????????: ptr = 50; + 'b00000000000001??????????????????????????????????????????????????: ptr = 51; + 'b0000000000001???????????????????????????????????????????????????: ptr = 52; + 'b000000000001????????????????????????????????????????????????????: ptr = 53; + 'b00000000001?????????????????????????????????????????????????????: ptr = 54; + 'b0000000001??????????????????????????????????????????????????????: ptr = 55; + 'b000000001???????????????????????????????????????????????????????: ptr = 56; + 'b00000001????????????????????????????????????????????????????????: ptr = 57; + 'b0000001?????????????????????????????????????????????????????????: ptr = 58; + 'b000001??????????????????????????????????????????????????????????: ptr = 59; + 'b00001???????????????????????????????????????????????????????????: ptr = 60; + 'b0001????????????????????????????????????????????????????????????: ptr = 61; + 
'b001?????????????????????????????????????????????????????????????: ptr = 62; + 'b01??????????????????????????????????????????????????????????????: ptr = 63; + 'b1???????????????????????????????????????????????????????????????: ptr = 64; + default : ptr = 0; + endcase + return ptr; +endfunction + +function ByteEn convertBytePtr2ByteEn (DataBytePtr bytePtr); + ByteEn byteEn = 0; + case(bytePtr) + 1 : byteEn = 'h0000000000000001; + 2 : byteEn = 'h0000000000000003; + 3 : byteEn = 'h0000000000000007; + 4 : byteEn = 'h000000000000000F; + 5 : byteEn = 'h000000000000001F; + 6 : byteEn = 'h000000000000003F; + 7 : byteEn = 'h000000000000007F; + 8 : byteEn = 'h00000000000000FF; + 9 : byteEn = 'h00000000000001FF; + 10 : byteEn = 'h00000000000003FF; + 11 : byteEn = 'h00000000000007FF; + 12 : byteEn = 'h0000000000000FFF; + 13 : byteEn = 'h0000000000001FFF; + 14 : byteEn = 'h0000000000003FFF; + 15 : byteEn = 'h0000000000007FFF; + 16 : byteEn = 'h000000000000FFFF; + 17 : byteEn = 'h000000000001FFFF; + 18 : byteEn = 'h000000000003FFFF; + 19 : byteEn = 'h000000000007FFFF; + 20 : byteEn = 'h00000000000FFFFF; + 21 : byteEn = 'h00000000001FFFFF; + 22 : byteEn = 'h00000000003FFFFF; + 23 : byteEn = 'h00000000007FFFFF; + 24 : byteEn = 'h0000000000FFFFFF; + 25 : byteEn = 'h0000000001FFFFFF; + 26 : byteEn = 'h0000000003FFFFFF; + 27 : byteEn = 'h0000000007FFFFFF; + 28 : byteEn = 'h000000000FFFFFFF; + 29 : byteEn = 'h000000001FFFFFFF; + 30 : byteEn = 'h000000003FFFFFFF; + 31 : byteEn = 'h000000007FFFFFFF; + 32 : byteEn = 'h00000000FFFFFFFF; + 33 : byteEn = 'h00000001FFFFFFFF; + 34 : byteEn = 'h00000003FFFFFFFF; + 35 : byteEn = 'h00000007FFFFFFFF; + 36 : byteEn = 'h0000000FFFFFFFFF; + 37 : byteEn = 'h0000001FFFFFFFFF; + 38 : byteEn = 'h0000003FFFFFFFFF; + 39 : byteEn = 'h0000007FFFFFFFFF; + 40 : byteEn = 'h000000FFFFFFFFFF; + 41 : byteEn = 'h000001FFFFFFFFFF; + 42 : byteEn = 'h000003FFFFFFFFFF; + 43 : byteEn = 'h000007FFFFFFFFFF; + 44 : byteEn = 'h00000FFFFFFFFFFF; + 45 : byteEn = 
'h00001FFFFFFFFFFF;
+        46 : byteEn = 'h00003FFFFFFFFFFF;
+        47 : byteEn = 'h00007FFFFFFFFFFF;
+        48 : byteEn = 'h0000FFFFFFFFFFFF;
+        49 : byteEn = 'h0001FFFFFFFFFFFF;
+        50 : byteEn = 'h0003FFFFFFFFFFFF;
+        51 : byteEn = 'h0007FFFFFFFFFFFF;
+        52 : byteEn = 'h000FFFFFFFFFFFFF;
+        53 : byteEn = 'h001FFFFFFFFFFFFF;
+        54 : byteEn = 'h003FFFFFFFFFFFFF;
+        55 : byteEn = 'h007FFFFFFFFFFFFF;
+        56 : byteEn = 'h00FFFFFFFFFFFFFF;
+        57 : byteEn = 'h01FFFFFFFFFFFFFF;
+        58 : byteEn = 'h03FFFFFFFFFFFFFF;
+        59 : byteEn = 'h07FFFFFFFFFFFFFF;
+        60 : byteEn = 'h0FFFFFFFFFFFFFFF;
+        61 : byteEn = 'h1FFFFFFFFFFFFFFF;
+        62 : byteEn = 'h3FFFFFFFFFFFFFFF;
+        63 : byteEn = 'h7FFFFFFFFFFFFFFF;
+        64 : byteEn = 'hFFFFFFFFFFFFFFFF;
+        default : byteEn = 0;
+    endcase
+    return byteEn;
+endfunction
+
+function DWordByteEn convertDWordOffset2FirstByteEn (ByteModDWord dwOffset); // offset -> 4-bit mask with bits [3:offset] set
+    DWordByteEn dwByteEn = 0;
+    case(dwOffset)
+        0: dwByteEn = 'b1111;
+        1: dwByteEn = 'b1110;
+        2: dwByteEn = 'b1100;
+        3: dwByteEn = 'b1000;
+        default: dwByteEn = 'b0000; // any offset other than 0..3 yields an empty mask
+    endcase
+    return dwByteEn;
+endfunction
+
+function DWordByteEn convertDWordOffset2LastByteEn (ByteModDWord dwOffset); // offset -> 4-bit mask with bits [offset:0] set
+    DWordByteEn dwByteEn = 0;
+    case(dwOffset)
+        0: dwByteEn = 'b0001;
+        1: dwByteEn = 'b0011;
+        2: dwByteEn = 'b0111;
+        3: dwByteEn = 'b1111;
+        default: dwByteEn = 'b0000; // any offset other than 0..3 yields an empty mask
+    endcase
+    return dwByteEn;
+endfunction
+
+function Data selectDataByByteMask (Data data, ByteEn byteEn); // keeps the bytes of data whose byteEn bit is 1, zeroes the rest
+    Data result = 0;
+    for (Integer byteIdx = 0; byteIdx < valueOf(BYTE_EN_WIDTH); byteIdx = byteIdx + 1) begin // statically unrolled, one step per byte lane
+        let bitIdxLo = byteIdx * valueOf(BYTE_WIDTH); // lowest bit of this byte lane
+        let bitIdxHi = (byteIdx + 1) * valueOf(BYTE_WIDTH) - 1; // highest bit of this byte lane
+        if (byteEn[byteIdx] == 1'b1) begin
+            result[bitIdxHi:bitIdxLo] = Byte'(data[bitIdxHi:bitIdxLo]);
+        end
+    end
+    return result;
+endfunction
+
+// DWordPtr starts from 0 not 1 to align to PcieTlpIsEop
+function DataDwordPtr convertByteEn2DwordPtr (ByteEn byteEn);
+    DataDwordPtr ptr = 0;
+    case(byteEn) matches
+
'b0000000000000000000000000000000000000000000000000000000000000001: ptr = 0; + 'b000000000000000000000000000000000000000000000000000000000000001?: ptr = 0; + 'b00000000000000000000000000000000000000000000000000000000000001??: ptr = 0; + 'b0000000000000000000000000000000000000000000000000000000000001???: ptr = 0; + 'b000000000000000000000000000000000000000000000000000000000001????: ptr = 1; + 'b00000000000000000000000000000000000000000000000000000000001?????: ptr = 1; + 'b0000000000000000000000000000000000000000000000000000000001??????: ptr = 1; + 'b000000000000000000000000000000000000000000000000000000001???????: ptr = 1; + 'b00000000000000000000000000000000000000000000000000000001????????: ptr = 2; + 'b0000000000000000000000000000000000000000000000000000001?????????: ptr = 2; + 'b000000000000000000000000000000000000000000000000000001??????????: ptr = 2; + 'b00000000000000000000000000000000000000000000000000001???????????: ptr = 2; + 'b0000000000000000000000000000000000000000000000000001????????????: ptr = 3; + 'b000000000000000000000000000000000000000000000000001?????????????: ptr = 3; + 'b00000000000000000000000000000000000000000000000001??????????????: ptr = 3; + 'b0000000000000000000000000000000000000000000000001???????????????: ptr = 3; + 'b000000000000000000000000000000000000000000000001????????????????: ptr = 4; + 'b00000000000000000000000000000000000000000000001?????????????????: ptr = 4; + 'b0000000000000000000000000000000000000000000001??????????????????: ptr = 4; + 'b000000000000000000000000000000000000000000001???????????????????: ptr = 4; + 'b00000000000000000000000000000000000000000001????????????????????: ptr = 5; + 'b0000000000000000000000000000000000000000001?????????????????????: ptr = 5; + 'b000000000000000000000000000000000000000001??????????????????????: ptr = 5; + 'b00000000000000000000000000000000000000001???????????????????????: ptr = 5; + 'b0000000000000000000000000000000000000001????????????????????????: ptr = 6; + 
'b000000000000000000000000000000000000001?????????????????????????: ptr = 6; + 'b00000000000000000000000000000000000001??????????????????????????: ptr = 6; + 'b0000000000000000000000000000000000001???????????????????????????: ptr = 6; + 'b000000000000000000000000000000000001????????????????????????????: ptr = 7; + 'b00000000000000000000000000000000001?????????????????????????????: ptr = 7; + 'b0000000000000000000000000000000001??????????????????????????????: ptr = 7; + 'b000000000000000000000000000000001???????????????????????????????: ptr = 7; + 'b00000000000000000000000000000001????????????????????????????????: ptr = 8; + 'b0000000000000000000000000000001?????????????????????????????????: ptr = 8; + 'b000000000000000000000000000001??????????????????????????????????: ptr = 8; + 'b00000000000000000000000000001???????????????????????????????????: ptr = 8; + 'b0000000000000000000000000001????????????????????????????????????: ptr = 9; + 'b000000000000000000000000001?????????????????????????????????????: ptr = 9; + 'b00000000000000000000000001??????????????????????????????????????: ptr = 9; + 'b0000000000000000000000001???????????????????????????????????????: ptr = 9; + 'b000000000000000000000001????????????????????????????????????????: ptr = 10; + 'b00000000000000000000001?????????????????????????????????????????: ptr = 10; + 'b0000000000000000000001??????????????????????????????????????????: ptr = 10; + 'b000000000000000000001???????????????????????????????????????????: ptr = 10; + 'b00000000000000000001????????????????????????????????????????????: ptr = 11; + 'b0000000000000000001?????????????????????????????????????????????: ptr = 11; + 'b000000000000000001??????????????????????????????????????????????: ptr = 11; + 'b00000000000000001???????????????????????????????????????????????: ptr = 11; + 'b0000000000000001????????????????????????????????????????????????: ptr = 12; + 'b000000000000001?????????????????????????????????????????????????: ptr = 12; + 
'b00000000000001??????????????????????????????????????????????????: ptr = 12; + 'b0000000000001???????????????????????????????????????????????????: ptr = 12; + 'b000000000001????????????????????????????????????????????????????: ptr = 13; + 'b00000000001?????????????????????????????????????????????????????: ptr = 13; + 'b0000000001??????????????????????????????????????????????????????: ptr = 13; + 'b000000001???????????????????????????????????????????????????????: ptr = 13; + 'b00000001????????????????????????????????????????????????????????: ptr = 14; + 'b0000001?????????????????????????????????????????????????????????: ptr = 14; + 'b000001??????????????????????????????????????????????????????????: ptr = 14; + 'b00001???????????????????????????????????????????????????????????: ptr = 14; + 'b0001????????????????????????????????????????????????????????????: ptr = 15; + 'b001?????????????????????????????????????????????????????????????: ptr = 15; + 'b01??????????????????????????????????????????????????????????????: ptr = 15; + 'b1???????????????????????????????????????????????????????????????: ptr = 15; + default : ptr = 0; + endcase + return ptr; +endfunction + +function Data getDataLowBytes(Data data, DataBytePtr ptr); + Data temp = 0; + case(ptr) + 1 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*1 -1 : 0])); + 2 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*2 -1 : 0])); + 3 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*3 -1 : 0])); + 4 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*4 -1 : 0])); + 5 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*5 -1 : 0])); + 6 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*6 -1 : 0])); + 7 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*7 -1 : 0])); + 8 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*8 -1 : 0])); + 9 : temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*9 -1 : 0])); + 10: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*10-1 : 0])); + 11: temp = 
zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*11-1 : 0])); + 12: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*12-1 : 0])); + 13: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*13-1 : 0])); + 14: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*14-1 : 0])); + 15: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*15-1 : 0])); + 16: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*16-1 : 0])); + 17: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*17-1 : 0])); + 18: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*18-1 : 0])); + 19: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*19-1 : 0])); + 20: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*20-1 : 0])); + 21: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*21-1 : 0])); + 22: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*22-1 : 0])); + 23: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*23-1 : 0])); + 24: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*24-1 : 0])); + 25: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*25-1 : 0])); + 26: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*26-1 : 0])); + 27: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*27-1 : 0])); + 28: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*28-1 : 0])); + 29: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*29-1 : 0])); + 30: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*30-1 : 0])); + 31: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*31-1 : 0])); + 32: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*32-1 : 0])); + 33: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*33-1 : 0])); + 34: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*34-1 : 0])); + 35: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*35-1 : 0])); + 36: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*36-1 : 0])); + 37: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*37-1 : 0])); + 38: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*38-1 : 0])); + 39: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*39-1 : 0])); + 40: temp = 
zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*40-1 : 0])); + 41: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*41-1 : 0])); + 42: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*42-1 : 0])); + 43: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*43-1 : 0])); + 44: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*44-1 : 0])); + 45: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*45-1 : 0])); + 46: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*46-1 : 0])); + 47: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*47-1 : 0])); + 48: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*48-1 : 0])); + 49: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*49-1 : 0])); + 50: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*50-1 : 0])); + 51: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*51-1 : 0])); + 52: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*52-1 : 0])); + 53: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*53-1 : 0])); + 54: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*54-1 : 0])); + 55: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*55-1 : 0])); + 56: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*56-1 : 0])); + 57: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*57-1 : 0])); + 58: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*58-1 : 0])); + 59: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*59-1 : 0])); + 60: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*60-1 : 0])); + 61: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*61-1 : 0])); + 62: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*62-1 : 0])); + 63: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*63-1 : 0])); + 64: temp = zeroExtend(Data'(data[valueOf(BYTE_WIDTH)*64-1 : 0])); + default: temp = 0; + endcase + return temp; +endfunction + +function DmaMemAddr getAddrLowBits(DmaMemAddr addr, Bit#(TLog#(DMA_MEM_ADDR_WIDTH)) ptr); + DmaMemAddr temp = 0; + case(ptr) + 1 : temp = zeroExtend(DmaMemAddr'(addr[1 -1:0])); + 2 : temp = zeroExtend(DmaMemAddr'(addr[2 -1:0])); + 3 : temp = 
zeroExtend(DmaMemAddr'(addr[3 -1:0])); + 4 : temp = zeroExtend(DmaMemAddr'(addr[4 -1:0])); + 5 : temp = zeroExtend(DmaMemAddr'(addr[5 -1:0])); + 6 : temp = zeroExtend(DmaMemAddr'(addr[6 -1:0])); + 7 : temp = zeroExtend(DmaMemAddr'(addr[7 -1:0])); + 8 : temp = zeroExtend(DmaMemAddr'(addr[8 -1:0])); + 9 : temp = zeroExtend(DmaMemAddr'(addr[9 -1:0])); + 10: temp = zeroExtend(DmaMemAddr'(addr[10-1:0])); + 11: temp = zeroExtend(DmaMemAddr'(addr[11-1:0])); + 12: temp = zeroExtend(DmaMemAddr'(addr[12-1:0])); + 13: temp = zeroExtend(DmaMemAddr'(addr[13-1:0])); + 14: temp = zeroExtend(DmaMemAddr'(addr[14-1:0])); + 15: temp = zeroExtend(DmaMemAddr'(addr[15-1:0])); + 16: temp = zeroExtend(DmaMemAddr'(addr[16-1:0])); + 17: temp = zeroExtend(DmaMemAddr'(addr[17-1:0])); + 18: temp = zeroExtend(DmaMemAddr'(addr[18-1:0])); + 19: temp = zeroExtend(DmaMemAddr'(addr[19-1:0])); + 20: temp = zeroExtend(DmaMemAddr'(addr[20-1:0])); + 21: temp = zeroExtend(DmaMemAddr'(addr[21-1:0])); + 22: temp = zeroExtend(DmaMemAddr'(addr[22-1:0])); + 23: temp = zeroExtend(DmaMemAddr'(addr[23-1:0])); + 24: temp = zeroExtend(DmaMemAddr'(addr[24-1:0])); + 25: temp = zeroExtend(DmaMemAddr'(addr[25-1:0])); + 26: temp = zeroExtend(DmaMemAddr'(addr[26-1:0])); + 27: temp = zeroExtend(DmaMemAddr'(addr[27-1:0])); + 28: temp = zeroExtend(DmaMemAddr'(addr[28-1:0])); + 29: temp = zeroExtend(DmaMemAddr'(addr[29-1:0])); + 30: temp = zeroExtend(DmaMemAddr'(addr[30-1:0])); + 31: temp = zeroExtend(DmaMemAddr'(addr[31-1:0])); + 32: temp = zeroExtend(DmaMemAddr'(addr[32-1:0])); + 33: temp = zeroExtend(DmaMemAddr'(addr[33-1:0])); + 34: temp = zeroExtend(DmaMemAddr'(addr[34-1:0])); + 35: temp = zeroExtend(DmaMemAddr'(addr[35-1:0])); + 36: temp = zeroExtend(DmaMemAddr'(addr[36-1:0])); + 37: temp = zeroExtend(DmaMemAddr'(addr[37-1:0])); + 38: temp = zeroExtend(DmaMemAddr'(addr[38-1:0])); + 39: temp = zeroExtend(DmaMemAddr'(addr[39-1:0])); + 40: temp = zeroExtend(DmaMemAddr'(addr[40-1:0])); + 41: temp = 
zeroExtend(DmaMemAddr'(addr[41-1:0]));
+        42: temp = zeroExtend(DmaMemAddr'(addr[42-1:0]));
+        43: temp = zeroExtend(DmaMemAddr'(addr[43-1:0]));
+        44: temp = zeroExtend(DmaMemAddr'(addr[44-1:0]));
+        45: temp = zeroExtend(DmaMemAddr'(addr[45-1:0]));
+        46: temp = zeroExtend(DmaMemAddr'(addr[46-1:0]));
+        47: temp = zeroExtend(DmaMemAddr'(addr[47-1:0]));
+        48: temp = zeroExtend(DmaMemAddr'(addr[48-1:0]));
+        49: temp = zeroExtend(DmaMemAddr'(addr[49-1:0]));
+        50: temp = zeroExtend(DmaMemAddr'(addr[50-1:0]));
+        51: temp = zeroExtend(DmaMemAddr'(addr[51-1:0]));
+        52: temp = zeroExtend(DmaMemAddr'(addr[52-1:0]));
+        53: temp = zeroExtend(DmaMemAddr'(addr[53-1:0]));
+        54: temp = zeroExtend(DmaMemAddr'(addr[54-1:0]));
+        55: temp = zeroExtend(DmaMemAddr'(addr[55-1:0]));
+        56: temp = zeroExtend(DmaMemAddr'(addr[56-1:0]));
+        57: temp = zeroExtend(DmaMemAddr'(addr[57-1:0]));
+        58: temp = zeroExtend(DmaMemAddr'(addr[58-1:0]));
+        59: temp = zeroExtend(DmaMemAddr'(addr[59-1:0]));
+        60: temp = zeroExtend(DmaMemAddr'(addr[60-1:0]));
+        61: temp = zeroExtend(DmaMemAddr'(addr[61-1:0]));
+        62: temp = zeroExtend(DmaMemAddr'(addr[62-1:0]));
+        63: temp = zeroExtend(DmaMemAddr'(addr[63-1:0]));
+        default: temp = 0;
+    endcase
+    return temp;
+endfunction
+
+typedef 32 CNTFIFO_SIZE_WIDTH;
+typedef UInt#(CNTFIFO_SIZE_WIDTH) FifoSize;
+
+// FIFOF-style interface extended with getCurSize, which reports the number
+// of elements tracked by the implementation's occupancy counter.
+interface CounteredFIFOF#(type t);
+    method Action enq (t x);
+    method Action deq;
+    method t first;
+    method Action clear;
+    method Bool notFull;
+    method Bool notEmpty;
+    method FifoSize getCurSize;
+endinterface
+
+// Sized FIFOF wrapper that keeps an occupancy counter next to the FIFO.
+//   depth: capacity passed to mkSizedFIFOF.
+// enq/deq/clear each raise a default-False wire for the cycle in which they
+// are called; rule updateSize folds those pulses into curSize once per cycle.
+module mkCounteredFIFOF#(Integer depth)(CounteredFIFOF#(t)) provisos(Bits#(t, tSz));
+    Wire#(Bool) hasDeqCall <- mkDWire(False);
+    Wire#(Bool) hasEnqCall <- mkDWire(False);
+    Wire#(Bool) hasClearCall <- mkDWire(False);
+    Reg#(FifoSize) curSize <- mkReg(0);
+    FIFOF#(t) fifo <- mkSizedFIFOF(depth);
+
+    rule updateSize;
+        if (hasClearCall) begin
+            // A clear empties the FIFO, so the counter restarts from zero
+            // regardless of any enq/deq issued in the same cycle.
+            curSize <= 0;
+        end
+        else begin
+            case({pack(hasEnqCall), pack(hasDeqCall)})
+                2'b10: curSize <= curSize + 1;
+                2'b01: curSize <= curSize - 1;
+                // simultaneous enq+deq, or neither: occupancy is unchanged
+                default: curSize <= curSize;
+            endcase
+        end
+    endrule
+
+    method Action enq (t x);
+        fifo.enq(x);
+        hasEnqCall <= True;
+    endmethod
+
+    method Action deq;
+        fifo.deq;
+        hasDeqCall <= True;
+    endmethod
+
+    method t first = fifo.first;
+
+    // Clears the FIFO and flags the clear so that curSize is reset too;
+    // without the flag getCurSize would keep reporting the pre-clear count.
+    method Action clear;
+        fifo.clear;
+        hasClearCall <= True;
+    endmethod
+
+    method Bool notFull = fifo.notFull;
+    method Bool notEmpty = fifo.notEmpty;
+
+    method FifoSize getCurSize = curSize;
+endmodule
+
+// XOR of the eight bits of one byte.
+function ByteParity calByteParity(Byte data);
+    return (data[0] ^ data[1] ^ data[2] ^ data[3] ^ data[4] ^ data[5] ^ data[6] ^ data[7]);
+endfunction
+
+typedef Bit#(BYTE_EN_WIDTH) DataParity;
+typedef Bit#(TDiv#(DWORD_WIDTH, BYTE_WIDTH)) DwordParity;
+
+// Per-byte parity of a Data word: element idx of the packed result is
+// calByteParity of byte idx of data.
+function DataParity calDataParity(Data data);
+    Vector#(BYTE_EN_WIDTH, Byte) dataBytes = unpack(data);
+    Vector#(BYTE_EN_WIDTH, ByteParity) dataParities= newVector();
+    for (Integer idx = 0; idx < valueOf(BYTE_EN_WIDTH); idx = idx + 1) begin
+        dataParities[idx] = calByteParity(dataBytes[idx]);
+    end
+    return pack(dataParities);
+endfunction
diff --git a/src/XilBdmaPrioritySearchBuffer.bsv b/src/XilBdmaPrioritySearchBuffer.bsv
new file mode 100644
index 0000000..f8fbbcc
--- /dev/null
+++ b/src/XilBdmaPrioritySearchBuffer.bsv
@@ -0,0 +1,39 @@
+import Vector :: *;
+
+interface PrioritySearchBuffer#(numeric type depth, type t_tag, type t_val);
+    method Action enq(t_tag tag, t_val value);
+    method ActionValue#(Maybe#(t_val)) search(t_tag tag);
+endinterface
+
+// A FIFO-like buffer: if the buffer is full, a new enq will lead to deq of the oldest element.
+// If the same tag exists more than once in the buffer, the newest enqueued element will be returned.
+module mkPrioritySearchBuffer#(numeric depth)(PrioritySearchBuffer#(depth, t_tag, t_val)) provisos (
+    Bits#(t_tag, sz_tag),
+    Bits#(t_val, sz_val),
+    Eq#(t_tag),
+    FShow#(t_tag),
+    FShow#(t_val),
+    FShow#(Tuple2#(t_tag, t_val))
+);
+    // One register per slot, all Invalid after reset. Slot 0 holds the most recently enqueued
+    // entry; higher indices hold progressively older entries.
+    Vector#(depth, Reg#(Maybe#(Tuple2#(t_tag, t_val)))) bufferVec <- replicateM(mkReg(tagged Invalid));
+
+    // Shifts every entry one slot towards the end and writes the new (tag, value) pair into
+    // slot 0. The content of the last slot is overwritten, i.e. the oldest entry is dropped.
+    method Action enq(t_tag tag, t_val value);
+        for (Integer idx=0; idx < valueOf(depth)-1; idx=idx+1) begin
+            bufferVec[idx+1] <= bufferVec[idx];
+        end
+        bufferVec[0] <= tagged Valid tuple2(tag, value);
+    endmethod
+
+    // Returns Valid value of an entry whose tag equals `tag`, or Invalid when no slot matches.
+    // The scan goes from the oldest slot (depth-1) down to slot 0 and every match overwrites
+    // `ret`, so the newest matching entry wins. No register is written by this method.
+    method ActionValue#(Maybe#(t_val)) search(t_tag tag);
+        Maybe#(t_val) ret = tagged Invalid;
+        for (Integer idx=valueOf(depth)-1; idx >=0 ; idx=idx-1) begin
+            if (bufferVec[idx] matches tagged Valid .readTuple) begin
+                let {readTag, readVal} = readTuple;
+                if (readTag == tag) begin
+                    ret = tagged Valid readVal;
+                end
+            end
+        end
+        return ret;
+    endmethod
+endmodule
\ No newline at end of file
diff --git a/src/XilBdmaSimpleModeUtils.bsv b/src/XilBdmaSimpleModeUtils.bsv
new file mode 100644
index 0000000..436e2a4
--- /dev/null
+++ b/src/XilBdmaSimpleModeUtils.bsv
@@ -0,0 +1,280 @@
+import Vector::*;
+import RegFile::*;
+import GetPut::*;
+import SemiFifo::*;
+import FIFOF::*;
+import BRAM::*;
+
+import XilBdmaDmaTypes::*;
+import XilBdmaStreamUtils::*;
+
+// function Bit#(TMul#(2,n)) doubleExtend(Bit#(n) lo, Bit#(n) hi) provisos(Add#(1, _a, n), Add#(_b, n, TMul#(2, n)));
+// return zeroExtend(lo) | (zeroExtend(hi) << valueOf(n));
+// endfunction
+
+// interface DmaSimpleCore;
+// // from H2C user Ifc, where the addr is already aligned to DWord
+// interface FifoIn#(CsrRequest) reqFifoIn;
+// interface FifoOut#(CsrResponse) respFifoOut;
+// // new dma descriptor (connect to H2C user Ifc)
+// interface Vector#(DMA_PATH_NUM, FifoOut#(DmaRequest)) c2hReqFifoOut;
+// endinterface
+
+// (* synthesize *)
+// module mkDmaSimpleCore(DmaSimpleCore);
+// FIFOF#(CsrRequest) reqFifo <- mkFIFOF;
+// FIFOF#(CsrResponse) respFifo <-
mkFIFOF; + +// RegFile#(DmaRegIndex, DmaCsrValue) controlRegFile <- mkRegFileFull; + +// Vector#(DMA_PATH_NUM, FifoOut#(DmaRequest)) c2hReqFifoOutIfc = newVector; +// Vector#(DMA_PATH_NUM, PhyAddrBram) paTableBram = newVector; +// for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1) begin +// paTableBram[pathIdx] <- mkPhyAddrBram; +// c2hReqFifoOutIfc[pathIdx] = paTableBram[pathIdx].paReqFifoOut; +// end + +// function DmaRegBlockIdx getRegBlockIdx (DmaCsrAddr csrAddr); +// DmaRegBlockIdx idx = truncate(csrAddr >> valueOf(DMA_INTERNAL_REG_BLOCK_WIDTH)); +// return idx; +// endfunction + +// function Tuple3#(DmaRegIndex, DmaRegIndex, DmaRegIndex) getDescRegIdxs(DmaPathNo pathIdx); +// DmaRegIndex baseRegIdx = 0; +// Tuple3#(DmaRegIndex, DmaRegIndex, DmaRegIndex) result = tuple3(0, 0, 0); +// if (pathIdx == 0) +// result = tuple3(fromInteger(valueOf(TAdd#(REG_ENGINE_0_OFFSET, REG_REQ_VA_LO_OFFSET))), +// fromInteger(valueOf(TAdd#(REG_ENGINE_0_OFFSET, REG_REQ_VA_HI_OFFSET))), +// fromInteger(valueOf(TAdd#(REG_ENGINE_0_OFFSET, REG_REQ_BYTES_OFFSET)))); +// else if (pathIdx == 1) +// result = tuple3(fromInteger(valueOf(TAdd#(REG_ENGINE_1_OFFSET, REG_REQ_VA_LO_OFFSET))), +// fromInteger(valueOf(TAdd#(REG_ENGINE_1_OFFSET, REG_REQ_VA_HI_OFFSET))), +// fromInteger(valueOf(TAdd#(REG_ENGINE_1_OFFSET, REG_REQ_BYTES_OFFSET)))); +// return result; +// endfunction + +// function ActionValue#(DmaRequest) genVaReq(RegFile#(DmaRegIndex, DmaCsrValue) regFile, DmaPathNo pathIdx, Bool isWrite); +// actionvalue +// let {addrLoIdx, addrHiIdx, lenIdx} = getDescRegIdxs(pathIdx); +// let addrLo = regFile.sub(addrLoIdx); +// let addrHi = regFile.sub(addrHiIdx); +// let length = regFile.sub(lenIdx); +// let desc = DmaRequest { +// startAddr : doubleExtend(addrLo, addrHi), +// length : length, +// isWrite : isWrite +// }; +// return desc; +// endactionvalue +// endfunction + +// rule map; +// let req = reqFifo.first; +// reqFifo.deq; +// let blockIdx 
= getRegBlockIdx(req.addr); +// DmaRegIndex regIdx = truncate(req.addr); +// // Write Request +// if (req.isWrite) begin +// // Block 0 : DMA Inner Ctrl Regs +// if (blockIdx == 0) begin +// if (regIdx == fromInteger(valueOf(REG_ENGINE_0_OFFSET)) || regIdx == fromInteger(valueOf(REG_ENGINE_1_OFFSET))) begin +// DmaPathNo pathIdx = (regIdx == fromInteger(valueOf(REG_ENGINE_0_OFFSET))) ? 0 : 1; +// let desc <- genVaReq(controlRegFile, pathIdx, unpack(truncate(req.value))); +// $display($time, "ns SIM INFO @ mkDmaSimpleCore: doorbell%d triggerd, va:%h bytes:%d", pathIdx, desc.startAddr, desc.length); +// if (desc.startAddr > 0) begin +// paTableBram[pathIdx].vaReqFifoIn.enq(desc); +// end +// end +// // if not doorbell, write the register +// else begin +// controlRegFile.upd(regIdx, req.value); +// // $display($time, "ns SIM INFO @ mkDmaSimpleCore: register writing regIdx:%d value:%d", regIdx, req.value); +// end +// end +// // Block 1~2 : Channel 0 Va-Pa Table +// else if (blockIdx <= fromInteger(valueOf(PA_TABLE0_BLOCK_OFFSET))) begin +// let vaReq = CsrRequest { +// addr : req.addr - fromInteger(valueOf(DMA_PA_TABLE0_OFFSET)), +// value : req.value, +// isWrite : True +// }; +// // $display($time, "ns SIM INFO @ mkDmaSimpleCore: paTableBram0 writing addr:%d value:%d", vaReq.addr, req.value); +// paTableBram[0].paSetFifoIn.enq(vaReq); +// end +// // Block 3~4 : Channel 1 Va-Pa Table +// else begin +// let vaReq = CsrRequest { +// addr : req.addr - fromInteger(valueOf(DMA_PA_TABLE1_OFFSET)), +// value : req.value, +// isWrite : True +// }; +// paTableBram[1].paSetFifoIn.enq(vaReq); +// // $display($time, "ns SIM INFO @ mkDmaSimpleCore: paTableBram1 writing addr:%d value:%d", vaReq.addr, req.value); +// end +// end +// // Read Request +// else begin +// if (blockIdx == 0 && regIdx <= fromInteger(valueOf(DMA_USING_REG_LEN))) begin +// let value = controlRegFile.sub(regIdx); +// let resp = CsrResponse { +// addr : req.addr, +// value : value +// }; +// 
respFifo.enq(resp); +// end +// else begin +// let resp = CsrResponse { +// addr : req.addr, +// value : 0 +// }; +// respFifo.enq(resp); +// end +// end +// endrule + +// interface reqFifoIn = convertFifoToFifoIn(reqFifo); +// interface respFifoOut = convertFifoToFifoOut(respFifo); +// interface c2hReqFifoOut = c2hReqFifoOutIfc; +// endmodule + +// typedef 3 BRAM_LATENCY; + +// interface PhyAddrBram; +// // Address transfer +// interface FifoIn#(DmaRequest) vaReqFifoIn; +// interface FifoOut#(DmaRequest) paReqFifoOut; +// // va-pa table set +// interface FifoIn#(CsrRequest) paSetFifoIn; +// endinterface + +// // This module does not check if the request address is valid(in MR) +// (* synthesize *) +// module mkPhyAddrBram(PhyAddrBram); +// FIFOF#(DmaRequest) vaReqFifo <- mkFIFOF; +// FIFOF#(DmaRequest) paReqFifo <- mkFIFOF; +// FIFOF#(CsrRequest) paSetFifo <- mkFIFOF; +// FIFOF#(DmaRequest) pendingFifo <- mkSizedFIFOF(valueOf(BRAM_LATENCY)); + +// BRAM1Port#(PaBramAddr, DmaCsrValue) phyAddrLoBram <- mkBRAM1Server(defaultValue); +// BRAM1Port#(PaBramAddr, DmaCsrValue) phyAddrHiBram <- mkBRAM1Server(defaultValue); + +// DmaMemAddr pageMask = (valueOf(IS_HUGE_PAGE)>0) ? 'h1FFFFF : 'hFFF; + +// function Bool isLoAddr(DmaCsrAddr addr); +// return unpack(addr[0]); +// endfunction + +// // The Csr Address map to Bram Address. As 0:pa_lo[0], 1:pa_hi[0], 2:pa_lo[1], 3:pa_hi[1],..., csrAddr:pa_lo[csrAddr/2]. 
+// function PaBramAddr convertCsrAddrToBramAddr(DmaCsrAddr csrAddr); +// let addr = csrAddr >> 1; +// return truncate(addr); +// endfunction + +// function PaBramAddr convertDmaAddrToBramAddr(DmaMemAddr dmaAddr); +// DmaMemAddr pageIdx = 0; +// if (valueOf(IS_HUGE_PAGE) > 0) begin +// pageIdx = (dmaAddr) >> valueOf(HUGE_PAGE_SIZE_WIDTH); +// end +// else begin +// pageIdx = (dmaAddr) >> valueOf(PAGE_SIZE_WIDTH); +// end +// return truncate(pageIdx); +// endfunction + +// rule putVaReq; +// // if is setting va-pa table +// if (paSetFifo.notEmpty) begin +// let paSet = paSetFifo.first; +// paSetFifo.deq; +// let bramAddr = convertCsrAddrToBramAddr(paSet.addr); +// let bramReq = BRAMRequest { +// write : True, +// responseOnWrite : False, +// address : bramAddr, +// datain : paSet.value +// }; +// if (isLoAddr(paSet.addr)) begin +// phyAddrLoBram.portA.request.put(bramReq); +// // $display($time, "ns SIM INFO @ mkPhyAddrBram: pa writing, va offset:%d, mapping pa low:%h", bramAddr, bramReq.datain ); +// end +// else begin +// phyAddrHiBram.portA.request.put(bramReq); +// // $display($time, "ns SIM INFO @ mkPhyAddrBram: pa writing, va offset:%d, mapping pa high:%h", bramAddr, bramReq.datain); +// end + +// end +// // if is getting phy address +// else begin +// let vaReq = vaReqFifo.first; +// vaReqFifo.deq; +// let bramReq = BRAMRequest { +// write : False, +// responseOnWrite : False, +// address : convertDmaAddrToBramAddr(vaReq.startAddr), +// datain : 0 +// }; +// phyAddrLoBram.portA.request.put(bramReq); +// phyAddrHiBram.portA.request.put(bramReq); +// pendingFifo.enq(vaReq); +// // $display($time, "ns SIM INFO @ mkPhyAddrBram: receive pa mapping request, va:%h, bramAddr:%d", vaReq.startAddr, bramReq.address); +// end +// endrule + +// rule getPaReq; +// let pa_lo <- phyAddrLoBram.portA.response.get; +// let pa_hi <- phyAddrHiBram.portA.response.get; +// DmaMemAddr pa = doubleExtend(pa_lo, pa_hi); +// let oriReq = pendingFifo.first; +// pendingFifo.deq; +// // 
$display($time, "ns SIM INFO @ mkPhyAddrBram: got a pa mapping, va:%h pa:%h pa_lo:%h pa_hi:%h", oriReq.startAddr, pa, pa_lo, pa_hi); +// oriReq.startAddr = pa | (oriReq.startAddr & pageMask); +// paReqFifo.enq(oriReq); +// endrule + +// interface vaReqFifoIn = convertFifoToFifoIn(vaReqFifo); +// interface paReqFifoOut = convertFifoToFifoOut(paReqFifo); +// interface paSetFifoIn = convertFifoToFifoIn(paSetFifo); +// endmodule + +// typedef 12 DUMMY_ADDR_WIDTH; +// typedef Bit#(DUMMY_ADDR_WIDTH) DummyAddr; + +// interface GenericCsr; +// interface FifoIn#(CsrRequest) reqFifoIn; +// interface FifoOut#(CsrResponse) respFifoOut; +// endinterface + +// (* synthesize *) +// module mkDummyCsr(GenericCsr); +// FIFOF#(CsrRequest) reqFifo <- mkFIFOF; +// FIFOF#(CsrResponse) respFifo <- mkFIFOF; +// FIFOF#(DmaCsrAddr) pendingFifo <- mkSizedFIFOF(valueOf(BRAM_LATENCY)); +// BRAM1Port#(DummyAddr, DmaCsrValue) bram <- mkBRAM1Server(defaultValue); + +// rule request; +// let req = reqFifo.first; +// reqFifo.deq; +// pendingFifo.enq(req.addr); +// let bramReq = BRAMRequest { +// write : req.isWrite, +// responseOnWrite : False, +// address : truncate(req.addr), +// datain : req.value +// }; +// bram.portA.request.put(bramReq); +// endrule + +// rule response; +// let value <- bram.portA.response.get; +// let addr = pendingFifo.first; +// pendingFifo.deq; +// let resp = CsrResponse { +// addr : addr, +// value : value +// }; +// respFifo.enq(resp); +// endrule + +// interface reqFifoIn = convertFifoToFifoIn(reqFifo); +// interface respFifoOut = convertFifoToFifoOut(respFifo); +// endmodule diff --git a/src/XilBdmaStreamUtils.bsv b/src/XilBdmaStreamUtils.bsv new file mode 100755 index 0000000..bcfb156 --- /dev/null +++ b/src/XilBdmaStreamUtils.bsv @@ -0,0 +1,871 @@ +import Vector::*; +import FIFOF::*; +import GetPut::*; +import Connectable::*; +import SemiFifo::*; + +import XilBdmaPrimUtils::*; +import XilBdmaDmaTypes::*; +import XilBdmaPcieAxiStreamTypes::*; + +typedef 32 
STREAM_SIZE_WIDTH;
+typedef UInt#(STREAM_SIZE_WIDTH) StreamSize;
+typedef Bit#(TAdd#(1, TLog#(STREAM_SIZE_WIDTH))) StreamSizeBitPtr;
+
+// A stream frame together with a byte pointer.
+// NOTE(review): bytePtr is filled from convertByteEn2BytePtr(stream.byteEn) in mkStreamSplit,
+// so it presumably holds the number of valid bytes in the frame - confirm against that helper.
+typedef struct {
+    DataStream stream;
+    DataBytePtr bytePtr;
+} StreamWithPtr deriving(Bits, Bounded, Eq, FShow);
+
+// Generic one-in / one-out DataStream stage.
+interface StreamPipe;
+    interface FifoIn#(DataStream) streamFifoIn;
+    interface FifoOut#(DataStream) streamFifoOut;
+endinterface
+
+// Stream splitter ports: the data stream, the split location for it, and the resulting output.
+interface StreamSplit;
+    interface FifoIn#(DataStream) inputStreamFifoIn;
+    interface FifoIn#(StreamSize) splitLocationFifoIn;
+    interface FifoOut#(DataStream) outputStreamFifoOut;
+endinterface
+
+// True when bit 0 of byteEn is clear. Only the lowest bit is inspected.
+// NOTE(review): this equals "no byte enabled" only if enabled bytes always start at bit 0 - confirm.
+function Bool isByteEnZero(ByteEn byteEn);
+    return !unpack(byteEn[0]);
+endfunction
+
+// True when the highest bit (BYTE_EN_WIDTH-1) of byteEn is set. Only that bit is inspected.
+// NOTE(review): this equals "all bytes enabled" only for byte enables contiguous from bit 0 - confirm.
+function Bool isByteEnFull(ByteEn byteEn);
+    return unpack(byteEn[valueOf(BYTE_EN_WIDTH)-1]);
+endfunction
+
+// Frame with zero data and zero byteEn, flagged isFirst=False and isLast=True.
+function DataStream getEmptyStream ();
+    return DataStream {
+        data: 0,
+        byteEn: 0,
+        isFirst: False,
+        isLast: True
+    };
+endfunction
+
+// getEmptyStream paired with a byte pointer of 0.
+function StreamWithPtr getEmptyStreamWithPtr ();
+    return StreamWithPtr {
+        stream : getEmptyStream,
+        bytePtr: 0
+    };
+endfunction
+
+// DATA_WIDTH as a DataBitPtr value.
+function DataBitPtr getMaxBitPtr ();
+    return fromInteger(valueOf(DATA_WIDTH));
+endfunction
+
+// BYTE_EN_WIDTH as a DataBytePtr value.
+function DataBytePtr getMaxBytePtr ();
+    return fromInteger(valueOf(BYTE_EN_WIDTH));
+endfunction
+
+// Concat two DataStream frames into one. StreamA.isLast must be True, otherwise the function will return an empty frame to end the stream.
+function Tuple2#(StreamWithPtr, StreamWithPtr) getConcatStream (StreamWithPtr streamA, StreamWithPtr streamB);
+    // Returns (concat, remain): streamB is placed behind the first streamA.bytePtr bytes of
+    // streamA; the part of streamB shifted out of the frame is returned as `remain`.
+    // NOTE(review): if DataBytePtr is an unsigned Bit type, the `>= 0` term below is always true - confirm.
+    Bool isCallLegally = (streamA.stream.isLast && streamA.bytePtr <= getMaxBytePtr && streamA.bytePtr >= 0);
+    DataBitPtr bitPtrA = zeroExtend(streamA.bytePtr) << fromInteger(valueOf(BYTE_WIDTH_WIDTH));
+
+    // Fill the low PtrA bytes by streamA data
+    Data concatDataA = streamA.stream.data;
+    ByteEn concatByteEnA = streamA.stream.byteEn;
+
+    // Fill the high bytes by streamB data
+    Data concatDataB = streamB.stream.data << bitPtrA;
+    ByteEn concatByteEnB = streamB.stream.byteEn << streamA.bytePtr;
+    Data concatData = concatDataA | concatDataB;
+    ByteEn concatByteEn = concatByteEnA | concatByteEnB;
+
+    // Get the remaining bytes of streamB data (the part shifted out of the concat frame)
+    DataBitPtr resBitPtr = getMaxBitPtr - bitPtrA;
+    DataBytePtr resBytePtr = getMaxBytePtr - streamA.bytePtr;
+    Data remainData = streamB.stream.data >> resBitPtr;
+    ByteEn remainByteEn = streamB.stream.byteEn >> resBytePtr;
+
+    // Decide whether the concat frame is the last one, i.e. whether streamB fits into the residual empty bytes
+    Bool isConcatStreamLast = streamB.stream.isLast;
+    DataBytePtr remainBytePtr = 0;
+    DataBytePtr concatStreamPtr = streamA.bytePtr;
+    if (resBytePtr < streamB.bytePtr ) begin
+        // streamB overflows the frame: the concat frame is full and the rest goes to remain
+        isConcatStreamLast = False;
+        remainBytePtr = streamB.bytePtr - resBytePtr;
+        concatStreamPtr = getMaxBytePtr;
+    end
+    else begin
+        concatStreamPtr = streamA.bytePtr + streamB.bytePtr;
+    end
+
+    // Package the returned concatStream and remainStream. On an illegal call both streams stay
+    // getEmptyStream, while the bytePtr fields below still carry the values computed above.
+    DataStream concatStream = getEmptyStream;
+    DataStream remainStream = getEmptyStream;
+    if(isCallLegally) begin
+        concatStream = DataStream {
+            data : concatData,
+            byteEn : concatByteEn,
+            isFirst: streamA.stream.isFirst,
+            isLast : isConcatStreamLast
+        };
+        remainStream = DataStream {
+            data : remainData,
+            byteEn : remainByteEn,
+            isFirst: False,
+            isLast : True
+        };
+    end
+    let concatStreamWithPtr = StreamWithPtr {
+        stream : concatStream,
+        bytePtr: concatStreamPtr
+    };
+    let remainStreamWithPtr = StreamWithPtr {
+        stream : remainStream,
+        bytePtr: remainBytePtr
+    };
+    return tuple2(concatStreamWithPtr, remainStreamWithPtr);
+endfunction
+
+typedef 5 STREAM_SPLIT_LATENCY;
+typedef 3 STREAM_SPLIT_INNER_LATENCY;
+
+// Splits each input stream at the byte position taken from splitLocationFifo. The beat that
+// contains the split point is cut into a frame with isLast=True and a remainder that starts a
+// new stream (isFirst=True); later beats of the same input stream are concatenated onto that
+// remainder. One split location is consumed per input stream (dequeued on its isLast beat).
+(*synthesize*)
+module mkStreamSplit(StreamSplit ifc);
+
+    // Bytes of the current input stream seen in earlier beats (reset to 0 on isLast)
+    Reg#(StreamSize) streamByteCntReg <- mkReg(0);
+
+    FIFOF#(StreamSize) splitLocationFifo <- mkSizedFIFOF(valueOf(STREAM_SPLIT_INNER_LATENCY));
+    FIFOF#(DataStream) inputFifo <- mkLFIFOF;
+    FIFOF#(DataStream) outputFifo <- mkLFIFOF;
+    FIFOF#(StreamWithPtr) prepareFifo <- mkLFIFOF;
+    FIFOF#(StreamWithPtr) assertFifo <- mkLFIFOF;
+    FIFOF#(DataBytePtr) splitPtrFifo <- mkSizedFIFOF(valueOf(STREAM_SPLIT_INNER_LATENCY));
+
+    // Bytes left over from the previous beat; only meaningful while hasRemainReg is True
+    Reg#(StreamWithPtr) remainStreamWpReg <- mkRegU;
+
+    Reg#(Bool) hasRemainReg <- mkReg(False);
+    // True when the remainder belongs to a beat that had isLast set, i.e. it must be flushed alone
+    Reg#(Bool) hasLastRemainReg <- mkReg(False);
+    // True once the current input stream has been split, which suppresses any further split in it
+    Reg#(Bool) isSplittedReg <- mkReg(False);
+
+    // Pipeline stage 1: get the bytePtr of the input stream frame
+    rule prepareStream;
+        let stream = inputFifo.first;
+        inputFifo.deq;
+        StreamWithPtr streamWithPtr = StreamWithPtr {
+            stream: stream,
+            bytePtr: convertByteEn2BytePtr(stream.byteEn)
+        };
+        prepareFifo.enq(streamWithPtr);
+    endrule
+
+    // Pipeline stage 2: assert if splitLocation in this beat and calculate the offsetBytePtr
+    rule assertSplitStream;
+        let stream = prepareFifo.first.stream;
+        let bytePtr = prepareFifo.first.bytePtr;
+        prepareFifo.deq;
+        let splitLocation = splitLocationFifo.first;
+        if (stream.isLast) begin
+            splitLocationFifo.deq;
+        end
+        // offsetBytePtr stays 0 when this beat is not split
+        DataBytePtr offsetBytePtr = 0;
+        // Stream byte count including this beat
+        let curLocation = unpack(zeroExtend(bytePtr)) + streamByteCntReg;
+        if (!isSplittedReg) begin
+            if (curLocation > splitLocation) begin
+                // split point lies inside this beat: offset relative to the start of the beat
+                offsetBytePtr = truncate(pack(splitLocation - streamByteCntReg));
+            end
+            else if (curLocation == splitLocation) begin
+                // split point is exactly the end of this beat
+                offsetBytePtr = bytePtr;
+            end
+        end
+        // $display($time, "ns SIM INFO @ mkStreamSplit: curLocation:%d, splitLocation:%d, offset:%d", curLocation, splitLocation, offsetBytePtr);
+        splitPtrFifo.enq(offsetBytePtr);
+        if (offsetBytePtr > 0 && !stream.isLast) begin
+            isSplittedReg <= True;
+        end
+        else if (stream.isLast) begin
+            isSplittedReg <= False;
+        end
+        streamByteCntReg <= stream.isLast ? 0 : streamByteCntReg + unpack(zeroExtend(bytePtr));
+        assertFifo.enq(prepareFifo.first);
+    endrule
+
+    // Pipeline stage 3: split the stream frame or output it without modification according to offsetBytePtr
+    rule execSplitStream;
+        // Only output remainStreamReg
+        if (hasRemainReg && hasLastRemainReg) begin
+            outputFifo.enq(remainStreamWpReg.stream);
+            hasRemainReg <= False;
+            hasLastRemainReg <= False;
+        end
+        // not the last remain stream
+        else if (assertFifo.notEmpty && splitPtrFifo.notEmpty) begin
+            let streamWp = assertFifo.first;
+            let offsetBytePtr = splitPtrFifo.first;
+            assertFifo.deq;
+            splitPtrFifo.deq;
+            // split location not in this beat, do nothing
+            if (!hasRemainReg && offsetBytePtr == 0) begin
+                outputFifo.enq(streamWp.stream);
+                hasRemainReg <= False;
+                hasLastRemainReg <= False;
+            end
+            // split the frame in this cycle into an isLast=True frame and a remain frame
+            else if (!hasRemainReg && offsetBytePtr > 0) begin
+                DataBitPtr offsetBitPtr = zeroExtend(offsetBytePtr) << valueOf(BYTE_WIDTH_WIDTH);
+                let splitStream = DataStream {
+                    data: getDataLowBytes(streamWp.stream.data, offsetBytePtr),
+                    byteEn: convertBytePtr2ByteEn(offsetBytePtr),
+                    isFirst: streamWp.stream.isFirst,
+                    isLast: True
+                };
+                outputFifo.enq(splitStream);
+                let remainStream = DataStream {
+                    data: streamWp.stream.data >> offsetBitPtr,
+                    byteEn: streamWp.stream.byteEn >> offsetBytePtr,
+                    isFirst: True,
+                    isLast: True
+                };
+                // On the last input beat a remainder is kept only if it actually carries bytes
+                hasRemainReg <= streamWp.stream.isLast ? !isByteEnZero(remainStream.byteEn) : True;
+                hasLastRemainReg <= streamWp.stream.isLast;
+                remainStreamWpReg <= StreamWithPtr {
+                    stream : remainStream,
+                    bytePtr: streamWp.bytePtr - offsetBytePtr
+                };
+            end
+            // concat the stream frame with the remainReg
+            else begin
+                let {concatStreamWp, remainStreamWp} = getConcatStream(remainStreamWpReg, streamWp);
+                outputFifo.enq(concatStreamWp.stream);
+                hasRemainReg <= streamWp.stream.isLast ? !isByteEnZero(remainStreamWp.stream.byteEn) : True;
+                hasLastRemainReg <= streamWp.stream.isLast;
+                remainStreamWpReg <= remainStreamWp;
+            end
+        end
+    endrule
+
+    interface inputStreamFifoIn = convertFifoToFifoIn(inputFifo);
+    interface splitLocationFifoIn = convertFifoToFifoIn(splitLocationFifo);
+    interface outputStreamFifoOut = convertFifoToFifoOut(outputFifo);
+
+endmodule
+
+typedef 2 STREAM_SHIFT_LATENCY;
+
+// module mkStreamShift#(DataBytePtr offset)(StreamPipe);
+// FIFOF#(DataStream) inFifo <- mkLFIFOF;
+// FIFOF#(DataStream) outFifo <- mkFIFOF;
+
+// DataBytePtr resByte = getMaxBytePtr - offset;
+// DataBitPtr offsetBits = zeroExtend(offset) << valueOf(BYTE_WIDTH_WIDTH);
+// DataBitPtr resBits = getMaxBitPtr - offsetBits;
+
+// Reg#(DataStream) remainStreamReg <- mkReg(getEmptyStream);
+// Reg#(Bool) hasLastRemainReg <- mkReg(False);
+
+// function Bool isShiftStreamLast(DataStream stream);
+// Bool isLast = False;
+// if (offset > 0 && offset < getMaxBytePtr) begin
+// isLast = stream.isLast && !unpack(stream.byteEn[resByte]);
+// end
+// else if (offset == 0) begin
+// isLast = stream.isLast;
+// end
+// else begin
+// isLast = False;
+// end
+// return isLast;
+// endfunction
+
+// rule execShift;
+// if (hasLastRemainReg) begin
+// outFifo.enq(remainStreamReg);
+// hasLastRemainReg <= False;
+// remainStreamReg <= getEmptyStream;
+// end
+// else begin
+// let stream = inFifo.first;
+// inFifo.deq;
+// let shiftStream = DataStream {
+// data : (stream.data << offsetBits) | remainStreamReg.data,
+// byteEn : (stream.byteEn << offset) | remainStreamReg.byteEn,
+// isFirst : stream.isFirst,
+// isLast : isShiftStreamLast(stream)
+// };
+// let remainStream = DataStream {
+// data : stream.data >> resBits,
+// byteEn : stream.byteEn >> resByte,
+// isFirst : False,
+// isLast : True
+// };
+// outFifo.enq(shiftStream);
+// remainStreamReg <= remainStream;
+// hasLastRemainReg <= stream.isLast && !isByteEnZero(remainStream.byteEn);
+// end
+// endrule
+
+// interface streamFifoIn = convertFifoToFifoIn(inFifo); +// interface streamFifoOut = convertFifoToFifoOut(outFifo); +// endmodule + +// interface StreamShiftComplex; +// interface FifoIn#(DataStream) streamFifoIn; +// interface FifoOut#(Tuple2#(DataStream, DataStream)) streamFifoOut; +// endinterface + +// module mkStreamShiftComplex#(DataBytePtr offset)(StreamShiftComplex); +// FIFOF#(DataStream) inFifo <- mkLFIFOF; +// FIFOF#(Tuple2#(DataStream, DataStream)) outFifo <- mkFIFOF; + +// DataBytePtr resByte = getMaxBytePtr - offset; +// DataBitPtr offsetBits = zeroExtend(offset) << valueOf(BYTE_WIDTH_WIDTH); +// DataBitPtr resBits = getMaxBitPtr - offsetBits; + +// Reg#(DataStream) remainStreamReg <- mkReg(getEmptyStream); +// Reg#(Bool) hasLastRemainReg <- mkReg(False); + +// function Bool isShiftStreamLast(DataStream stream); +// Bool isLast = False; +// if (offset > 0 && offset < getMaxBytePtr) begin +// isLast = stream.isLast && !unpack(stream.byteEn[resByte]); +// end +// else if (offset == 0) begin +// isLast = stream.isLast; +// end +// else begin +// isLast = False; +// end +// return isLast; +// endfunction + +// rule execShift; +// if (hasLastRemainReg) begin +// outFifo.enq(tuple2(getEmptyStream, remainStreamReg)); +// hasLastRemainReg <= False; +// remainStreamReg <= getEmptyStream; +// end +// else begin +// let stream = inFifo.first; +// inFifo.deq; +// let shiftStream = DataStream { +// data : (stream.data << offsetBits) | remainStreamReg.data, +// byteEn : (stream.byteEn << offset) | remainStreamReg.byteEn, +// isFirst : stream.isFirst, +// isLast : isShiftStreamLast(stream) +// }; +// let remainStream = DataStream { +// data : stream.data >> resBits, +// byteEn : stream.byteEn >> resByte, +// isFirst : False, +// isLast : True +// }; +// outFifo.enq(tuple2(stream, shiftStream)); +// remainStreamReg <= remainStream; +// hasLastRemainReg <= stream.isLast && !isByteEnZero(remainStream.byteEn); +// end +// endrule + +// interface streamFifoIn = 
convertFifoToFifoIn(inFifo);
+// interface streamFifoOut = convertFifoToFifoOut(outFifo);
+// endmodule
+
+// Number of extra bytes (0..3) by which mkStreamShiftAlignToDw shifts a stream on top of its
+// static `offset` parameter.
+typedef enum {
+    Align0 = 0,
+    Align1 = 1,
+    Align2 = 2,
+    Align3 = 3
+} AlignDwMode deriving(Bits, Eq, Bounded, FShow);
+
+// Stream in / stream out plus a method that queues the alignment mode for a stream.
+interface StreamShiftAlignToDw;
+    interface FifoIn#(DataStream) dataFifoIn;
+    interface FifoOut#(DataStream) dataFifoOut;
+    method Action setAlignMode(AlignDwMode align);
+endinterface
+
+typedef 2 STREAM_ALIGN_DW_LATENCY;
+
+// Shifts every beat of a stream towards the high bytes by `offset` + (0..3) bytes, where the
+// extra 0..3 bytes come from the AlignDwMode queued through setAlignMode. Bytes shifted out of
+// a beat are kept in remainStreamReg and OR-ed into the low bytes of the next output beat.
+// One alignment mode is consumed per stream (dequeued on the isLast input beat).
+(*synthesize*)
+module mkStreamShiftAlignToDw#(DataBytePtr offset)(StreamShiftAlignToDw);
+    FIFOF#(DataStream) dataInFifo <- mkLFIFOF;
+    // FIFOF#(DataStream) pipeFifo <- mkFIFOF;
+    FIFOF#(DataStream) dataOutFifo <- mkLFIFOF;
+    FIFOF#(AlignDwMode) alignModeFifo <- mkFIFOF;
+
+    Reg#(DataStream) remainStreamReg <- mkReg(getEmptyStream);
+    // True when the remainder of an isLast beat still has to be sent as one extra beat
+    Reg#(Bool) hasLastRemainReg <- mkReg(False);
+
+    // Base right shift for the remainder, sized for the largest total shift (offset + 3 bytes);
+    // the per-mode branches below shift further right by (3 - mode) bytes.
+    DataBytePtr resByte = getMaxBytePtr - (offset + 3);
+    DataBitPtr offsetBits = zeroExtend(offset) << valueOf(BYTE_WIDTH_WIDTH);
+    DataBitPtr resBits = zeroExtend(resByte) << valueOf(BYTE_WIDTH_WIDTH);
+    // Byte-enable bits for the 1, 2 or 3 bytes starting at position `offset`; they are OR-ed
+    // into the output byteEn in modes Align1..Align3, marking the inserted bytes as enabled.
+    ByteEn byteEnMask1 = 1 << (offset);
+    ByteEn byteEnMask2 = 1 << (offset + 1) | byteEnMask1 ;
+    ByteEn byteEnMask3 = 1 << (offset + 2) | byteEnMask2;
+
+    // rule pipe;
+    // pipeFifo.enq(dataInFifo.first);
+    // dataInFifo.deq;
+    // endrule
+
+    rule execShift;
+        // Flush the remainder left over from the last beat of the previous stream
+        if (hasLastRemainReg) begin
+            dataOutFifo.enq(remainStreamReg);
+            hasLastRemainReg <= False;
+            remainStreamReg <= getEmptyStream;
+        end
+        else begin
+            // let stream = pipeFifo.first;
+            // pipeFifo.deq;
+            let stream = dataInFifo.first;
+            dataInFifo.deq;
+            // Static part of the shift (by `offset` bytes); the mode-dependent part follows below
+            let shiftStream = DataStream {
+                data : stream.data << offsetBits,
+                byteEn : stream.byteEn << offset ,
+                isFirst : stream.isFirst,
+                isLast : stream.isLast
+            };
+            let remainStream = DataStream {
+                data : stream.data >> resBits,
+                byteEn : stream.byteEn >> resByte,
+                isFirst : False,
+                isLast : True
+            };
+            let alignMode = alignModeFifo.first;
+            if (stream.isLast) begin
+                alignModeFifo.deq;
+            end
+            // In every branch the extra left shift of the output plus the extra right shift
+            // of the remainder add up to 3 bytes.
+            case (alignMode)
+                Align1: begin
+                    shiftStream.data = shiftStream.data << valueOf(TMul#(1, BYTE_WIDTH)) | remainStreamReg.data;
+                    shiftStream.byteEn = shiftStream.byteEn << 1 | byteEnMask1 | remainStreamReg.byteEn;
+                    remainStream.data = remainStream.data >> valueOf(TMul#(2, BYTE_WIDTH));
+                    remainStream.byteEn = remainStream.byteEn >> 2;
+                end
+                Align2: begin
+                    shiftStream.data = shiftStream.data << valueOf(TMul#(2, BYTE_WIDTH)) | remainStreamReg.data;
+                    shiftStream.byteEn = shiftStream.byteEn << 2 | byteEnMask2 | remainStreamReg.byteEn;
+                    remainStream.data = remainStream.data >> valueOf(TMul#(1, BYTE_WIDTH));
+                    remainStream.byteEn = remainStream.byteEn >> 1;
+                end
+                Align3: begin
+                    shiftStream.data = shiftStream.data << valueOf(TMul#(3, BYTE_WIDTH)) | remainStreamReg.data;
+                    shiftStream.byteEn = shiftStream.byteEn << 3 | byteEnMask3 | remainStreamReg.byteEn;
+                    remainStream.data = remainStream.data;
+                    remainStream.byteEn = remainStream.byteEn;
+                end
+                // Align0: no extra shift and no mask bits added
+                default: begin
+                    shiftStream.data = shiftStream.data | remainStreamReg.data;
+                    shiftStream.byteEn = shiftStream.byteEn | remainStreamReg.byteEn;
+                    remainStream.data = remainStream.data >> valueOf(TMul#(3, BYTE_WIDTH));
+                    remainStream.byteEn = remainStream.byteEn >> 3;
+                end
+            endcase
+            // The output beat is the last one only if nothing spilled into the remainder
+            shiftStream.isLast = shiftStream.isLast && isByteEnZero(remainStream.byteEn);
+            dataOutFifo.enq(shiftStream);
+            remainStreamReg <= remainStream;
+            hasLastRemainReg <= stream.isLast && !isByteEnZero(remainStream.byteEn);
+        end
+    endrule
+
+    // Queues the alignment mode to be used for one upcoming stream
+    method Action setAlignMode(AlignDwMode align);
+        alignModeFifo.enq(align);
+    endmethod
+
+    interface dataFifoIn = convertFifoToFifoIn(dataInFifo);
+    interface dataFifoOut = convertFifoToFifoOut(dataOutFifo);
+endmodule
+
+typedef 3 STREAM_HEADER_REMOVE_LATENCY;
+
+// Remove the first N Bytes of a stream
+// (*synthesize*)
+// module mkStreamHeaderRemove#(DataBytePtr headerLen)(StreamPipe);
+// FIFOF#(DataStream) inFifo <- mkLFIFOF;
+// FIFOF#(DataStream) outFifo <- mkFIFOF;
+
+// Reg#(DataStream) remainStreamReg <-
mkReg(getEmptyStream); +// Reg#(Bool) hasLastRemainReg <- mkReg(False); + +// DataBitPtr headerBitLen = zeroExtend(headerLen) << valueOf(BYTE_WIDTH_WIDTH); +// DataBytePtr shiftLen = getMaxBytePtr - headerLen; +// DataBitPtr shiftBitLen = zeroExtend(shiftLen) << valueOf(BYTE_WIDTH_WIDTH); + +// rule removeHeader; +// if (hasLastRemainReg) begin +// outFifo.enq(remainStreamReg); +// hasLastRemainReg <= False; +// remainStreamReg <= getEmptyStream; +// end +// else begin +// let stream = inFifo.first; +// inFifo.deq; +// let remainStream = DataStream { +// data : stream.data >> headerBitLen, +// byteEn : stream.byteEn >> headerLen, +// isFirst : stream.isFirst, +// isLast : stream.isLast +// }; +// let newStream = DataStream { +// data : remainStreamReg.data | stream.data << shiftBitLen, +// byteEn : remainStreamReg.byteEn | stream.byteEn << shiftLen, +// isFirst : remainStreamReg.isFirst, +// isLast : isByteEnZero(remainStream.byteEn) +// }; + +// if (stream.isLast && stream.isFirst) begin +// outFifo.enq(remainStream); +// hasLastRemainReg <= False; +// remainStreamReg <= getEmptyStream; +// end +// else if (stream.isFirst) begin +// remainStreamReg <= remainStream; +// end +// else begin +// outFifo.enq(newStream); +// if (stream.isLast) begin +// if(isByteEnZero(remainStream.byteEn)) begin +// remainStreamReg <= getEmptyStream; +// hasLastRemainReg <= False; +// end +// else begin +// remainStreamReg <= remainStream; +// hasLastRemainReg <= True; +// end +// end +// else begin +// remainStreamReg <= remainStream; +// end +// end +// end +// endrule + +// interface streamFifoIn = convertFifoToFifoIn(inFifo); +// interface streamFifoOut = convertFifoToFifoOut(outFifo); +// endmodule + + +// // Only support one not full dataStream between streams +// module mkStreamReshape#(Bool debugEn)(StreamPipe); +// FIFOF#(DataStream) inFifo <- mkFIFOF; +// FIFOF#(DataStream) outFifo <- mkFIFOF; + +// //During Stream Varibles +// Reg#(DataBytePtr) rmBytePtrReg <- mkReg(0); +// 
Reg#(DataBitPtr) rmBitPtrReg <- mkReg(0); +// Reg#(DataBytePtr) rsBytePtrReg <- mkReg(0); +// Reg#(DataBitPtr) rsBitPtrReg <- mkReg(0); +// Reg#(Bool) isDetectedReg <- mkReg(False); +// Reg#(DataStream) remainStreamReg <- mkReg(getEmptyStream); +// Reg#(Bool) hasLastRemainReg <- mkReg(False); + +// rule shape; +// if (hasLastRemainReg) begin +// outFifo.enq(remainStreamReg); +// if (debugEn) $display("mkStreamReshape state 0 outStream=", fshow(remainStreamReg)); +// isDetectedReg <= False; +// hasLastRemainReg <= False; +// end +// else begin +// let stream = inFifo.first; +// inFifo.deq; +// Bool isDetect = !stream.isLast && !isByteEnFull(stream.byteEn) && (!isDetectedReg); +// if (isDetect) begin +// let bytePtr = convertByteEn2BytePtr(stream.byteEn); +// DataBitPtr bitPtr = zeroExtend(bytePtr) << valueOf(BYTE_WIDTH_WIDTH); +// rmBytePtrReg <= bytePtr; +// rmBitPtrReg <= bitPtr; +// rsBytePtrReg <= getMaxBytePtr - bytePtr; +// rsBitPtrReg <= getMaxBitPtr - bitPtr; +// remainStreamReg <= stream; +// isDetectedReg <= True; +// if (bytePtr == 0) begin +// $display($time, "ns SIM Warning @ mkStreamReshape: detect bubble, bytePtr:%d, byteEn: %b", bytePtr, stream.byteEn); +// end +// end +// else begin +// if (isDetectedReg) begin +// let remainStream = DataStream { +// data : stream.data >> rsBitPtrReg, +// byteEn : stream.byteEn >> rsBytePtrReg, +// isFirst : stream.isFirst, +// isLast : True +// }; +// remainStreamReg <= remainStream; +// let isLast = isByteEnZero(remainStream.byteEn) && stream.isLast; +// let outStream = DataStream { +// data : (stream.data << rmBitPtrReg) | remainStreamReg.data, +// byteEn : (stream.byteEn << rmBytePtrReg) | remainStreamReg.byteEn, +// isFirst : remainStreamReg.isFirst, +// isLast : isLast +// }; +// outFifo.enq(outStream); +// if (debugEn) $display("mkStreamReshape state 1 outStream=", fshow(outStream)); +// hasLastRemainReg <= !isByteEnZero(remainStream.byteEn) && stream.isLast; +// isDetectedReg <= isLast ? 
False : isDetectedReg;
+//             end
+//             else begin
+//                 outFifo.enq(stream);
+//                 if (debugEn) $display("mkStreamReshape state 2 outStream=", fshow(stream));
+//             end
+//         end
+//     end
+// endrule
+
+// interface streamFifoIn = convertFifoToFifoIn(inFifo);
+// interface streamFifoOut = convertFifoToFifoOut(outFifo);
+// endmodule
+
+
+// States of the mkStreamReshape FSM:
+//   Idle            - nothing buffered; wait for an input beat.
+//   Output          - a partially filled beat is buffered; every input beat is
+//                     split and merged with the buffered remainder.
+//   OutputLast      - input stream ended but remainder bytes are still buffered;
+//                     emit them as one extra beat.
+//   OutputNoShift   - the buffered beat needs no re-alignment; emit it unchanged.
+typedef enum {
+    StreamReshapeStateIdle = 0,
+    StreamReshapeStateOutput = 1,
+    StreamReshapeStateOutputLast = 2,
+    StreamReshapeStateOutputNoShift = 3
+} StreamReshapeState deriving(Bits, FShow, Eq);
+
+// Only support one not full dataStream between streams
+//
+// Closes the gap left by a beat that is neither the last beat nor fully
+// byte-enabled: the following beats are shifted so that their low bytes fill
+// the hole, and the bytes shifted out are carried over to the next output beat.
+// Beats that need no re-alignment pass through with one register stage.
+// debugEn is kept for interface compatibility; the debug $display calls that
+// used it are currently commented out.
+(*synthesize*)
+module mkStreamReshape#(Bool debugEn)(StreamPipe);
+    FIFOF#(DataStream) inFifo <- mkLFIFOF;
+    FIFOF#(DataStream) outFifo <- mkFIFOF;
+
+    // During Stream Variables
+    // rm*: left-shift applied to incoming beats (taken from the partial beat's byteEn;
+    //      presumably its number of valid bytes - confirm convertByteEn2BytePtr).
+    // rs*: complementary right-shift (max pointer minus rm) that yields the carried remainder.
+    Reg#(DataBytePtr) rmBytePtrReg <- mkReg(0);
+    Reg#(DataBitPtr) rmBitPtrReg <- mkReg(0);
+    Reg#(DataBytePtr) rsBytePtrReg <- mkReg(0);
+    Reg#(DataBitPtr) rsBitPtrReg <- mkReg(0);
+    // Beat (or remainder of a beat) waiting to be emitted or merged.
+    Reg#(DataStream) remainStreamReg <- mkReg(getEmptyStream);
+
+    Reg#(StreamReshapeState) stateReg <- mkReg(StreamReshapeStateIdle);
+
+    // Buffer a freshly dequeued beat and select the next state.
+    // A beat that is not the last one and not fully enabled starts shift-merge
+    // mode (Output) and fixes the shift amounts; any other beat is forwarded
+    // unchanged via OutputNoShift.
+    function Action latchBeat(DataStream stream);
+        return action
+            // if (debugEn) $display("time=%0t", $time, "mkStreamReshape inStream=", fshow(stream));
+            remainStreamReg <= stream;
+            if (!stream.isLast && !isByteEnFull(stream.byteEn)) begin
+                let bytePtr = convertByteEn2BytePtr(stream.byteEn);
+                DataBitPtr bitPtr = zeroExtend(bytePtr) << valueOf(BYTE_WIDTH_WIDTH);
+                rmBytePtrReg <= bytePtr;
+                rmBitPtrReg <= bitPtr;
+                rsBytePtrReg <= getMaxBytePtr - bytePtr;
+                rsBitPtrReg <= getMaxBitPtr - bitPtr;
+                stateReg <= StreamReshapeStateOutput;
+            end
+            else begin
+                stateReg <= StreamReshapeStateOutputNoShift;
+            end
+        endaction;
+    endfunction
+
+    // Idle: take the next beat as soon as one is available.
+    rule idleState if (stateReg == StreamReshapeStateIdle);
+        inFifo.deq;
+        latchBeat(inFifo.first);
+    endrule
+
+    // Output: merge the buffered remainder with the low part of the incoming
+    // beat and keep the high part as the new remainder.
+    rule outputState if (stateReg == StreamReshapeStateOutput);
+        let stream = inFifo.first;
+        inFifo.deq;
+
+        // Bytes of this beat that do not fit into the current output beat.
+        // Marked isLast because, if emitted on its own, it closes the stream.
+        let remainStream = DataStream {
+            data : stream.data >> rsBitPtrReg,
+            byteEn : stream.byteEn >> rsBytePtrReg,
+            isFirst : stream.isFirst,
+            isLast : True
+        };
+        remainStreamReg <= remainStream;
+        let noRemain = isByteEnZero(remainStream.byteEn);
+
+        let outStream = DataStream {
+            data : (stream.data << rmBitPtrReg) | remainStreamReg.data,
+            byteEn : (stream.byteEn << rmBytePtrReg) | remainStreamReg.byteEn,
+            isFirst : remainStreamReg.isFirst,
+            isLast : noRemain && stream.isLast
+        };
+        outFifo.enq(outStream);
+        // if (debugEn) $display("time=%0t", $time, "mkStreamReshape outputState outStream=", fshow(outStream));
+
+        // Stay in Output until the input stream ends; then either flush the
+        // remainder in an extra beat or return to Idle.
+        if (stream.isLast) begin
+            stateReg <= noRemain ? StreamReshapeStateIdle : StreamReshapeStateOutputLast;
+        end
+    endrule
+
+    // OutputLast: flush the remainder; start on the next stream in the same
+    // cycle if a beat is already waiting.
+    rule outputLastState if (stateReg == StreamReshapeStateOutputLast);
+        outFifo.enq(remainStreamReg);
+        // if (debugEn) $display("time=%0t", $time, "mkStreamReshape outputLastState outStream=", fshow(remainStreamReg));
+        if (inFifo.notEmpty) begin
+            inFifo.deq;
+            latchBeat(inFifo.first);
+        end
+        else begin
+            stateReg <= StreamReshapeStateIdle;
+        end
+    endrule
+
+    // OutputNoShift: forward the buffered beat unchanged.
+    rule outputNoShiftState if (stateReg == StreamReshapeStateOutputNoShift);
+        outFifo.enq(remainStreamReg);
+        // if (debugEn) $display("time=%0t", $time, "mkStreamReshape outputNoShiftState outStream=", fshow(remainStreamReg));
+        if (remainStreamReg.isLast) begin
+            // Stream finished: the next beat is optional.
+            if (inFifo.notEmpty) begin
+                inFifo.deq;
+                latchBeat(inFifo.first);
+            end
+            else begin
+                stateReg <= StreamReshapeStateIdle;
+            end
+        end
+        else begin
+            // Mid-stream: the rule waits (implicit FIFO condition) for the next beat.
+            inFifo.deq;
+            latchBeat(inFifo.first);
+        end
+    endrule
+
+
+    interface streamFifoIn = convertFifoToFifoIn(inFifo);
+    interface streamFifoOut = convertFifoToFifoOut(outFifo);
+endmodule
+
+typedef Bit#(DWORD_BYTES) DWordByteEn;
+
+(*synthesize*)
+module mkStreamRemoveDescAndDW#(DataBytePtr headerLen)(StreamPipe);
+    FIFOF#(DataStream) inFifo <- mkLFIFOF;
+    FIFOF#(DataStream) outFifo <- mkFIFOF;
+
+    Reg#(DataStream) remainStreamReg <- mkReg(getEmptyStream); // part of the previous beat not emitted yet
+    Reg#(Bool) hasLastRemainReg <- mkReg(False); // True when the last input beat left bytes needing one extra output beat
+    Reg#(DataBytePtr) removeByteReg <- mkReg(0); // right-shift in bytes, fixed on the first beat of a stream
+    Reg#(DataBytePtr) resByteReg <- mkReg(getMaxBytePtr); // complementary left-shift in bytes, fixed on the first beat
+
+
+    function Tuple2#(DataBytePtr, DataBytePtr) getRemoveOffset(DWordByteEn dwByteEn); // returns (bytes to remove, getMaxBytePtr minus that amount)
+        case (dwByteEn) matches
+            4'b??10: return tuple2(1 + headerLen, getMaxBytePtr - 1 - headerLen); // first enabled byte after the header is byte 1
+            4'b?100: return tuple2(2 + headerLen, getMaxBytePtr - 2 - headerLen); // first enabled byte after the header is byte 2
+            4'b1000: return tuple2(3 + headerLen, getMaxBytePtr - 3 - headerLen); // first enabled byte after the header is byte 3
+            default: return tuple2(0 + headerLen, getMaxBytePtr - 0 - headerLen); // byte 0 enabled (or none): remove the header only
+        endcase
+    endfunction
+
+    rule removeHeader; // strips headerLen bytes plus leading disabled bytes of the first DWord, then re-packs the stream
+        if (hasLastRemainReg) begin // flush the leftover of the finished stream before taking new input
+            outFifo.enq(remainStreamReg);
+            hasLastRemainReg <= False;
+            remainStreamReg <= getEmptyStream;
+        end
+        else begin
+            let stream = inFifo.first;
+            inFifo.deq;
+            let removeByte = removeByteReg;
+            let resByte = resByteReg;
+
+            if (stream.isFirst) begin // offsets are recomputed per stream; later beats reuse the registered values
+                {removeByte, resByte} = getRemoveOffset(truncate(stream.byteEn >> headerLen));
+            end
+            DataBitPtr removeBits = zeroExtend(removeByte) << valueOf(BYTE_WIDTH_WIDTH);
+            DataBitPtr resBits = zeroExtend(resByte) << valueOf(BYTE_WIDTH_WIDTH);
+
+            let remainStream = DataStream { // this beat with the removed bytes shifted out
+                data : stream.data >> removeBits,
+                byteEn : stream.byteEn >> removeByte,
+                isFirst : stream.isFirst,
+                isLast : stream.isLast
+            };
+            let newStream = DataStream { // previous leftover merged with the low bytes of this beat
+                data : remainStreamReg.data | stream.data << resBits,
+                byteEn : remainStreamReg.byteEn | stream.byteEn << resByte,
+                isFirst : remainStreamReg.isFirst,
+                isLast : isByteEnZero(remainStream.byteEn) // last exactly when this beat leaves no leftover bytes
+            };
+            removeByteReg <= removeByte;
+            resByteReg <= resByte;
+
+            if (stream.isLast && stream.isFirst) begin // single-beat stream: emit the shifted beat directly
+                outFifo.enq(remainStream);
+                hasLastRemainReg <= False;
+                remainStreamReg <= getEmptyStream;
+            end
+            else if (stream.isFirst) begin // first beat of a longer stream: buffer it, nothing to emit yet
+                remainStreamReg <= remainStream;
+            end
+            else begin
+                outFifo.enq(newStream);
+                if (stream.isLast) begin
+                    if(isByteEnZero(remainStream.byteEn)) begin // stream ended with this output beat
+                        remainStreamReg <= getEmptyStream;
+                        hasLastRemainReg <= False;
+                    end
+                    else begin // leftover bytes remain: emit them in the next firing
+                        remainStreamReg <= remainStream;
+                        hasLastRemainReg <= True;
+                    end
+                end
+                else begin
+                    remainStreamReg <= remainStream;
+                end
+            end
+        end
+    endrule
+
+    interface streamFifoIn = convertFifoToFifoIn(inFifo);
+    interface streamFifoOut = convertFifoToFifoOut(outFifo);
+endmodule
+
+
+function DataStream maskDataStreamWithByteEn(DataStream dsIn); // zeroes every data byte whose byteEn bit is 0; other fields unchanged
+    Vector#(BYTE_EN_WIDTH, Byte) maskVec= newVector;
+    for (Integer byteIdx = 0; byteIdx < valueOf(BYTE_EN_WIDTH); byteIdx = byteIdx + 1) begin
+        let byteEnForThisByte = dsIn.byteEn[byteIdx];
+        maskVec[byteIdx] = {byteEnForThisByte, byteEnForThisByte, byteEnForThisByte, byteEnForThisByte, byteEnForThisByte, byteEnForThisByte, byteEnForThisByte, byteEnForThisByte}; // enable bit replicated over the 8 bits of the byte
+    end
+    dsIn.data = dsIn.data & pack(maskVec);
+    return dsIn;
+endfunction
\ No newline at end of file
diff --git a/src/XilBdmaTestUtils.bsv b/src/XilBdmaTestUtils.bsv
new file mode 100644
index 0000000..f3ecd03
--- /dev/null
+++ b/src/XilBdmaTestUtils.bsv
@@ -0,0 +1,117 @@
+import Vector::*;
+import FIFOF::*;
+
+import SemiFifo::*;
+import XilBdmaDmaTypes::*;
+
+typedef 'hAB PSEUDO_DATA;
+typedef 8 PSEUDO_DATA_WIDTH;
+
+function Data getPseudoData(); // builds a Data word by OR-ing shifted copies of the PSEUDO_DATA pattern
+    Data pseudoData = fromInteger(valueOf(PSEUDO_DATA));
+    for (Integer idx = 0; idx < valueOf(TDiv#(DATA_WIDTH, PSEUDO_DATA_WIDTH)); idx = idx + 1) begin
+        pseudoData = pseudoData | (pseudoData << idx*valueOf(PSEUDO_DATA_WIDTH)); // shifts the accumulated value, not the bare pattern; every overlap ORs identical bytes
+    end
+    return pseudoData;
+endfunction
+
+function DataStream getPsuedoStream (Bool isFirst, Bool isLast); // one pseudo-data beat with the given framing flags
+    return DataStream{
+        data: getPseudoData,
+        byteEn: -1, // all byte-enable bits set
+        isFirst: isFirst,
+        isLast: isLast
+    };
+endfunction
+
+interface TestModule;
+    interface Vector#(DMA_PATH_NUM, FifoIn#(DataStream)) c2hDataFifoIn;
+    interface Vector#(DMA_PATH_NUM, FifoOut#(DataStream)) c2hDataFifoOut;
+    interface Vector#(DMA_PATH_NUM, FifoOut#(DmaRequest))
c2hReqFifoOut;
+
+    interface FifoIn#(CsrRequest) h2cReqFifoIn;
+    interface FifoOut#(CsrResponse) h2cRespFifoOut;
+endinterface
+
+typedef 250000 ONE_SECOND_COUNTER; // period, in clock cycles, between generated test transactions
+// typedef 250 ONE_SECOND_COUNTER;
+typedef 'hfff0 TEST_BASE_ADDR; // base address of the generated requests; iterReg is added to it
+
+typedef Bit#(2) TestState; // generator FSM state, encoded by the numeric typedefs below
+typedef 0 IDLE;
+typedef 1 WRITING;
+typedef 2 READING;
+
+module mkTestModule(TestModule); // periodically issues a two-beat write followed by a read request on path 0
+    Vector#(DMA_PATH_NUM, FIFOF#(DataStream)) dataInFifo <- replicateM(mkFIFOF); // no rule in this module dequeues these
+    Vector#(DMA_PATH_NUM, FIFOF#(DataStream)) dataOutFifo <- replicateM(mkFIFOF);
+    Vector#(DMA_PATH_NUM, FIFOF#(DmaRequest)) reqOutFifo <- replicateM(mkFIFOF);
+    FIFOF#(CsrRequest) csrReqFifo <- mkFIFOF; // exposed through the interface only; no rule uses it
+    FIFOF#(CsrResponse) csrRespFifo <- mkFIFOF; // exposed through the interface only; no rule uses it
+
+    Reg#(UInt#(32)) cntReg <- mkReg(0);
+    Reg#(UInt#(4)) iterReg <- mkReg(0); // address offset, advanced once per generated write
+    Reg#(TestState) stateReg <- mkReg(fromInteger(valueOf(IDLE)));
+
+    Vector#(DMA_PATH_NUM, FifoIn#(DataStream)) c2hDataFifoInIfc = newVector;
+    Vector#(DMA_PATH_NUM, FifoOut#(DataStream)) c2hDataFifoOutIfc = newVector;
+    Vector#(DMA_PATH_NUM, FifoOut#(DmaRequest)) c2hReqFifoOutIfc = newVector;
+
+    for (DmaPathNo pathIdx = 0; pathIdx < fromInteger(valueOf(DMA_PATH_NUM)); pathIdx = pathIdx + 1) begin // wrap each per-path FIFO in its FifoIn/FifoOut view
+        c2hDataFifoInIfc[pathIdx] = convertFifoToFifoIn(dataInFifo[pathIdx]);
+        c2hDataFifoOutIfc[pathIdx] = convertFifoToFifoOut(dataOutFifo[pathIdx]);
+        c2hReqFifoOutIfc[pathIdx] = convertFifoToFifoOut(reqOutFifo[pathIdx]);
+    end
+
+    rule counter; // free-running counter that wraps to 0 after reaching ONE_SECOND_COUNTER
+        if (cntReg < fromInteger(valueOf(ONE_SECOND_COUNTER))) begin
+            cntReg <= cntReg + 1;
+        end
+        else begin
+            cntReg <= 0;
+        end
+    endrule
+
+    rule generator; // IDLE -> WRITING -> READING -> IDLE, one step per firing
+        case (stateReg)
+            fromInteger(valueOf(IDLE)): begin
+                if (cntReg == fromInteger(valueOf(ONE_SECOND_COUNTER)-1)) begin // once per counter period: write request plus first data beat
+                    stateReg <= fromInteger(valueOf(WRITING));
+                    iterReg <= iterReg + 1;
+                    let stream = getPsuedoStream(True, False);
+                    let req = DmaRequest {
+                        startAddr : (zeroExtend(pack(iterReg))) + fromInteger(valueOf(TEST_BASE_ADDR)), // reads iterReg before the increment above takes effect
+                        length : 128,
+                        isWrite : True,
+                        attr : defaultValue
+                    };
+                    dataOutFifo[0].enq(stream);
+                    reqOutFifo[0].enq(req);
+                end
+            end
+            fromInteger(valueOf(WRITING)): begin
+                stateReg <= fromInteger(valueOf(READING));
+                let stream = getPsuedoStream(False, True);
+                dataOutFifo[0].enq(stream);
+            end
+            fromInteger(valueOf(READING)): begin
+                stateReg <= fromInteger(valueOf(IDLE));
+                let req = DmaRequest {
+                    startAddr : (zeroExtend(pack(iterReg))) + fromInteger(valueOf(TEST_BASE_ADDR)), // NOTE(review): iterReg was already incremented in IDLE, so this reads one address past the preceding write - confirm intended
+                    length : 128,
+                    isWrite : False,
+                    attr : defaultValue
+                };
+                reqOutFifo[0].enq(req);
+            end
+            default: stateReg <= fromInteger(valueOf(IDLE));
+        endcase
+    endrule
+
+    interface c2hDataFifoIn = c2hDataFifoInIfc;
+    interface c2hDataFifoOut = c2hDataFifoOutIfc;
+    interface c2hReqFifoOut = c2hReqFifoOutIfc;
+
+    interface h2cReqFifoIn = convertFifoToFifoIn(csrReqFifo);
+    interface h2cRespFifoOut = convertFifoToFifoOut(csrRespFifo);
+endmodule
\ No newline at end of file
diff --git a/test/Makefile b/test/Makefile
new file mode 100755
index 0000000..d25bc9d
--- /dev/null
+++ b/test/Makefile
@@ -0,0 +1,28 @@
+include ../Makefile.base
+
+# Testbench source file and the top-level module inside it to elaborate.
+# Override on the command line, e.g. make TESTFILE=TestStreamUtils.bsv TOPMODULE=mkStreamSplitTb
+# The default TOPMODULE must be a module that is actually defined in the default TESTFILE.
+TESTFILE ?= TestDmaCore.bsv
+TOPMODULE ?= mkChunkComputerTb
+
+SIMSCRIPT = $(BUILDDIR)/$(TOPMODULE).sh
+
+# Compile and elaborate TOPMODULE from TESTFILE for Bluesim.
+compile:
+	mkdir -p $(BUILDDIR)
+	bsc -elab -sim -verbose $(BLUESIMFLAGS) $(DEBUGFLAGS) $(DIRFLAGS) $(MISCFLAGS) $(RECOMPILEFLAGS) $(RUNTIMEFLAGS) $(SCHEDFLAGS) $(TRANSFLAGS) -g $(TOPMODULE) $(TESTFILE)
+
+# Link the elaborated design into the simulation script SIMSCRIPT.
+link: compile
+	bsc -sim $(BLUESIMFLAGS) $(DIRFLAGS) $(RECOMPILEFLAGS) $(SCHEDFLAGS) $(TRANSFLAGS) -e $(TOPMODULE) -o $(SIMSCRIPT)
+
+# Run the linked Bluesim simulation.
+simulate: link
+	$(SIMSCRIPT)
+
+clean:
+	rm -rf $(BUILDDIR)
+
+.PHONY: compile link simulate clean
+.DEFAULT_GOAL := simulate
\ No newline at end of file
diff --git a/test/TestCompletionFifo.bsv b/test/TestCompletionFifo.bsv
new file mode 100644
index 0000000..b758329
--- /dev/null
+++ b/test/TestCompletionFifo.bsv
@@ -0,0 +1,162 @@
+import GetPut::*;
+import Counter::*;
+import FIFOF::*;
+import Randomizable::*;
+import LFSR::*;
+import Vector::*;
+
+import SemiFifo::*;
+import
XilBdmaCompletionFifo::*; +import XilBdmaPrimUtils::*; +import XilBdmaPcieAxiStreamTypes::*; +import XilBdmaDmaTypes::*; + +typedef 6 TEST_CHUNK_NUM; +typedef 16 TEST_SLOT_NUM; + +typedef Bit#(32) TestData; +typedef Bit#(TLog#(TEST_SLOT_NUM)) TestTag; +typedef Bit#(TLog#(TEST_CHUNK_NUM)) TestReq; +typedef Bit#(8) TimeInterval; + +(* doc = "testcase" *) +module mkCompletionFifoTb(Empty); + + CompletionFifo#(TEST_SLOT_NUM, TestData) dut <- mkCompletionFifo(valueOf(TEST_CHUNK_NUM)); + Randomize#(TestReq) reqGen <- mkConstrainedRandomizer(1, fromInteger(valueOf(TEST_CHUNK_NUM)-1)); + + FIFOF#(TestTag) tagFifo <- mkSizedFIFOF(valueOf(TEST_SLOT_NUM)); + FIFOF#(Tuple2#(TestTag, TestReq)) reqFifo <- mkSizedFIFOF(valueOf(TEST_SLOT_NUM)); + + Vector#(TEST_SLOT_NUM, Reg#(TestReq)) reqs <- replicateM(mkReg(0)); + Vector#(TEST_SLOT_NUM, Reg#(TestReq)) reqDones <- replicateM(mkReg(0)); + Vector#(TEST_SLOT_NUM, Reg#(Bool)) doneFlags <- replicateM(mkReg(True)); + + Reg#(Bool) initReg <- mkReg(False); + Reg#(TestTag) outPtrReg <- mkReg(0); + Reg#(TestData) dataReg <- mkReg(0); + + Reg#(UInt#(32)) sentChunksReg <- mkReg(0); + Reg#(UInt#(32)) recvChunksReg <- mkReg(0); + + rule init if (!initReg); + reqGen.cntrl.init; + initReg <= True; + endrule + + rule genRequest if (initReg); + if (dut.available) begin + let tag <- dut.reserve.get; + tagFifo.enq(tag); + let reqLen <- reqGen.next; + reqFifo.enq(tuple2(tag, reqLen)); + sentChunksReg <= sentChunksReg + unpack(zeroExtend(reqLen)); + $display("INFO: Gen Tag %h request %h", tag, reqLen); + end + endrule + + rule getResponse if (initReg); + outPtrReg <= outPtrReg == fromInteger(valueOf(TEST_SLOT_NUM)-1) ? 
0 : outPtrReg + 1; + if (!doneFlags[outPtrReg]) begin + if (reqDones[outPtrReg] < reqs[outPtrReg]) begin + reqDones[outPtrReg] <= reqDones[outPtrReg] + 1; + dut.append.enq(tuple2(outPtrReg, zeroExtend(outPtrReg) << valueOf(TLog#(TEST_SLOT_NUM)) | zeroExtend(reqDones[outPtrReg]))); + end + else begin + $display("Debug: set tag %h done, dones %d, req %d", outPtrReg, reqDones[outPtrReg]-1, reqs[outPtrReg]); + dut.complete.put(outPtrReg); + doneFlags[outPtrReg] <= True; + end + end + else begin + if (reqFifo.notEmpty) begin + let {tag, reqLen} = reqFifo.first; + if (outPtrReg == tag) begin + reqDones[outPtrReg] <= 0; + reqs[outPtrReg] <= reqLen; + doneFlags[outPtrReg] <= False; + reqFifo.deq; + end + end + end + endrule + + rule readCompletionFifo if (initReg); + let data = dut.drain.first; + dataReg <= data; + immAssert( + (data > dataReg || dataReg == 0), + "order check @ mkCompletionFifoTb", + $format(data, dataReg) + ); + dut.drain.deq; + recvChunksReg <= recvChunksReg + 1; + $display("Debug: drain from CFifo %h", data); + endrule + + rule testFinish if (initReg); + if (recvChunksReg == sentChunksReg && recvChunksReg > 0) begin + $display("test CompletionFifo end!"); + $finish(); + end + endrule + +endmodule + +module mkSimpleCompletionFifoTb(Empty); + + CompletionFifo#(TEST_SLOT_NUM, TestData) dut <- mkCompletionFifo(valueOf(TEST_CHUNK_NUM)); + FIFOF#(TestTag) tagFifo <- mkSizedFIFOF(valueOf(TEST_SLOT_NUM)); + Reg#(Bool) initReg <- mkReg(False); + Reg#(UInt#(10)) testCntReg <- mkReg(0); + Reg#(UInt#(10)) testOutReg <- mkReg(0); + let testNum = 20; + + + rule init if (!initReg); + initReg <= True; + endrule + + rule genRequest if (initReg && testCntReg <= testNum); + if (dut.available) begin + let tag <- dut.reserve.get; + tagFifo.enq(tag); + $display("INFO: Gen Tag %d", tag); + testCntReg <= testCntReg + 1; + end + endrule + + rule getResponse if (initReg); + let tag = tagFifo.first; + tagFifo.deq; + dut.append.enq(tuple2(tag, zeroExtend(tag)*10)); + 
dut.complete.put(tag); + endrule + + rule getOrder if (initReg); + let data = dut.drain.first; + dut.drain.deq; + $display("INFO: %d drain %d", testOutReg, data); + testOutReg <= testOutReg + 1; + if (testOutReg == fromInteger(testNum)) begin + $finish(); + end + endrule + +endmodule + +interface CFifoInstTb; + interface Get#(TestTag) reserve; + interface FifoIn#(Tuple2#(TestTag, DataStream)) append; + interface Put#(TestTag) complete; + interface FifoOut#(DataStream) drain; +endinterface + +(* synthesize *) +module mkCompletionFifoInst(CFifoInstTb); + CompletionFifo#(TEST_SLOT_NUM, DataStream) cFifo <- mkCompletionFifo(valueOf(MAX_STREAM_NUM_PER_COMPLETION)); + interface reserve = cFifo.reserve; + interface append = cFifo.append; + interface complete = cFifo.complete; + interface drain = cFifo.drain; +endmodule diff --git a/test/TestDmaCompleter.bsv b/test/TestDmaCompleter.bsv new file mode 100644 index 0000000..c5d35df --- /dev/null +++ b/test/TestDmaCompleter.bsv @@ -0,0 +1,190 @@ + +import FIFOF::*; +import Vector::*; +import FShow::*; + +import SemiFifo::*; +import XilBdmaPrimUtils::*; +import XilBdmaPcieAxiStreamTypes::*; +import XilBdmaPcieTypes::*; +import XilBdmaPcieDescriptorTypes::*; +import XilBdmaDmaTypes::*; +import XilBdmaDmaCompleter::*; +import TestDmacVivado::*; + +typedef 'hABCD TEST_DATA; +typedef 'h1A28 TEST_ADDR; + +typedef 2'b10 TRANSLATED_ADDR_TYPE; + +typedef 10 READ_TIMEOUT_THRESH; + +function PcieTlpCtlIsEopCommon getEmptyEop(); + return PcieTlpCtlIsEopCommon { + isEopPtrs: replicate(0), + isEop : 0 + }; +endfunction + +function PcieTlpCtlIsSopCommon getEmptySop(); + return PcieTlpCtlIsSopCommon { + isSopPtrs: replicate(0), + isSop : 0 + }; +endfunction + +function CmplReqAxiStream genPseudoHostRequest(DmaCsrValue testValue, DmaCsrAddr testAddr, Bool isWrite); + let descriptor = PcieCompleterRequestDescriptor { + reserve0 : 0, + attributes : 0, + trafficClass : 0, + barAperture : fromInteger(valueOf(DMA_CSR_ADDR_WIDTH)), + barId : 0, + 
targetFunction: 0, + tag : 0, + requesterId : fromInteger(valueOf(TEST_DATA)), + reserve1 : 0, + reqType : isWrite ? fromInteger(valueOf(MEM_WRITE_REQ)) :fromInteger(valueOf(MEM_READ_REQ)) , + dwordCnt : 1, + address : zeroExtend(testAddr >> valueOf(TSub#(DMA_MEM_ADDR_WIDTH, DES_ADDR_WIDTH))), + addrType : fromInteger(valueOf(TRANSLATED_ADDR_TYPE)) + }; + Data data = 0; + data = data | zeroExtend(pack(descriptor)); + data = data | zeroExtend(testValue) << valueOf(DES_CQ_DESCRIPTOR_WIDTH); + let sideBand = PcieCompleterRequestSideBandFrame { + parity : 0, + tphSteeringTag : 0, + tphType : 0, + tphPresent : 0, + discontinue : False, + isEop : getEmptyEop, + isSop : getEmptySop, + dataByteEn : isWrite ? 'hFFF : 'hFF, + lastByteEn : 'hF, + firstByteEn : 'hF + }; + return CmplReqAxiStream { + tData : data, + tKeep : fromInteger(valueOf(IDEA_CQ_TKEEP_OF_CSR)), + tLast : True, + tUser : pack(sideBand) + }; +endfunction + +(* doc = "testcase" *) +module mkTestDmaCompleterRequest(Empty); + CompleterRequest dut <- mkCompleterRequest; + Reg#(Bool) isInitReg <- mkReg(False); + + rule testInit if (!isInitReg); + $display("INFO: Start CompleterRequest test"); + let testAxiStram = genPseudoHostRequest(fromInteger(valueOf(TEST_DATA)), fromInteger(valueOf(TEST_ADDR)), True); + dut.axiStreamFifoIn.enq(testAxiStram); + isInitReg <= True; + endrule + + rule testOutput if (isInitReg); + dut.csrWriteReqFifoOut.deq; + let wrReq = dut.csrWriteReqFifoOut.first; + immAssert( + (wrReq.addr == fromInteger(valueOf(TEST_ADDR)) && wrReq.value == fromInteger(valueOf(TEST_DATA))), + "wrReq test @ mkTestDmaCompleterRequest", + $format("RawReq: Addr %h, Value %h \n But", fromInteger(valueOf(TEST_ADDR)), fromInteger(valueOf(TEST_DATA)),fshow(wrReq)) + ); + $display("INFO: Pass CompleterRequest test"); + $finish(); + endrule + +endmodule + +(* doc = "testcase" *) +module mkTestDmaCompleter(Empty); + TestDmacCsrWrRdLoopTb dut <- mkTestDmacCsrWrRdLoopTb; + Reg#(Bool) isInitReg <- mkReg(False); + 
Reg#(Bool) isWriteDoneReg <- mkReg(False); + Reg#(Bool) isWriteDoneReg1 <- mkReg(False); + Reg#(Bool) isReadDoneReg <- mkReg(False); + Reg#(UInt#(32)) timeoutReg <- mkReg(0); + + function Action setEmptyRawAxiStream(); + return action + dut.rawPcie.completerRequest.rawAxiStreamSlave.tValid( + False, + 0, + 0, + False, + 0 + ); + endaction; + endfunction + + rule alwaysEnables; + dut.rawPcie.completerComplete.rawAxiStreamMaster.tReady(True); + dut.rawPcie.completerRequest.nonPostedReqCreditCnt(32); + endrule + + rule testInit; + if (!isInitReg) begin + setEmptyRawAxiStream; + isInitReg <= True; + $display("INFO: Start Completer test"); + end + else if (isInitReg && !isWriteDoneReg) begin + let testAxiStram = genPseudoHostRequest(fromInteger(valueOf(TEST_DATA)), fromInteger(valueOf(TEST_ADDR)), True); + dut.rawPcie.completerRequest.rawAxiStreamSlave.tValid( + True, + testAxiStram.tData, + testAxiStram.tKeep, + testAxiStram.tLast, + testAxiStram.tUser + ); + isWriteDoneReg <= True; + end + else if (isInitReg && isWriteDoneReg1 && !isReadDoneReg) begin + let testAxiStram = genPseudoHostRequest(0, fromInteger(valueOf(TEST_ADDR)), False); + dut.rawPcie.completerRequest.rawAxiStreamSlave.tValid( + True, + testAxiStram.tData, + testAxiStram.tKeep, + testAxiStram.tLast, + testAxiStram.tUser + ); + isReadDoneReg <= True; + end + else begin + setEmptyRawAxiStream; + isWriteDoneReg1 <= isWriteDoneReg; + end + endrule + + rule testOutput if (isInitReg); + if (timeoutReg > fromInteger(valueOf(READ_TIMEOUT_THRESH))) begin + $display("Error: no valid cc axiStream out until timeout!"); + $finish(); + end + else begin + if (dut.rawPcie.completerComplete.rawAxiStreamMaster.tValid) begin + let data = dut.rawPcie.completerComplete.rawAxiStreamMaster.tData; + let keep = dut.rawPcie.completerComplete.rawAxiStreamMaster.tKeep; + let isLast = dut.rawPcie.completerComplete.rawAxiStreamMaster.tLast; + immAssert( + (isLast && (keep == 'hF)), + "completer output keep&last check @ 
mkTestDmaCompleter", + $format("tKeep: %h, tLast: %h", keep, isLast) + ); + DmaCsrValue value = truncate(data >> valueOf(DES_CC_DESCRIPTOR_WIDTH)); + immAssert( + (value == fromInteger(valueOf(TEST_DATA))), + "complete output data check @ mkTestDmaCompleter", + $format("write value: %h, read value: %h", valueOf(TEST_DATA), value) + ); + $display("INFO: Pass Completer test"); + $finish(); + end + else begin + timeoutReg <= timeoutReg + 1; + end + end + endrule +endmodule diff --git a/test/TestDmaCore.bsv b/test/TestDmaCore.bsv new file mode 100755 index 0000000..2d94bb0 --- /dev/null +++ b/test/TestDmaCore.bsv @@ -0,0 +1,313 @@ +import GetPut::*; +import Randomizable::*; +import Vector::*; +import Connectable::*; + +import SemiFifo::*; +import XilBdmaPcieAxiStreamTypes::*; +import XilBdmaDmaTypes::*; +import XilBdmaPrimUtils::*; +import XilBdmaPcieTypes::*; +import XilBdmaPcieDescriptorTypes::*; +import XilBdmaStreamUtils::*; +import XilBdmaPcieAdapter::*; +import TestStreamUtils::*; +import XilBdmaDmaUtils::*; +import XilBdmaDmaC2HPipe::*; + + +typedef 100000 CHUNK_PER_EPOCH_TEST_NUM; +typedef 64'hFFFFFFFFFFFFFFFF MAX_ADDRESS; +typedef 16'hFFFF MAX_TEST_LENGTH; +typedef 2'b00 DEFAULT_TLP_SIZE_SETTING; +typedef 4 CHUNK_TX_TEST_SETTING_NUM; +typedef 6 CHUNK_RX_TEST_SETTING_NUM; + +module mkChunkComputerTb(Empty); + + ChunkCompute dut <- mkChunkComputer(DMA_TX); + + Reg#(Bool) isInitReg <- mkReg(False); + Reg#(UInt#(32)) testCntReg <- mkReg(0); + Reg#(UInt#(32)) epochCntReg <- mkReg(0); + + Reg#(DmaReqLen) lenRemainReg <- mkReg(0); + + Randomize#(DmaMemAddr) startAddrRandomVal <- mkConstrainedRandomizer(0, fromInteger(valueOf(MAX_ADDRESS)-1)); + Randomize#(DmaReqLen) lengthRandomVal <- mkConstrainedRandomizer(1, fromInteger(valueOf(MAX_TEST_LENGTH))); + + function Bool hasBoundary(DmaRequest request); + let highIdx = (request.startAddr + zeroExtend(request.length) - 1) >> valueOf(BUS_BOUNDARY_WIDTH); + let lowIdx = request.startAddr >> valueOf(BUS_BOUNDARY_WIDTH); + 
return (highIdx > lowIdx); + endfunction + + rule testInit if (!isInitReg); + startAddrRandomVal.cntrl.init; + lengthRandomVal.cntrl.init; + isInitReg <= True; + dut.maxReadReqSize.put(tuple2(fromInteger(valueOf(DEFAULT_TLP_SIZE)), fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH)))); + $display("INFO: Start Test of mkChunkComputerTb"); + $display("INFO: Set Max Payload Size to ", valueOf(DEFAULT_TLP_SIZE)); + endrule + + rule testInput if (isInitReg && lenRemainReg == 0); + DmaMemAddr testAddr <- startAddrRandomVal.next; + DmaReqLen testLength <- lengthRandomVal.next; + let testEnd = testAddr + zeroExtend(testLength) - 1; + if (testEnd > testAddr && testEnd <= fromInteger(valueOf(MAX_ADDRESS))) begin + let request = DmaExtendRequest{ + startAddr : testAddr, + endAddr : testAddr + zeroExtend(testLength), + length : testLength, + tag : 0 + }; + lenRemainReg <= testLength; + dut.dmaRequestFifoIn.enq(request); + // $display("INFO: input ", fshow(request)); + end + else begin + lenRemainReg <= 0; + end + endrule + + rule testOutput if (isInitReg && lenRemainReg > 0); + let newRequest = dut.chunkRequestFifoOut.first; + dut.chunkRequestFifoOut.deq; + immAssert( + !hasBoundary(newRequest), + "has boundary assert @ mkChunkComputerTb", + fshow(newRequest) + ); + let newRemain = lenRemainReg - newRequest.length; + lenRemainReg <= newRemain; + if (newRemain == 0) begin + if (epochCntReg < fromInteger(valueOf(CHUNK_PER_EPOCH_TEST_NUM)-1)) begin + epochCntReg <= epochCntReg + 1; + end + else begin + epochCntReg <= 0; + testCntReg <= testCntReg + 1; + if (testCntReg == fromInteger(valueOf(CHUNK_TX_TEST_SETTING_NUM)-1)) begin + $display("INFO: ChunkComputer Test End."); + $finish(); + end + else begin + TlpPayloadSizeWidth mpsWidth = fromInteger(valueOf(DEFAULT_TLP_SIZE_WIDTH)) + truncate(pack(testCntReg)); + TlpPayloadSize mps = 1 << mpsWidth; + dut.maxReadReqSize.put(tuple2(mps, mpsWidth)); + $display("INFO: Set Max Payload Size to %d", mps); + end + end + end + endrule + +endmodule 
+ +// Do not use any simple tests, run cocotb for whole verification + +typedef 25 SIMPLE_TEST_BYTELEN; +typedef 'hABCDEF SIMPLE_TEST_ADDR; + +module mkSimpleC2HWriteCoreTb(Empty); + C2HWriteCore dut <- mkC2HWriteCore(0); + Reg#(UInt#(32)) testCntReg <- mkReg(0); + + rule testInput if (testCntReg < 1); + let req = DmaRequest { + startAddr : fromInteger(valueOf(SIMPLE_TEST_ADDR)), + length : fromInteger(valueOf(SIMPLE_TEST_BYTELEN)), + isWrite : True, + attr : defaultValue + }; + dut.wrReqFifoIn.enq(req); + let stream = generatePsuedoStream(fromInteger(valueOf(SIMPLE_TEST_BYTELEN)), True, True); + dut.dataFifoIn.enq(stream); + testCntReg <= testCntReg + 1; + endrule + + rule testOutput; + let stream = dut.tlpFifoOut.first; + dut.tlpFifoOut.deq; + $display(fshow(stream)); + if (stream.isFirst) begin + let {firstByteEn, lastByteEn} = dut.tlpSideBandFifoOut.first; + dut.tlpSideBandFifoOut.deq; + $display("firstByteEn:%b, lastByteEn:%b", firstByteEn, lastByteEn); + PcieRequesterRequestDescriptor desc = unpack(truncate(stream.data)); + $display("Descriptor Elements: dwordCnt:%d, address:%h", desc.dwordCnt, desc.address << 2); + end + if (stream.isLast) begin + $finish(); + end + endrule +endmodule + +module mkSimpleConvertStraddleAxisToDataStreamTb(Empty); + ConvertStraddleAxisToDataStream dut <- mkConvertStraddleAxisToDataStream; + Reg#(UInt#(32)) testCntReg <- mkReg(0); + Reg#(UInt#(32)) tlpNumReg <- mkReg(2); + + CmplByteCnt testLength = 20; + DmaMemAddr startAddr = fromInteger(valueOf(SIMPLE_TEST_ADDR)); + + rule testInput if (testCntReg < 1); + let desc0 = PcieRequesterCompleteDescriptor { + reserve0 : 0, + attributes : 0, + trafficClass : 0, + reserve1 : 0, + completerId : 123, + tag : 'b01100, + requesterId : 0, + reserve2 : 0, + isPoisoned : False, + status : fromInteger(valueOf(SUCCESSFUL_CMPL)), + dwordCnt : 1, + reserve3 : 0, + isRequestCompleted : True, + isLockedReadCmpl : False, + byteCnt : testLength, + errorcode : 0, + lowerAddr : truncate(startAddr) + }; 
+ let desc1 = desc0; + desc1.lowerAddr = desc0.lowerAddr + truncate(testLength); + desc1.tag = 'b10001; + let stream = generatePsuedoStream(unpack(zeroExtend(testLength)), True, True); + let isSop = PcieTlpCtlIsSopReqCpl { + isSop : fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT)), + isSopPtrs : replicate(0) + }; + isSop.isSopPtrs[0] = fromInteger(valueOf(ISSOP_LANE_0)); + isSop.isSopPtrs[1] = fromInteger(valueOf(ISSOP_LANE_32)); + let isEop = PcieTlpCtlIsEopReqCpl { + isEop : fromInteger(valueOf(DOUBLE_TLP_IN_THIS_BEAT)), + isEopPtrs : replicate(0) + }; + let data0 = stream.data << valueOf(DES_RC_DESCRIPTOR_WIDTH) | zeroExtend(pack(desc0)); + let data1 = stream.data << valueOf(DES_RC_DESCRIPTOR_WIDTH) | zeroExtend(pack(desc1)); + let byteEn = stream.byteEn << valueOf(TDiv#(DES_RC_DESCRIPTOR_WIDTH, BYTE_WIDTH)); + let sideBand = PcieRequesterCompleteSideBandFrame { + parity : 0, + discontinue : False, + isEop : isEop, + isSop : isSop, + dataByteEn : byteEn | byteEn << valueOf(STRADDLE_THRESH_BYTE_WIDTH) + }; + + let axiStream = ReqCmplAxiStream { + tData : data0 | data1 << valueOf(STRADDLE_THRESH_BIT_WIDTH), + tKeep : -1, + tLast : True, + tUser : pack(sideBand) + }; + dut.axiStreamFifoIn.enq(axiStream); + testCntReg <= testCntReg + 1; + endrule + + rule testOutput; + for (Integer pathIdx = 0; pathIdx < valueOf(DMA_PATH_NUM); pathIdx = pathIdx + 1) begin + if (dut.dataFifoOut[pathIdx].notEmpty) begin + let stream = dut.dataFifoOut[pathIdx].first; + dut.dataFifoOut[pathIdx].deq; + $display(fshow(stream)); + end + end + endrule + +endmodule + + +// module mkSimpleConvertDataStreamsToStraddleAxisTb(Empty); +// ConvertDataStreamsToStraddleAxis dut <- mkConvertDataStreamsToStraddleAxis; +// Reg#(UInt#(32)) testCntReg <- mkReg(0); + +// rule testInput if (testCntReg < 1); +// let stream = generatePsuedoStream(fromInteger(valueOf(SIMPLE_TEST_BYTELEN)), True, True); +// let rqSideBandSignal = tuple2(4'b1111, 4'b1111); +// dut.dataFifoIn[0].enq(stream); +// 
dut.byteEnFifoIn[0].enq(rqSideBandSignal); +// dut.dataFifoIn[1].enq(stream); +// dut.byteEnFifoIn[1].enq(rqSideBandSignal); +// testCntReg <= testCntReg + 1; +// endrule + +// rule testOutput; +// let axiStream = dut.axiStreamFifoOut.first; +// dut.axiStreamFifoOut.deq; +// $display("tData: %h", axiStream.tData); +// $display("tKeep: %h", axiStream.tKeep); +// PcieRequesterRequestSideBandFrame sideBand = unpack(axiStream.tUser); +// $display("isSop: %d", sideBand.isSop.isSop); +// if (axiStream.tLast) begin +// $finish(); +// end +// endrule +// endmodule + +// module mkSimpleC2HReadCoreTb(Empty); +// C2HReadCore dut <- mkC2HReadCore(0); +// Reg#(UInt#(32)) testCntReg <- mkReg(0); + +// rule testInput if (testCntReg < 1); +// let req = DmaRequest { +// startAddr : fromInteger(valueOf(SIMPLE_TEST_ADDR)), +// length : fromInteger(valueOf(SIMPLE_TEST_BYTELEN)), +// isWrite : False, +// attr : default +// }; +// dut.rdReqFifoIn.enq(req); +// testCntReg <= testCntReg + 1; +// endrule + +// rule testOutput; +// let stream = dut.tlpFifoOut.first; +// dut.tlpFifoOut.deq; +// $display(fshow(stream)); +// if (stream.isFirst) begin +// let {firstByteEn, lastByteEn} = dut.tlpSideBandFifoOut.first; +// dut.tlpSideBandFifoOut.deq; +// $display("firstByteEn:%b, lastByteEn:%b", firstByteEn, lastByteEn); +// PcieRequesterRequestDescriptor desc = unpack(truncate(stream.data)); +// $display("Descriptor Elements: dwordCnt:%d, address:%h", desc.dwordCnt, desc.address << 2); +// end +// if (stream.isLast) begin +// $finish(); +// end +// endrule +// endmodule + +// module simpleWritePathTb(Empty); +// C2HWriteCore c2hWriteCore <- mkC2HWriteCore(0); +// ConvertDataStreamsToStraddleAxis adapter <- mkConvertDataStreamsToStraddleAxis; +// mkConnection(c2hWriteCore.tlpFifoOut, adapter.dataFifoIn[0]); +// mkConnection(c2hWriteCore.tlpSideBandFifoOut, adapter.byteEnFifoIn[0]); +// Reg#(UInt#(32)) testCntReg <- mkReg(0); + +// rule testInput if (testCntReg < 1); +// let req = DmaRequest { +// 
startAddr : fromInteger(valueOf(SIMPLE_TEST_ADDR)), +// length : fromInteger(valueOf(SIMPLE_TEST_BYTELEN)), +// isWrite : True, +// attr : default +// }; +// let stream = generatePsuedoStream(fromInteger(valueOf(SIMPLE_TEST_BYTELEN)), True, True); +// c2hWriteCore.wrReqFifoIn.enq(req); +// c2hWriteCore.dataFifoIn.enq(stream); +// testCntReg <= testCntReg + 1; +// endrule + +// rule testOutput; +// let axiStream = adapter.axiStreamFifoOut.first; +// adapter.axiStreamFifoOut.deq; +// $display("tData: %h", axiStream.tData); +// $display("tKeep: %h", axiStream.tKeep); +// PcieRequesterRequestSideBandFrame sideBand = unpack(axiStream.tUser); +// $display("isSop: %d", sideBand.isSop.isSop); +// if (axiStream.tLast) begin +// $finish(); +// end +// endrule + +// endmodule diff --git a/test/TestDmacVivado.bsv b/test/TestDmacVivado.bsv new file mode 100644 index 0000000..428b2c7 --- /dev/null +++ b/test/TestDmacVivado.bsv @@ -0,0 +1,121 @@ +import FIFOF::*; +import BRAM::*; +import GetPut::*; + +import SemiFifo::*; +import XilBdmaPcieTypes::*; +import XilBdmaDmaTypes::*; +import XilBdmaDmaController::*; + +typedef 16384 TEST_BRAM_SIZE; + +interface TestDmacWrRdLoop; + (* prefix = "" *) interface RawXilinxPcieIp rawPcie; +endinterface + +(* synthesize, clock_prefix = "user_clk", reset_prefix = "user_reset" *) +module mkTestDmacCsrWrRdLoop((* reset="sys_rst" *) Reset sysRst, TestDmacWrRdLoop ifc); + + DmaController dmac <- mkDmaController; + + BRAM2Port#(DmaCsrAddr, DmaCsrValue) ram <- mkBRAM2Server( + BRAM_Configure { + memorySize : valueOf(TEST_BRAM_SIZE), + loadFormat : None, + latency : 1, + outFIFODepth: 3, + allowWriteResponseBypass : False + } + ); + + rule testWriteReq; + dmac.h2cWrite.dataFifoOut.deq; + dmac.h2cWrite.reqFifoOut.deq; + $display("SIM INFO @ mkTestDmacCsrWrRdLoop: h2cWrite req detect!"); + ram.portA.request.put( + BRAMRequest { + write : True, + responseOnWrite : False, + address : dmac.h2cWrite.reqFifoOut.first, + datain : 
dmac.h2cWrite.dataFifoOut.first + } + ); + endrule + + rule testReadReq; + dmac.h2cRead.reqFifoOut.deq; + $display("SIM INFO @ mkTestDmacCsrWrRdLoop: h2cRead req detect!"); + ram.portB.request.put( + BRAMRequest { + write : False, + responseOnWrite : False, + address : dmac.h2cRead.reqFifoOut.first, + datain : 0 + } + ); + endrule + + rule testReadResp; + $display("SIM INFO @ mkTestDmacCsrWrRdLoop: h2cRead resp detect!"); + let value <- ram.portB.response.get; + dmac.h2cRead.dataFifoIn.enq(value); + endrule + + interface rawPcie = dmac.rawPcie; +endmodule + +// Only use for testing in bsv, do not use for synthesize +interface TestDmacCsrWrRdLoopTb; + interface RawXilinxPcieIpCompleter rawPcie; +endinterface + +module mkTestDmacCsrWrRdLoopTb(TestDmacCsrWrRdLoopTb); + + DmaControllerCompleter dmac <- mkDmaControllerCompleter; + + BRAM2Port#(DmaCsrAddr, DmaCsrValue) ram <- mkBRAM2Server( + BRAM_Configure { + memorySize : valueOf(TEST_BRAM_SIZE), + loadFormat : None, + latency : 1, + outFIFODepth: 3, + allowWriteResponseBypass : False + } + ); + + rule testWriteReq; + dmac.h2cWrite.dataFifoOut.deq; + dmac.h2cWrite.reqFifoOut.deq; + $display("SIM INFO @ mkTestDmacCsrWrRdLoop: h2cWrite req detect!"); + $display("BRAM: PortA write addr %h data %h", dmac.h2cWrite.reqFifoOut.first, dmac.h2cWrite.dataFifoOut.first); + ram.portA.request.put( + BRAMRequest { + write : True, + responseOnWrite : False, + address : dmac.h2cWrite.reqFifoOut.first, + datain : dmac.h2cWrite.dataFifoOut.first + } + ); + endrule + + rule testReadReq; + dmac.h2cRead.reqFifoOut.deq; + $display("SIM INFO @ mkTestDmacCsrWrRdLoop: h2cRead req detect!"); + $display("BRAM: PortB read addr %h", dmac.h2cRead.reqFifoOut.first); + ram.portB.request.put( + BRAMRequest { + write : False, + responseOnWrite : False, + address : dmac.h2cRead.reqFifoOut.first, + datain : 0 + } + ); + endrule + + rule testReadResp; + let value <- ram.portB.response.get; + dmac.h2cRead.dataFifoIn.enq(value); + endrule + + interface 
rawPcie = dmac.rawPcie; +endmodule \ No newline at end of file diff --git a/test/TestSimpleUtils.bsv b/test/TestSimpleUtils.bsv new file mode 100644 index 0000000..3a65c2a --- /dev/null +++ b/test/TestSimpleUtils.bsv @@ -0,0 +1,138 @@ +import Vector::*; +import RegFile::*; +import GetPut::*; +import SemiFifo::*; +import FIFOF::*; +import BRAM::*; +import Connectable :: *; + +import XilBdmaDmaTypes::*; +import XilBdmaStreamUtils::*; +import XilBdmaSimpleModeUtils::*; +import XilBdmaPcieDescriptorTypes::*; +import XilBdmaPcieAxiStreamTypes::*; +import XilBdmaPcieTypes::*; +import XilBdmaPcieAdapter::*; +import XilBdmaDmaH2CPipe::*; + +typedef 2'b10 TRANSLATED_ADDR_TYPE; + +module mkTestSimpleCore(Empty); + DmaSimpleCore core <- mkDmaSimpleCore; + Reg#(UInt#(32)) testRoundReg <- mkReg(0); + + rule test if (testRoundReg < 50); + testRoundReg <= testRoundReg + 1; + case (testRoundReg) + 0: begin + core.reqFifoIn.enq(CsrRequest { + addr : 1, + value : 'h1234, + isWrite: True + }); + end + 1: begin + core.reqFifoIn.enq(CsrRequest { + addr : 2, + value : 'h1234, + isWrite: True + }); + end + 2: begin + core.reqFifoIn.enq(CsrRequest { + addr : 3, + value : 100, + isWrite: True + }); + end + 4: begin + core.reqFifoIn.enq(CsrRequest { + addr : 0, + value : 1, + isWrite: True + }); + end + 5: begin + core.reqFifoIn.enq(CsrRequest { + addr : 1, + value : 0, + isWrite: False + }); + end + endcase + if (core.respFifoOut.notEmpty) begin + let resp = core.respFifoOut.first; + core.respFifoOut.deq; + $display($time, "ns SIM INFO @ mkTestSimpleCore: recv response from dut, address:%h value:%d", resp.addr, resp.value); + end + if (core.c2hReqFifoOut[0].notEmpty) begin + let c2hReq = core.c2hReqFifoOut[0].first; + core.c2hReqFifoOut[0].deq; + $display($time, "ns SIM INFO @ mkTestSimpleCore: recv c2hReq from dut, startAddr:%h length:%d isWrite:%d", c2hReq.startAddr, c2hReq.length, c2hReq.isWrite); + end + endrule +endmodule + +module mkTestSimpleH2CCore(Empty); + DmaH2CPipe pipe <- 
mkDmaH2CPipe; + DmaSimpleCore sCore <- mkDmaSimpleCore; + + mkConnection(pipe.csrReqFifoOut, sCore.reqFifoIn); + mkConnection(pipe.csrRespFifoIn, sCore.respFifoOut); + + Reg#(Bool) testInitReg <- mkReg(False); + Reg#(Bool) simuDoneReg <- mkReg(False); + + function DataStream genCsrReqTlp(CsrRequest req); + let pcieDesc = PcieCompleterRequestDescriptor { + reserve0 : 0, + attributes : 0, + trafficClass : 0, + barAperture : 12, + barId : 0, + targetFunction: 0, + tag : 0, + requesterId : 'hABCD, + reserve1 : 0, + reqType : req.isWrite ? fromInteger(valueOf(MEM_WRITE_REQ)) :fromInteger(valueOf(MEM_READ_REQ)) , + dwordCnt : 1, + address : zeroExtend(req.addr >> 2), + addrType : fromInteger(valueOf(TRANSLATED_ADDR_TYPE)) + }; + let tlpData = DataStream { + data : zeroExtend(pack(pcieDesc)) | (zeroExtend(req.value) << valueOf(TDiv#(DES_CQ_DESCRIPTOR_WIDTH, BYTE_WIDTH))), + byteEn : 'hFFF, + isFirst: True, + isLast : True + }; + return tlpData; + endfunction + + rule testInit if (!testInitReg); + testInitReg <= True; + endrule + + + rule testRead if (testInitReg); + let tlpData = genCsrReqTlp(CsrRequest { + addr : 1, + value : 0, + isWrite : False + }); + pipe.tlpDataFifoIn.enq(tlpData); + simuDoneReg <= True; + $display($time, "ns SIM INFO @ mkTestSimpleH2CCore: send a test read req"); + endrule + + rule testResult if (simuDoneReg); + let tlp = pipe.tlpDataFifoOut.first; + pipe.tlpDataFifoOut.deq; + let desc = truncate(tlp.data); + DmaCsrValue value = truncate(tlp.data >> valueOf(DES_CQ_DESCRIPTOR_WIDTH)); + $display($time, "ns SIM INFO @ mkTestSimpleH2CCore: received h2c path value:%d, whole cc tlp:%h", value, tlp.data); + $finish; + endrule +endmodule + + + diff --git a/test/TestStreamUtils.bsv b/test/TestStreamUtils.bsv new file mode 100755 index 0000000..93d4b83 --- /dev/null +++ b/test/TestStreamUtils.bsv @@ -0,0 +1,280 @@ +import FIFOF::*; +import SemiFifo::*; +import LFSR::*; +import Vector::*; + +import XilBdmaPrimUtils::*; +import XilBdmaDmaTypes::*; +import 
XilBdmaStreamUtils::*; + +typedef 0 LOG_DETAILS_EN; + +typedef 'hAB PSEUDO_DATA; +typedef 8 PSEUDO_DATA_WIDTH; + +typedef 10 TEST_IDEAL_FIFO_DEPTH; + +typedef 'h12345678 SEED_1; +typedef 'hABCDEF01 SEED_2; + +// TEST HYPER PARAMETERS CASE 1 +// typedef 3 MAX_STREAM_SIZE_PTR; +// typedef 10 TEST_NUM; + +// TEST HYPER PARAMETERS CASE 2 +typedef 16 MAX_STREAM_SIZE_PTR; +typedef 1000 TEST_NUM; + +interface RandomStreamSize; + method ActionValue#(StreamSize) next(); +endinterface + +function Data getPseudoData(); + Data pseudoData = fromInteger(valueOf(PSEUDO_DATA)); + for (Integer idx = 0; idx < valueOf(TDiv#(DATA_WIDTH, PSEUDO_DATA_WIDTH)); idx = idx + 1) begin + pseudoData = pseudoData | (pseudoData << idx*valueOf(PSEUDO_DATA_WIDTH)); + end + return pseudoData; +endfunction + +function DataStream generatePsuedoStream (StreamSize size, Bool isFirst, Bool isLast); + let pseudoData = getPseudoData(); + let offsetPtr = (unpack(zeroExtend(getMaxBytePtr())) - size) << valueOf(BYTE_WIDTH_WIDTH); + Data streamData = (pseudoData << offsetPtr) >> offsetPtr; + return DataStream{ + data: streamData, + byteEn: (1 << size) - 1, + isFirst: isFirst, + isLast: isLast + }; +endfunction + +function StreamSize getMaxFrameSize (); + return fromInteger(valueOf(BYTE_EN_WIDTH)); +endfunction + +module mkRandomStreamSize(StreamSize seed, StreamSizeBitPtr maxSizeBitPtr, RandomStreamSize ifc); + LFSR#(Bit#(STREAM_SIZE_WIDTH)) lfsr <- mkLFSR_32 ; + FIFOF#(StreamSize) outputFifo <- mkFIFOF ; + Reg#(Bool) isInitReg <- mkReg(False) ; + + rule run if (isInitReg); + let value = lfsr.value >> (fromInteger(valueOf(STREAM_SIZE_WIDTH)) - maxSizeBitPtr); + if (value > 0) begin + outputFifo.enq(unpack(value)); + end + lfsr.next; + endrule + + rule init if (!isInitReg); + isInitReg <= True; + lfsr.seed(pack(seed)); + endrule + + method ActionValue#(StreamSize) next(); + outputFifo.deq; + return outputFifo.first; + endmethod +endmodule + +(* doc = "testcase" *) +module mkStreamSplitTb(Empty); + + 
StreamSplit dut <- mkStreamSplit; + + RandomStreamSize streamSizeRandomValue <- mkRandomStreamSize(fromInteger(valueOf(SEED_1)), fromInteger(valueOf(MAX_STREAM_SIZE_PTR))); + RandomStreamSize splitLocationRandomValue <- mkRandomStreamSize(fromInteger(valueOf(SEED_2)), fromInteger(valueOf(MAX_STREAM_SIZE_PTR)-1)); + + Reg#(UInt#(32)) testCntReg <- mkReg(0); + Reg#(UInt#(32)) testRoundReg <- mkReg(0); + + FIFOF#(StreamSize) ideaTotalSizeFifo <- mkSizedFIFOF(valueOf(TEST_IDEAL_FIFO_DEPTH)); + FIFOF#(StreamSize) ideaSplitSizeFifo <- mkSizedFIFOF(valueOf(TEST_IDEAL_FIFO_DEPTH)); + + Reg#(StreamSize) streamSize2PutReg <- mkReg(0); + Reg#(StreamSize) totalRecvSizeReg <- mkReg(0); + + Reg#(Bool) isInitReg <- mkReg(False); + Reg#(Bool) hasRecvFirstChunkReg <- mkReg(False); + + Bool logDetailEn = unpack(fromInteger(valueOf(LOG_DETAILS_EN))); + + rule testInit if (!isInitReg); + isInitReg <= True; + $display("INFO: start mkStreamSplitTb!"); + endrule + + rule testInput if (isInitReg && testCntReg < fromInteger(valueOf(TEST_NUM))); + // First Frame + if (streamSize2PutReg == 0) begin + let size <- streamSizeRandomValue.next; + let splitLocation <- splitLocationRandomValue.next; + if (splitLocation < size) begin + let isLast = size <= getMaxFrameSize(); + let firstSize = isLast ? size : getMaxFrameSize(); + let stream = generatePsuedoStream(firstSize, True, isLast); + dut.splitLocationFifoIn.enq(splitLocation); + dut.inputStreamFifoIn.enq(stream); + ideaTotalSizeFifo.enq(size); + ideaSplitSizeFifo.enq(splitLocation); + streamSize2PutReg <= size - firstSize; + if (logDetailEn) begin + $display("INFO: Add input stream size %d, split at %d", size, splitLocation); + end + end + end + else begin + let isLast = streamSize2PutReg <= getMaxFrameSize(); + let size = isLast ? 
streamSize2PutReg : getMaxFrameSize(); + let stream = generatePsuedoStream(size, False, isLast); + dut.inputStreamFifoIn.enq(stream); + streamSize2PutReg <= streamSize2PutReg - size; + end + endrule + + rule testOutput if (isInitReg); + let outStream = dut.outputStreamFifoOut.first; + dut.outputStreamFifoOut.deq; + StreamSize totalSize = totalRecvSizeReg + unpack(zeroExtend(convertByteEn2BytePtr(outStream.byteEn))); + if (outStream.isLast) begin + if (hasRecvFirstChunkReg) begin + immAssert( + (totalSize == ideaTotalSizeFifo.first), + "outStream total length check @ mkStreamSplitTb", + $format("Wrong total length, ideaLen=%d, realLen=%d \n", ideaTotalSizeFifo.first, totalSize) + ); + if (logDetailEn) begin + $display("INFO: receive total size", totalSize); + end + ideaTotalSizeFifo.deq; + testCntReg <= testCntReg + 1; + hasRecvFirstChunkReg <= False; + totalRecvSizeReg <= 0; + end + else begin + immAssert( + (totalSize == ideaSplitSizeFifo.first), + "outStream split location check @ mkStreamSplitTb", + $format("Wrong split location, ideaLen=%d, realLen=%d \n", ideaSplitSizeFifo.first, totalSize) + ); + if (logDetailEn) begin + $display("INFO: receive first chunk at %d, total size %d", ideaSplitSizeFifo.first, ideaTotalSizeFifo.first); + end + ideaSplitSizeFifo.deq; + hasRecvFirstChunkReg <= True; + totalRecvSizeReg <= totalSize; + end + end + else begin + totalRecvSizeReg <= totalSize; + end + endrule + + rule testFinish; + if (testCntReg == fromInteger(valueOf(TEST_NUM)-1)) begin + $display("INFO: end mkStreamSplitTb"); + $finish(); + end + endrule + +endmodule + +module mkStreamShiftTb(Empty); + RandomStreamSize streamSizeRandomValue <- mkRandomStreamSize(fromInteger(valueOf(SEED_1)), fromInteger(valueOf(MAX_STREAM_SIZE_PTR))); + Vector#(TAdd#(BYTE_EN_WIDTH, 1), FIFOF#(StreamSize)) setSizeFifo <- replicateM(mkSizedFIFOF(10)); + Vector#(TAdd#(BYTE_EN_WIDTH, 1), StreamPipe) duts = newVector; + for (DataBytePtr idx = 0; idx <= getMaxBytePtr; idx = idx + 1) begin + 
duts[idx] <- mkStreamShift(idx); + end + + Reg#(Bool) isInitReg <- mkReg(False); + Reg#(UInt#(32)) testCntReg <- mkReg(0); + Reg#(UInt#(32)) testRoundReg <- mkReg(0); + Reg#(StreamSize) remainSizeReg <- mkReg(0); + Reg#(UInt#(32)) recvNumReg <- mkReg(0); + + UInt#(32) testCnt = fromInteger(valueOf(TEST_NUM)); + Bool logDetailEn = unpack(fromInteger(valueOf(LOG_DETAILS_EN))); + + rule testInit if (!isInitReg); + isInitReg <= True; + $display("INFO: Start StreamShift test"); + endrule + + rule testInput if (isInitReg && testCntReg < testCnt); + if (testRoundReg == 0) begin + let size <- streamSizeRandomValue.next; + if (logDetailEn) begin + $display("INFO: mkStreamShiftTb input stream size ", size); + end + testRoundReg <= size / getMaxFrameSize; + Bool isLast = (size <= getMaxFrameSize); + let firstSize = isLast ? size : getMaxFrameSize; + let testStream = generatePsuedoStream(firstSize, True, isLast); + remainSizeReg <= size - firstSize; + testCntReg <= testCntReg + 1; + for (DataBytePtr idx = 0; idx <= getMaxBytePtr; idx = idx + 1) begin + setSizeFifo[idx].enq(size); + duts[idx].streamFifoIn.enq(testStream); + end + end + else begin + Bool isLast = (remainSizeReg <= getMaxFrameSize); + let size = isLast ? 
remainSizeReg : getMaxFrameSize; + remainSizeReg <= remainSizeReg - size; + let testStream = generatePsuedoStream(size, False, isLast); + testRoundReg <= testRoundReg - 1; + if (size > 0) begin + for (DataBytePtr idx = 0; idx <= getMaxBytePtr; idx = idx + 1) begin + duts[idx].streamFifoIn.enq(testStream); + end + end + end + endrule + + rule testFinish if (isInitReg && testCntReg == testCnt); + $display("INFO: End StreamShift test!"); + $finish(); + endrule + + for (DataBytePtr shiftOffset = 0; shiftOffset <= getMaxBytePtr; shiftOffset = shiftOffset + 1) begin + StreamPipe dut = duts[shiftOffset]; + + rule testOutput if (isInitReg); + let shiftStream = dut.streamFifoOut.first; + dut.streamFifoOut.deq; + let ideaSize = setSizeFifo[shiftOffset].first; + let refStream = getEmptyStream; + if (shiftStream.isFirst) begin + let firstSize = ideaSize > getMaxFrameSize ? getMaxFrameSize : ideaSize; + refStream = generatePsuedoStream(firstSize, True, False); + refStream.byteEn = refStream.byteEn << shiftOffset; + DataBitPtr dataShiftOffset = zeroExtend(shiftOffset) << valueOf(BYTE_WIDTH_WIDTH); + refStream.data = refStream.data << dataShiftOffset; + end + else if (shiftStream.isLast) begin + let oriLastSize = ideaSize % fromInteger(valueOf(BYTE_EN_WIDTH)); + let lastSize = oriLastSize + unpack(zeroExtend(shiftOffset)); + lastSize = (lastSize > getMaxFrameSize) ? (lastSize - getMaxFrameSize) : lastSize; + lastSize = (lastSize == 0) ? 
getMaxFrameSize : lastSize; + refStream = generatePsuedoStream(lastSize, False, True); + end + else begin + refStream = generatePsuedoStream(getMaxFrameSize, False, False); + end + if (shiftStream.isLast) begin + setSizeFifo[shiftOffset].deq; + if (shiftOffset == getMaxBytePtr) begin + if (logDetailEn) begin + $display("INFO: StreamShift test epoch %d end!", ideaSize); + end + end + end + immAssert( + (refStream.data == shiftStream.data && refStream.byteEn == shiftStream.byteEn), + "shift stream check @ mkStreamShiftTb", + $format("streamSize:%d, shiftOffset: %d\n", ideaSize, shiftOffset, "shiftStream", fshow(shiftStream), "refStream", fshow(refStream)) + ); + endrule + end +endmodule diff --git a/test_pci.py b/test_pci.py new file mode 100644 index 0000000..ef1babb --- /dev/null +++ b/test_pci.py @@ -0,0 +1,294 @@ +import ctypes +import os +import random +import mmap +import struct +import time + +def va_to_pa(va): + page_size = os.sysconf(os.sysconf_names['SC_PAGESIZE']) + # page_size = 2*1024*1024 + page_offset = va % page_size + pagemap_entry_offset = (va // page_size) * 8 # each pagemap entry is 8 bytes + + try: + with open('/proc/self/pagemap', 'rb') as f: + f.seek(pagemap_entry_offset) + entry_bytes = f.read(8) + if len(entry_bytes) != 8: + raise ValueError("Invalid pagemap entry") + + entry = int.from_bytes(entry_bytes, byteorder='little') + if not (entry & (1 << 63)): # check whether the page is present in memory + raise ValueError("Page not present in physical memory") + + pfn = entry & 0x7FFFFFFFFFFFFF # extract the PFN + print(f"pfn={hex(pfn)}") + return (pfn * page_size) + page_offset + + except IOError as e: + raise RuntimeError(f"Failed to access pagemap: {e}") + + +# Define mmap-related constants +PROT_READ = 1 +PROT_WRITE = 2 +MAP_SHARED = 0x01 +MAP_HUGETLB = 0x40000 # huge-page memory flag +MAP_LOCKED = 0x02000 +MAP_ANONYMOUS = 0x20 + +# Define the mmap function +libc = ctypes.CDLL("libc.so.6") +cmmap = libc.mmap +cmmap.restype = ctypes.c_void_p +cmmap.argtypes = ( + ctypes.c_void_p, ctypes.c_size_t, + ctypes.c_int, ctypes.c_int, + ctypes.c_int, 
ctypes.c_long +) + +# Allocate 2MB of huge-page memory +size = 2 * 1024 * 1024 # 2MB +addr = cmmap( + 0, size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_LOCKED, + -1, 0 +) + +if addr == -1: # NOTE(review): cmmap.restype is c_void_p, so a MAP_FAILED return presumably does not compare equal to -1; confirm this check can actually fire + raise OSError("Failed to allocate huge page memory") + +os.system("setpci -s 02:00.0 COMMAND=0x02") +os.system("setpci -s 02:00.0 98.b=0x16") # 98 = 0x70(base) + 0x28(DevCtl2 offset), 0x16 means disable completion timeout + + +# Use the memory (example) + +va_src = addr +va_dst = addr + 1024*1024 + +src_buffer = (ctypes.c_char * size).from_address(va_src) +dst_buffer = (ctypes.c_char * size).from_address(va_dst) + + +def test_throughput(): + for offset in range(0, 1024*1024, 4): + src_buffer[offset:offset + 4] = (offset//4).to_bytes(4, byteorder="little") + dst_buffer[offset:offset + 4] = (0).to_bytes(4, byteorder="little") + + + src_buffer[:5] = b'Hello' # write data + print(src_buffer[:10]) # read data + dst_buffer[:5] = b'world' # write data + print(dst_buffer[:10]) + + pa_src = va_to_pa(addr) # + 2 + + pa_dst = va_to_pa(addr + 1024*1024) # + 3 + + req_size = 4096 + stride_size = 0 + stride_cnt = 32 + + double_channel_offset = 1024*512 # double channel test enabled + # double_channel_offset = 0 # double channel test disabled + + with open('/sys/bus/pci/devices/0000:02:00.0/resource1', 'r+b') as f: + # Map the file into memory + with mmap.mmap(f.fileno(), 0) as mm: + + struct.pack_into('> 32) + struct.pack_into('> 32) + struct.pack_into('> 32) + struct.pack_into('> 32) + struct.pack_into('> 32) + struct.pack_into('> 32) + struct.pack_into('