@@ -2,9 +2,13 @@ package caches.hardware.pipelined
22
33import caches .hardware .reppol ._
44import chisel3 ._
5+ import chisel3 .util ._
56
67/**
78 * Top level module for synthesizing the shared pipelined cache.
9+ * The top level module uses shift registers to deserialize its input and combines the cache's output signals
10+ * into a single signal, in order to reduce the number of top-level pins while preventing the synthesis tool from
11+ * synthesizing away internal logic.
812 */
913class SharedPipelinedCacheSynthTop (
1014 sizeInBytes : Int ,
@@ -20,6 +24,9 @@ class SharedPipelinedCacheSynthTop(
2024 nHalfMissCmds : Option [Int ] = None ,
2125 ) extends Module {
2226 private val coreDataWidth = 32
27+ private val memDataWidth = memBeatSize * 8
28+ private val coreByteEnWidth = coreDataWidth / 8
29+ private val memByteEnWidth = memDataWidth / 8
2330 private val coreBurstLen = bytesPerSubBlock / (coreDataWidth / 8 )
2431
2532 val l2CacheGen = () => new SharedPipelinedCache (
@@ -47,32 +54,82 @@ class SharedPipelinedCacheSynthTop(
4754 l2Cache = l2CacheGen
4855 ))
4956
50- val io = IO (new OcpCacheWrapperPort (nCores, addrWidth, coreDataWidth, coreBurstLen, memBeatSize * 8 , memBurstLen, l2Cache.l2SchedulerDataWidth))
57+ private val schedulerAddrWidth = log2Up(nCores)
58+ private val schedulerDataWidth = l2Cache.l2SchedulerDataWidth
59+ private val schedulerByteEnWidth = schedulerDataWidth / 8
5160
52- l2Cache.io.mem <> io.mem
53- l2Cache.io.scheduler <> io.scheduler
54- l2Cache.io.cores <> io.cores
61+ val io = IO (new Bundle {
62+ val serialInCores = Input (UInt (1 .W ))
63+ val serialInMem = Input (UInt (1 .W ))
64+ val serialInScheduler = Input (UInt (1 .W ))
65+ val reducedOut = Output (UInt (1 .W ))
66+ })
67+
68+ // Input shift registers
69+ private val coreInShiftRegisterWidth = 3 + addrWidth + coreDataWidth + coreByteEnWidth + 1
70+ val coresInShiftReg = Reg (UInt ((nCores * coreInShiftRegisterWidth).W ))
71+ val memInShiftReg = Reg (UInt ((2 + memDataWidth + 1 + 1 ).W ))
72+ val schedulerInShiftReg = Reg (UInt ((3 + schedulerAddrWidth + schedulerDataWidth + schedulerByteEnWidth).W ))
73+
74+ // Output registers
75+ val coresOutReg = Reg (UInt ((nCores * (2 + coreDataWidth + 1 + 1 )).W ))
76+ val memOutReg = Reg (UInt ((3 + addrWidth + memDataWidth + memByteEnWidth + 1 ).W ))
77+ val schedulerOutReg = Reg (UInt ((2 + schedulerDataWidth).W ))
78+
79+ // Provide input serially through shift registers to reduce the number of input pins for synthesis
80+ coresInShiftReg := Cat (coresInShiftReg(coresInShiftReg.getWidth - 2 , 0 ), io.serialInCores)
81+ memInShiftReg := Cat (memInShiftReg(memInShiftReg.getWidth - 2 , 0 ), io.serialInMem)
82+ schedulerInShiftReg := Cat (schedulerInShiftReg(schedulerInShiftReg.getWidth - 2 , 0 ), io.serialInScheduler)
83+
84+ // Core interfaces
85+ val coresOut = VecInit (Seq .fill(nCores)(WireDefault (0 .U ((2 + coreDataWidth + 1 + 1 ).W ))))
86+ for (core <- 0 until nCores) {
87+ val coreOffset = core * coreInShiftRegisterWidth
88+
89+ l2Cache.io.cores(core).M .Cmd := coresInShiftReg(3 + addrWidth + coreDataWidth + coreByteEnWidth + coreOffset, addrWidth + coreDataWidth + coreByteEnWidth + 1 + coreOffset)
90+ l2Cache.io.cores(core).M .Addr := coresInShiftReg(addrWidth + coreDataWidth + coreByteEnWidth + coreOffset, coreDataWidth + coreByteEnWidth + 1 + coreOffset)
91+ l2Cache.io.cores(core).M .Data := coresInShiftReg(coreDataWidth + coreByteEnWidth + coreOffset, coreByteEnWidth + 1 + coreOffset)
92+ l2Cache.io.cores(core).M .DataByteEn := coresInShiftReg(coreByteEnWidth + coreOffset, 1 + coreOffset)
93+ l2Cache.io.cores(core).M .DataValid := coresInShiftReg(coreOffset)
94+
95+ coresOut(core) := Cat (l2Cache.io.cores(core).S .Resp , l2Cache.io.cores(core).S .Data , l2Cache.io.cores(core).S .CmdAccept , l2Cache.io.cores(core).S .DataAccept )
96+ }
97+
98+ coresOutReg := Cat (coresOut)
99+
100+ // Memory interface
101+ l2Cache.io.mem.S .Resp := memInShiftReg((2 + memDataWidth + 2 ) - 1 , memDataWidth + 2 )
102+ l2Cache.io.mem.S .Data := memInShiftReg((memDataWidth + 2 ) - 1 , 2 )
103+ l2Cache.io.mem.S .CmdAccept := memInShiftReg(1 )
104+ l2Cache.io.mem.S .DataAccept := memInShiftReg(0 )
105+
106+ memOutReg := Cat (l2Cache.io.mem.M .Cmd , l2Cache.io.mem.M .Addr , l2Cache.io.mem.M .Data , l2Cache.io.mem.M .DataByteEn , l2Cache.io.mem.M .DataValid )
107+
108+ // Scheduler interface
109+ l2Cache.io.scheduler.M .Cmd := schedulerInShiftReg((3 + schedulerAddrWidth + schedulerDataWidth + schedulerByteEnWidth) - 1 , schedulerAddrWidth + schedulerDataWidth + schedulerByteEnWidth)
110+ l2Cache.io.scheduler.M .Addr := schedulerInShiftReg((schedulerAddrWidth + schedulerDataWidth + schedulerByteEnWidth) - 1 , schedulerDataWidth + schedulerByteEnWidth)
111+ l2Cache.io.scheduler.M .Data := schedulerInShiftReg((schedulerDataWidth + schedulerByteEnWidth) - 1 , schedulerByteEnWidth)
112+ l2Cache.io.scheduler.M .ByteEn := schedulerInShiftReg(schedulerByteEnWidth - 1 , 0 )
113+
114+ schedulerOutReg := Cat (l2Cache.io.scheduler.S .Resp , l2Cache.io.scheduler.S .Data )
115+
116+ // Combine all the retimed output signals using an XOR operation so the synthesis tool doesn't remove logic
117+ io.reducedOut := RegNext (RegNext (Cat (coresOutReg, memOutReg, schedulerOutReg).xorR))
55118}
56119
57120object SharedPipelinedCacheSynthTop extends App {
58- // val l2Size = 524288 // 512 KiB
59- // val l2Size = 262144 // 256 KiB
60- // val l2Size = 131072 // 128 KiB
61121 val l2Size = 65536 // 64 KiB
62- // val l2Size = 16384 // 16 KiB
63122 val nWays = 8
64- val nCores = 2
123+ val nCores = 4
65124 val addressWidth = 32
66125 val bytesPerBlock = 64
67126 val bytesPerSubBlock = 16
68127 val memBeatSize = 4
69128 val memBurstLen = 4
70129
71130 val l2nSets = l2Size / (nWays * bytesPerBlock)
72- val l2RepPolicy = () => new BitPlruReplacementPolicy (nWays, l2nSets, nCores)
73- // val l2RepPolicy = () => new TreePlruReplacementPolicy(nWays, l2nSets, nCores)
74- // val l2RepPolicy = () => new TimeoutReplacementPolicy(nWays, l2nSets, nCores, BasePolicies.BIT_PLRU, repSetFormat = new MruFormat)
75- // val l2RepPolicy = () => new ContentionReplacementPolicy(nWays, l2nSets, nCores, BasePolicies.BIT_PLRU, true, true, true, repSetFormat = new MruFormat)
131+ // val l2RepPolicy = () => new BitPlruReplacementPolicy(nWays, l2nSets, nCores)
132+ val l2RepPolicy = () => new ContentionReplacementPolicy (nWays, l2nSets, nCores, BasePolicies .BIT_PLRU , true , true , true , repSetFormat = new MruFormat )
76133
77134 println(" Generating the L2 cache hardware for synthesis..." )
78135 (new chisel3.stage.ChiselStage ).emitVerilog(
0 commit comments