Skip to content

Commit 915ea37

Browse files
Reduce number of top-level synthesis module pins
1 parent a4e9fee commit 915ea37

1 file changed

Lines changed: 70 additions & 13 deletions

File tree

src/main/scala/caches/hardware/pipelined/SharedPipelinedCacheSynthTop.scala

Lines changed: 70 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@ package caches.hardware.pipelined
22

33
import caches.hardware.reppol._
44
import chisel3._
5+
import chisel3.util._
56

67
/**
78
* Top level module for synthesizing the shared pipelined cache.
9+
* The top-level module uses shift registers to receive its inputs serially, one bit per cycle, and combines the cache's output signals
10+
* into a single signal, in order to reduce the number of top-level pins while preventing the synthesis tool from
11+
* synthesizing away internal logic.
812
*/
913
class SharedPipelinedCacheSynthTop(
1014
sizeInBytes: Int,
@@ -20,6 +24,9 @@ class SharedPipelinedCacheSynthTop(
2024
nHalfMissCmds: Option[Int] = None,
2125
) extends Module {
2226
private val coreDataWidth = 32
27+
private val memDataWidth = memBeatSize * 8
28+
private val coreByteEnWidth = coreDataWidth / 8
29+
private val memByteEnWidth = memDataWidth / 8
2330
private val coreBurstLen = bytesPerSubBlock / (coreDataWidth / 8)
2431

2532
val l2CacheGen = () => new SharedPipelinedCache(
@@ -47,32 +54,82 @@ class SharedPipelinedCacheSynthTop(
4754
l2Cache = l2CacheGen
4855
))
4956

50-
val io = IO(new OcpCacheWrapperPort(nCores, addrWidth, coreDataWidth, coreBurstLen, memBeatSize * 8, memBurstLen, l2Cache.l2SchedulerDataWidth))
57+
private val schedulerAddrWidth = log2Up(nCores)
58+
private val schedulerDataWidth = l2Cache.l2SchedulerDataWidth
59+
private val schedulerByteEnWidth = schedulerDataWidth / 8
5160

52-
l2Cache.io.mem <> io.mem
53-
l2Cache.io.scheduler <> io.scheduler
54-
l2Cache.io.cores <> io.cores
61+
val io = IO(new Bundle {
62+
val serialInCores = Input(UInt(1.W))
63+
val serialInMem = Input(UInt(1.W))
64+
val serialInScheduler = Input(UInt(1.W))
65+
val reducedOut = Output(UInt(1.W))
66+
})
67+
68+
// Input shift registers
69+
private val coreInShiftRegisterWidth = 3 + addrWidth + coreDataWidth + coreByteEnWidth + 1
70+
val coresInShiftReg = Reg(UInt((nCores * coreInShiftRegisterWidth).W))
71+
val memInShiftReg = Reg(UInt((2 + memDataWidth + 1 + 1).W))
72+
val schedulerInShiftReg = Reg(UInt((3 + schedulerAddrWidth + schedulerDataWidth + schedulerByteEnWidth).W))
73+
74+
// Output registers
75+
val coresOutReg = Reg(UInt((nCores * (2 + coreDataWidth + 1 + 1)).W))
76+
val memOutReg = Reg(UInt((3 + addrWidth + memDataWidth + memByteEnWidth + 1).W))
77+
val schedulerOutReg = Reg(UInt((2 + schedulerDataWidth).W))
78+
79+
// Provide inputs serially through shift registers to reduce the number of input pins for synthesis
80+
coresInShiftReg := Cat(coresInShiftReg(coresInShiftReg.getWidth - 2, 0), io.serialInCores)
81+
memInShiftReg := Cat(memInShiftReg(memInShiftReg.getWidth - 2, 0), io.serialInMem)
82+
schedulerInShiftReg := Cat(schedulerInShiftReg(schedulerInShiftReg.getWidth - 2, 0), io.serialInScheduler)
83+
84+
// Core interfaces
85+
val coresOut = VecInit(Seq.fill(nCores)(WireDefault(0.U((2 + coreDataWidth + 1 + 1).W))))
86+
for (core <- 0 until nCores) {
87+
val coreOffset = core * coreInShiftRegisterWidth
88+
89+
l2Cache.io.cores(core).M.Cmd := coresInShiftReg(3 + addrWidth + coreDataWidth + coreByteEnWidth + coreOffset, addrWidth + coreDataWidth + coreByteEnWidth + 1 + coreOffset)
90+
l2Cache.io.cores(core).M.Addr := coresInShiftReg(addrWidth + coreDataWidth + coreByteEnWidth + coreOffset, coreDataWidth + coreByteEnWidth + 1 + coreOffset)
91+
l2Cache.io.cores(core).M.Data := coresInShiftReg(coreDataWidth + coreByteEnWidth + coreOffset, coreByteEnWidth + 1 + coreOffset)
92+
l2Cache.io.cores(core).M.DataByteEn := coresInShiftReg(coreByteEnWidth + coreOffset, 1 + coreOffset)
93+
l2Cache.io.cores(core).M.DataValid := coresInShiftReg(coreOffset)
94+
95+
coresOut(core) := Cat(l2Cache.io.cores(core).S.Resp, l2Cache.io.cores(core).S.Data, l2Cache.io.cores(core).S.CmdAccept, l2Cache.io.cores(core).S.DataAccept)
96+
}
97+
98+
coresOutReg := Cat(coresOut)
99+
100+
// Memory interface
101+
l2Cache.io.mem.S.Resp := memInShiftReg((2 + memDataWidth + 2) - 1, memDataWidth + 2)
102+
l2Cache.io.mem.S.Data := memInShiftReg((memDataWidth + 2) - 1, 2)
103+
l2Cache.io.mem.S.CmdAccept := memInShiftReg(1)
104+
l2Cache.io.mem.S.DataAccept := memInShiftReg(0)
105+
106+
memOutReg := Cat(l2Cache.io.mem.M.Cmd, l2Cache.io.mem.M.Addr, l2Cache.io.mem.M.Data, l2Cache.io.mem.M.DataByteEn, l2Cache.io.mem.M.DataValid)
107+
108+
// Scheduler interface
109+
l2Cache.io.scheduler.M.Cmd := schedulerInShiftReg((3 + schedulerAddrWidth + schedulerDataWidth + schedulerByteEnWidth) - 1, schedulerAddrWidth + schedulerDataWidth + schedulerByteEnWidth)
110+
l2Cache.io.scheduler.M.Addr := schedulerInShiftReg((schedulerAddrWidth + schedulerDataWidth + schedulerByteEnWidth) - 1, schedulerDataWidth + schedulerByteEnWidth)
111+
l2Cache.io.scheduler.M.Data := schedulerInShiftReg((schedulerDataWidth + schedulerByteEnWidth) - 1, schedulerByteEnWidth)
112+
l2Cache.io.scheduler.M.ByteEn := schedulerInShiftReg(schedulerByteEnWidth - 1, 0)
113+
114+
schedulerOutReg := Cat(l2Cache.io.scheduler.S.Resp, l2Cache.io.scheduler.S.Data)
115+
116+
// Combine all the retimed output signals using an XOR operation so the synthesis tool doesn't remove logic
117+
io.reducedOut := RegNext(RegNext(Cat(coresOutReg, memOutReg, schedulerOutReg).xorR))
55118
}
56119

57120
object SharedPipelinedCacheSynthTop extends App {
58-
// val l2Size = 524288 // 512 KiB
59-
// val l2Size = 262144 // 256 KiB
60-
// val l2Size = 131072 // 128 KiB
61121
val l2Size = 65536 // 64 KiB
62-
// val l2Size = 16384 // 16 KiB
63122
val nWays = 8
64-
val nCores = 2
123+
val nCores = 4
65124
val addressWidth = 32
66125
val bytesPerBlock = 64
67126
val bytesPerSubBlock = 16
68127
val memBeatSize = 4
69128
val memBurstLen = 4
70129

71130
val l2nSets = l2Size / (nWays * bytesPerBlock)
72-
val l2RepPolicy = () => new BitPlruReplacementPolicy(nWays, l2nSets, nCores)
73-
// val l2RepPolicy = () => new TreePlruReplacementPolicy(nWays, l2nSets, nCores)
74-
// val l2RepPolicy = () => new TimeoutReplacementPolicy(nWays, l2nSets, nCores, BasePolicies.BIT_PLRU, repSetFormat = new MruFormat)
75-
// val l2RepPolicy = () => new ContentionReplacementPolicy(nWays, l2nSets, nCores, BasePolicies.BIT_PLRU, true, true, true, repSetFormat = new MruFormat)
131+
// val l2RepPolicy = () => new BitPlruReplacementPolicy(nWays, l2nSets, nCores)
132+
val l2RepPolicy = () => new ContentionReplacementPolicy(nWays, l2nSets, nCores, BasePolicies.BIT_PLRU, true, true, true, repSetFormat = new MruFormat)
76133

77134
println("Generating the L2 cache hardware for synthesis...")
78135
(new chisel3.stage.ChiselStage).emitVerilog(

0 commit comments

Comments
 (0)