可编程逻辑
因为在做3*3卷积的时候,图像大小会变小,具体计算公式如下:

$$O = \frac{I - K + 2P}{S} + 1$$

其中O是输出特征图的大小,I是输入特征图的大小,P是Padding的大小,K是卷积核的大小,S是Stride的大小。当K的值是3、P的值是1、S的值也是1的时候,O = (I - 3 + 2)/1 + 1 = I,即O的值和I的值相等。
为了保持输出图像的大小在经过卷积后和输入的大小一样,我们需要进行Padding操作,在这里我采用了复制周围一圈的方式来完成。

采用python完成Sobel算法的参考模型
import cv2 as cv
import numpy as np
def sobel_reference(img_gray, th=200):
    """Reference model of the thresholded Sobel edge detector.

    The image is padded by one pixel on every side by replicating its border,
    so the output has the same height and width as the input. For every pixel
    the horizontal and vertical gradients are computed with 1-2-1 weights,
    their absolute values are summed, and the result is compared with ``th``.

    Args:
        img_gray: 2-D ``numpy`` array holding the grayscale image.
        th: threshold applied to ``|gx| + |gy|``; defaults to 200.

    Returns:
        A ``uint8`` array of the same shape as ``img_gray`` containing 255
        where ``|gx| + |gy| > th`` and 0 elsewhere.
    """
    h, w = img_gray.shape
    # Widen before doing arithmetic: the 1-2-1 sums reach 4 * 255 and the
    # differences can be negative, neither of which fits in uint8.
    padded = np.pad(img_gray, 1, mode="edge").astype(np.int32)

    # Output pixel (r, c) is centred on padded[r + 1, c + 1]; the slices below
    # select, for all pixels at once, the neighbours at offsets -1 / 0 / +1.
    top, mid, bottom = slice(0, h), slice(1, h + 1), slice(2, h + 2)
    left, centre, right = slice(0, w), slice(1, w + 1), slice(2, w + 2)

    gx1 = padded[top, right] + 2 * padded[mid, right] + padded[bottom, right]
    gx2 = padded[top, left] + 2 * padded[mid, left] + padded[bottom, left]
    gy1 = padded[top, left] + 2 * padded[top, centre] + padded[top, right]
    gy2 = padded[bottom, left] + 2 * padded[bottom, centre] + padded[bottom, right]

    magnitude = np.abs(gx1 - gx2) + np.abs(gy1 - gy2)
    return np.where(magnitude > th, 255, 0).astype(np.uint8)


def main():
    """Load the test image, run the reference model and display both images."""
    img = cv.imread(r"G:shiyanIDc.jpg")
    if img is None:
        # cv.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("could not read the input image")
    # cv.imread delivers the channels in BGR order, so convert from BGR.
    img_gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    sobel_rf = sobel_reference(img_gray, th=200)
    cv.imshow("sobel_rf", sobel_rf)
    cv.imshow("src", img_gray)
    cv.waitKey()
    cv.destroyAllWindows()


if __name__ == "__main__":
    main()
根据算法模型完成HDL:提供SpinalHDL源码
import spinal.core._
import spinal.lib._
/**
 * Thresholded Sobel edge detector operating on an 8-bit image stream.
 *
 * A `GenMatrix` instance turns the incoming stream into a 9-element window.
 * Two pairs of 1-2-1 weighted sums are formed from that window, the absolute
 * difference of each pair is taken, the two differences are added, and the
 * sum is compared with `th`. The output pixel is 255 when the sum exceeds
 * `th` and 0 otherwise.
 *
 * The datapath is four register stages deep (weighted sums, absolute
 * differences, their sum, thresholded output), so the row/col values and the
 * sync signals coming out of `GenMatrix` are delayed by four cycles as well.
 *
 * NOTE(review): the position of each window index inside the 3x3
 * neighbourhood is defined by `GenMatrix`, which is not visible here.
 *
 * @param th          threshold compared against the gradient sum
 * @param imageColNum forwarded to `ImageStream` / `GenMatrix`
 * @param imageRowNum forwarded to `ImageStream` / `GenMatrix`
 */
class Sobel(th: Int, imageColNum: Int, imageRowNum: Int) extends Component {
  val io = new Bundle {
    val dataIn = slave(ImageStream(8, imageColNum, imageRowNum, 1))
    val dataOut = master(ImageStream(8, imageColNum, imageRowNum, 1))
  }
  noIoPrefix()

  // Number of register stages between the window and the output pixel.
  private val pipelineLatency = 4

  // imageColNum rounded up to the next power of two.
  private val colNumPow2 = scala.math.pow(2, log2Up(imageColNum)).toInt

  val genMatrix = new GenMatrix(colNumPow2, imageColNum, imageRowNum)
  genMatrix.io.dataIn <> io.dataIn
  val genMatrixOut = ImageStream(8, imageColNum, imageRowNum, 9)
  genMatrixOut := genMatrix.io.dataOut

  /** window(a) + 2 * window(b) + window(c), widened so no carry is lost. */
  private def weightedSum(a: Int, b: Int, c: Int): UInt = {
    def px(i: Int) = genMatrixOut.data(i)
    px(a).asUInt +^ (px(b) ## B"1'b0").asUInt +^ px(c).asUInt
  }

  /** |x - y| for unsigned operands. */
  private def absDiff(x: UInt, y: UInt): UInt = Mux(x > y, x - y, y - x)

  // Stage 1: the four weighted sums.
  val GX1 = RegNext(weightedSum(0, 1, 2))
  val GX2 = RegNext(weightedSum(6, 7, 8))
  val GY1 = RegNext(weightedSum(6, 3, 0))
  val GY2 = RegNext(weightedSum(8, 5, 2))

  // Stage 2: absolute differences.
  val GX = Reg(UInt(11 bits))
  val GY = Reg(UInt(11 bits))
  GX := absDiff(GX1, GX2)
  GY := absDiff(GY1, GY2)

  // Stage 3: gradient sum.
  val G = RegNext(GX + GY)

  // Stage 4: threshold; the default of 0 is overridden when G exceeds th.
  val sobelOut = Reg(Bits(8 bits))
  sobelOut := 0
  when(G > th) {
    sobelOut := 255
  }

  io.dataOut.data(0) := sobelOut
  io.dataOut.row := Delay(genMatrixOut.row, pipelineLatency)
  io.dataOut.col := Delay(genMatrixOut.col, pipelineLatency)
  io.dataOut.c.hsync := Delay(genMatrixOut.c.hsync, pipelineLatency, init = False)
  io.dataOut.c.vsync := Delay(genMatrixOut.c.vsync, pipelineLatency, init = False)
  io.dataOut.c.de := Delay(genMatrixOut.c.de, pipelineLatency, init = False)
}
/** Generates Verilog for a [[Sobel]] instance with threshold 200 and a 640x480 image. */
object Sobel extends App {
  SpinalConfig().generateVerilog(
    new Sobel(th = 200, imageColNum = 640, imageRowNum = 480)
  )
}
仿真代码:
import spinal.lib._
import spinal.core._
import spinal.core.sim._
import scala.collection.mutable.Queue
import java.io.FileOutputStream
import scala.io.Source
/**
 * Simulation test bench built on top of [[Sobel]] with the image size fixed to 430x430.
 *
 * It drives pixel values read from text files (one integer per line) into `io.dataIn`,
 * records every output pixel observed while `io.dataOut.c.de` is high, and compares
 * those against reference values read from another text file.
 *
 * @param th threshold forwarded to [[Sobel]]
 */
class tbSobelC(th: Int) extends Sobel(th, 430, 430) {
  var src = Array[String]()
  var destDut = Array[String]()
  var destRef = Array[String]()
  var width = Array[Int]()
  var high = Array[Int]()
  // Output pixels captured from the DUT and reference pixels read from file,
  // consumed pairwise by scoreBoard.
  val dutData = Queue[Int]()
  val refData = Queue[Int]()
  var frameLen = 0

  /**
   * Starts the clock, drives all inputs to their idle values and stores the test configuration.
   *
   * @param srcFile     one input pixel file per frame
   * @param destDutFile files the DUT output is written to (only the first one is used by dutOut)
   * @param destRefFile one reference pixel file per frame
   * @param imgShape    (width, height) of each frame
   */
  def init(srcFile: Array[String], destDutFile: Array[String], destRefFile: Array[String], imgShape: Array[(Int, Int)]): Unit = {
    clockDomain.forkStimulus(10)
    io.dataIn.data(0) #= 0
    io.dataIn.row #= 0
    io.dataIn.col #= 0
    src = srcFile
    destDut = destDutFile
    destRef = destRefFile
    io.dataIn.c.de #= false
    io.dataIn.c.vsync #= false
    io.dataIn.c.hsync #= false
    frameLen = src.length
    width = imgShape.map(i => i._1)
    high = imgShape.map(i => i._2)
    clockDomain.waitSampling(10)
  }

  /**
   * Drives one frame into the DUT, one pixel per line of `src`.
   *
   * `vsync` is asserted with the first pixel of the frame and `hsync` with the first pixel
   * of each row. After the last pixel of a row `de` is dropped for a blanking interval
   * (longer after the last row of the frame).
   *
   * @param src   path of the text file holding the pixel values
   * @param width number of pixels per row
   * @param high  number of rows
   */
  def frame(src: String, width: Int, high: Int): Unit = {
    val srcFile = Source.fromFile(src)
    // try/finally so the file is released even if parsing or the simulation throws.
    try {
      val srcData = srcFile.getLines()
      var colCnt = 0
      var rowCnt = 0
      // NOTE(review): `row` is driven with the width and `col` with the height; this looks
      // swapped but is invisible for square images - confirm against ImageStream.
      io.dataIn.row #= width
      io.dataIn.col #= high
      io.dataIn.c.de #= false
      io.dataIn.c.vsync #= false
      io.dataIn.c.hsync #= false
      clockDomain.waitSampling(20)
      while (srcData.hasNext) {
        val data = srcData.next()
        io.dataIn.data(0) #= data.toInt
        io.dataIn.c.de #= true
        if (colCnt == 0 && rowCnt == 0) {
          io.dataIn.c.vsync #= true
          println("xx")
        } else {
          io.dataIn.c.vsync #= false
        }
        if (colCnt == width - 1 && rowCnt == high - 1) {
          // Last pixel of the frame: let it be sampled, then idle for the frame gap.
          clockDomain.waitSampling(1)
          io.dataIn.c.de #= false
          clockDomain.waitSampling(200)
        }
        if (colCnt == 0) {
          io.dataIn.c.hsync #= true
        } else {
          io.dataIn.c.hsync #= false
        }
        if (colCnt == width - 1 && rowCnt != high - 1) {
          // Last pixel of a row: let it be sampled, then idle for the line gap.
          clockDomain.waitSampling(1)
          io.dataIn.c.de #= false
          clockDomain.waitSampling(20)
        }
        if (colCnt == width - 1) {
          colCnt = 0
          if (rowCnt == high - 1) {
            rowCnt = 0
          } else {
            rowCnt = rowCnt + 1
          }
        } else {
          colCnt = colCnt + 1
        }
        clockDomain.waitSampling()
      }
      clockDomain.waitSampling(1000)
    } finally {
      srcFile.close()
    }
  }

  /** Forks a thread that sends every configured frame to the DUT in order. */
  def driver: Unit = {
    fork {
      for (i <- 0 until frameLen) {
        println(s"frame = ${i}")
        frame(src(i), width(i), high(i))
      }
    }
  }

  /**
   * Forks a monitor that, on every cycle with `io.dataOut.c.de` high, queues the output
   * pixel for the scoreboard and appends it as one text line to `destDut(0)`.
   */
  def dutOut: Unit = {
    val dutOutFile = new FileOutputStream(destDut(0))
    fork {
      while (true) {
        if (io.dataOut.c.de.toBoolean) {
          val pixel = io.dataOut.data(0).toInt
          dutData.enqueue(pixel)
          dutOutFile.write((pixel.toString + "\n").getBytes())
        }
        clockDomain.waitSampling()
      }
    }
  }

  /**
   * Forks a thread that reads each reference file once, queuing one value per clock cycle.
   *
   * Each file is closed after it has been read; the files are not re-read, so `refData`
   * holds exactly one set of reference values per frame.
   */
  def refFun: Unit = {
    fork {
      for (i <- 0 until frameLen) {
        val file = Source.fromFile(destRef(i))
        try {
          val srcData = file.getLines()
          while (srcData.hasNext) {
            clockDomain.waitSampling()
            refData.enqueue(srcData.next().toInt)
          }
        } finally {
          file.close()
        }
      }
    }
  }

  /**
   * Forks the checker: whenever both queues hold data, one DUT value and one reference
   * value are compared per cycle; a difference of 5 or more fails the simulation.
   */
  def scoreBoard: Unit = {
    fork {
      var index = 0
      while (true) {
        while (dutData.nonEmpty && refData.nonEmpty) {
          clockDomain.waitSampling()
          val dut = dutData.dequeue()
          val ref = refData.dequeue()
          index = index + 1
          assert(scala.math.abs(ref - dut) < 5, s"index:${index}, dutData:${dut} refData:${ref}")
        }
        clockDomain.waitSampling()
      }
    }
  }

  /**
   * Blocks until `width(0) * high(0)` output pixels have been seen, waits a further 3000
   * cycles and then ends the simulation successfully.
   *
   * NOTE(review): only the first frame's size is counted, so with several frames the
   * simulation would stop after the first one - confirm whether that is intended.
   */
  def waitSimDone: Unit = {
    fork {
      var index = 0
      while (index < width(0) * high(0)) {
        clockDomain.waitSampling()
        if (io.dataOut.c.de.toBoolean) {
          index = index + 1
        }
      }
      clockDomain.waitSampling(3000)
      simSuccess()
    }.join()
  }
}
/**
 * Compiles [[tbSobelC]] with threshold 100 and wave dumping enabled, then runs one
 * simulation over a single 430x430 frame using the text files named below.
 */
class tbSobel {
  val testFile = Array("testGray.txt")
  val dutFile = Array("testDut.txt")
  val refFile = Array("testSobel.txt")
  val imgShape = Array((430, 430))

  val dut = SimConfig
    .withConfig(SpinalConfig(inlineRom = true))
    .withWave
    .compile(new tbSobelC(100))

  dut.doSim { bench =>
    bench.init(
      srcFile = testFile,
      destDutFile = dutFile,
      destRefFile = refFile,
      imgShape = imgShape
    )
    // Start the stimulus, reference reader, output monitor and checker,
    // then block until the expected number of output pixels has been seen.
    bench.driver
    bench.refFun
    bench.dutOut
    bench.scoreBoard
    bench.waitSimDone
  }
}
/** Entry point: constructing [[tbSobel]] compiles the design and runs the simulation. */
object tbSobel extends App {
  val tb: tbSobel = new tbSobel
}

经过分析之后,该代码可以跑到238MHz,占用330LUT,312FF。
审核编辑:刘清
全部0条评论
快来发表一下你的评论吧 !