FPGA图像处理-Sobel边缘检测原理

HKJD8885 2023-07-30 791

可编程逻辑

1367人已加入

描述

在做3*3卷积的时候，输出图像的大小会变小，具体计算公式如下：

$$O = \frac{I - K + 2P}{S} + 1$$

其中O是输出特征图的大小，I是输入特征图的大小，P是Padding的大小，K是卷积核的大小，S是Stride的大小。当K的值是3、P的值是1、S的值也是1的时候，O的值和I的值相等。

为了保持输出图像的大小在经过卷积后和输入的大小一样，我们需要进行Padding操作，在这里我采用了复制周围一圈的方式来完成。

python

采用python完成Sobel算法的参考模型

import cv2 as cv
import numpy as np


# Load the source image and build a grayscale copy padded by one replicated
# pixel on every side, so a 3x3 window can be centred on every source pixel.
# NOTE(review): the path below looks like it lost its directory separators
# (e.g. "G:\...\...") when the article was published - confirm before running.
img = cv.imread(r"G:shiyanIDc.jpg")
if img is None:
    # cv.imread reports a missing/unreadable file by returning None, not by raising.
    raise FileNotFoundError("cv.imread could not load the input image")
# cv.imread returns channels in BGR order, so convert with BGR2GRAY.
img_gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
h, w = img_gray.shape
# mode="edge" repeats the outermost row/column (corners included), which is
# the same replicate-the-border padding the manual slice copies produced.
img_padding = np.pad(img_gray, 1, mode="edge")


# Reference Sobel model: |Gx| + |Gy| compared against a fixed threshold,
# producing a binary (0 / 255) edge map with the same size as the input.
th = 200
sobel_rf = np.zeros((h, w), np.uint8)
# Widen the pixels once so the 1-2-1 sums (up to 1020) cannot wrap around
# in uint8 arithmetic.
padded = img_padding.astype(np.int32)
# Padded coordinates 1..h / 1..w correspond to source pixels 0..h-1 / 0..w-1;
# the upper bounds are inclusive so the last row and column are processed too.
for i in range(1, h + 1):
    for j in range(1, w + 1):
        # Right column minus left column of the 3x3 window.
        gx1 = padded[i - 1][j + 1] + 2 * padded[i][j + 1] + padded[i + 1][j + 1]
        gx2 = padded[i - 1][j - 1] + 2 * padded[i][j - 1] + padded[i + 1][j - 1]
        # Top row minus bottom row of the 3x3 window.
        gy1 = padded[i - 1][j - 1] + 2 * padded[i - 1][j] + padded[i - 1][j + 1]
        gy2 = padded[i + 1][j - 1] + 2 * padded[i + 1][j] + padded[i + 1][j + 1]
        gx = abs(gx1 - gx2)
        gy = abs(gy1 - gy2)
        sobel_rf[i - 1][j - 1] = 255 if gx + gy > th else 0
# Show the edge map and the grayscale source, then block until a key is
# pressed before closing both windows.
for window_title, picture in (("sobel_rf", sobel_rf), ("src", img_gray)):
    cv.imshow(window_title, picture)
cv.waitKey()
cv.destroyAllWindows()

根据算法模型完成HDL：提供SpinalHDL源码

import spinal.core._
import spinal.lib._


/**
 * Sobel edge detector operating on an 8-bit image stream.
 *
 * A `GenMatrix` instance supplies nine window taps per pixel. Two pairs of
 * 1-2-1 weighted sums are formed from those taps, the absolute difference of
 * each pair is taken, the two magnitudes are added, and the sum is compared
 * against `th` to yield a binary 0/255 pixel. The datapath is four register
 * stages deep, so the sync and size signals are delayed by the same amount.
 *
 * @param th          threshold the summed gradient magnitude must exceed
 * @param imageColNum column-count parameter forwarded to the stream/window generator
 * @param imageRowNum row-count parameter forwarded to the stream/window generator
 */
class Sobel(th: Int, imageColNum: Int, imageRowNum: Int) extends Component {
  val io = new Bundle {
    val dataIn = slave(ImageStream(8, imageColNum, imageRowNum, 1))
    val dataOut = master(ImageStream(8, imageColNum, imageRowNum, 1))
  }
  noIoPrefix()

  // The first GenMatrix argument is imageColNum rounded up to a power of two.
  val genMatrix = new GenMatrix(scala.math.pow(2, log2Up(imageColNum)).toInt, imageColNum, imageRowNum)
  genMatrix.io.dataIn <> io.dataIn

  val genMatrixOut = ImageStream(8, imageColNum, imageRowNum, 9)
  genMatrixOut := genMatrix.io.dataOut

  // Number of register stages between genMatrixOut and io.dataOut.data.
  private val pipelineDepth = 4

  /** Window tap `idx` as an unsigned value. */
  private def tap(idx: Int): UInt = genMatrixOut.data(idx).asUInt

  /** Window tap `idx` multiplied by two (a zero bit appended on the LSB side). */
  private def tapTimesTwo(idx: Int): UInt = (genMatrixOut.data(idx) ## B"1'b0").asUInt

  /** Registered 1-2-1 weighted sum of three taps; `+^` keeps the carry bits. */
  private def weightedSum(first: Int, middle: Int, last: Int): UInt =
    RegNext(tap(first) +^ tapTimesTwo(middle) +^ tap(last))

  /** |x - y| for unsigned operands, avoiding a negative intermediate. */
  private def absDiff(x: UInt, y: UInt): UInt = Mux(x > y, x - y, y - x)

  // Stage 1: weighted sums of opposite window edges.
  // NOTE(review): which taps form a row vs. a column depends on GenMatrix's
  // tap ordering, which is not visible here.
  val GX1 = weightedSum(0, 1, 2)
  val GX2 = weightedSum(6, 7, 8)
  val GX = Reg(UInt(11 bits))

  val GY1 = weightedSum(6, 3, 0)
  val GY2 = weightedSum(8, 5, 2)
  val GY = Reg(UInt(11 bits))

  // Stage 2: absolute differences.
  GX := absDiff(GX1, GX2)
  GY := absDiff(GY1, GY2)

  // Stage 3: combined magnitude; stage 4: threshold to a binary pixel.
  val G = RegNext(GX + GY)
  val sobelOut = Reg(Bits(8 bits))
  sobelOut := Mux(G > th, B(255, 8 bits), B(0, 8 bits))

  // Keep size and sync signals aligned with the four-stage datapath.
  io.dataOut.data(0) := sobelOut
  io.dataOut.row := Delay(genMatrixOut.row, pipelineDepth)
  io.dataOut.col := Delay(genMatrixOut.col, pipelineDepth)
  io.dataOut.c.hsync := Delay(genMatrixOut.c.hsync, pipelineDepth, init = False)
  io.dataOut.c.vsync := Delay(genMatrixOut.c.vsync, pipelineDepth, init = False)
  io.dataOut.c.de := Delay(genMatrixOut.c.de, pipelineDepth, init = False)
}


/** Entry point: elaborates a Sobel core (threshold 200, 640 columns, 480 rows) and emits Verilog. */
object Sobel extends App {
  private val config = SpinalConfig()
  config.generateVerilog(new Sobel(th = 200, imageColNum = 640, imageRowNum = 480))
}

仿真代码：

import spinal.lib._
import spinal.core._
import spinal.core.sim._


import scala.collection.mutable.Queue
import java.io.FileOutputStream
import scala.io.Source


/**
 * Simulation wrapper around [[Sobel]] fixed at 430 x 430.
 *
 * Adds the simulation-side threads of the test bench: a pixel driver, a DUT
 * output monitor, a reference-data reader, a scoreboard comparing the two
 * queues, and a completion watcher that ends the simulation.
 *
 * @param th threshold forwarded to the Sobel core
 */
class tbSobelC(th: Int) extends Sobel(th, 430, 430) {
  // Per-frame file lists; index i of each array describes frame i.
  var src = Array[String]()
  var destDut = Array[String]()
  var destRef = Array[String]()
  //  var srcLen = 0
  var width = Array[Int]()
  var high = Array[Int]()

  // Pixels observed on the DUT output / read from the reference files,
  // consumed pairwise by the scoreboard.
  val dutData = Queue[Int]()
  val refData = Queue[Int]()

  // Number of frames to drive (length of the source file list).
  var frameLen = 0

  /**
   * Starts the clock, idles every input and records the test configuration.
   *
   * @param srcFile     one pixel-per-line input file per frame
   * @param destDutFile files receiving the DUT output (only the first is written)
   * @param destRefFile one pixel-per-line reference file per frame
   * @param imgShape    (width, height) of each frame
   */
  def init(srcFile: Array[String], destDutFile: Array[String], destRefFile: Array[String], imgShape: Array[(Int, Int)]): Unit = {
    clockDomain.forkStimulus(10)
    io.dataIn.data(0) #= 0
    io.dataIn.row #= 0
    io.dataIn.col #= 0

    src = srcFile
    destDut = destDutFile
    destRef = destRefFile
    io.dataIn.c.de #= false
    io.dataIn.c.vsync #= false
    io.dataIn.c.hsync #= false

    frameLen = src.length

    width = imgShape.map(i => i._1)
    high = imgShape.map(i => i._2)

    clockDomain.waitSampling(10)
  }

  /**
   * Drives one frame into the DUT, one pixel per line of `src`.
   *
   * vsync is raised with the first pixel of the frame and hsync with the
   * first pixel of a row; `de` is dropped for a blanking gap after each row
   * and for a longer gap after the last pixel. The order of the statements
   * inside the loop determines the exact cycle each signal changes on.
   *
   * @param src   path of the pixel file (one decimal value per line)
   * @param width number of pixels per row
   * @param high  number of rows
   */
  def frame(src: String, width: Int, high: Int): Unit = {
    val srcFile = Source.fromFile(src)
    // try/finally so the file handle is released even if a line fails to parse.
    try {
      val srcData = srcFile.getLines()
      var colCnt = 0
      var rowCnt = 0
      // NOTE(review): `row` is driven with the width and `col` with the height.
      // That looks swapped, but the only shape used by this bench is square
      // (430 x 430) - confirm against the ImageStream definition.
      io.dataIn.row #= width
      io.dataIn.col #= high
      io.dataIn.c.de #= false
      io.dataIn.c.vsync #= false
      io.dataIn.c.hsync #= false
      clockDomain.waitSampling(20)
      while (srcData.hasNext) {
        val data = srcData.next()
        io.dataIn.data(0) #= data.toInt
        io.dataIn.c.de #= true
        if (colCnt == 0 && rowCnt == 0) {
          io.dataIn.c.vsync #= true
          println("xx")
        } else {
          io.dataIn.c.vsync #= false
        }
        // Last pixel of the frame: let it be sampled, then a long blanking gap.
        if (colCnt == width - 1 && rowCnt == high - 1) {
          clockDomain.waitSampling(1)
          io.dataIn.c.de #= false
          clockDomain.waitSampling(200)
        }
        if (colCnt == 0) {
          io.dataIn.c.hsync #= true
        } else {
          io.dataIn.c.hsync #= false
        }

        // Last pixel of any other row: let it be sampled, then a short blanking gap.
        if (colCnt == width - 1 && rowCnt != high - 1) {
          clockDomain.waitSampling(1)
          io.dataIn.c.de #= false
          clockDomain.waitSampling(20)
        }

        // Advance the column/row counters, wrapping at the frame boundary.
        if (colCnt == width - 1) {
          colCnt = 0
          if (rowCnt == high - 1) {
            rowCnt = 0
          } else {
            rowCnt = rowCnt + 1
          }
        } else {
          colCnt = colCnt + 1
        }

        clockDomain.waitSampling()
      }
      clockDomain.waitSampling(1000)
    } finally {
      srcFile.close()
    }
  }

  /** Forks a thread that drives every configured frame, in order, once. */
  def driver: Unit = {
    fork {
      for (i <- 0 until frameLen) {
        println(s"frame = ${i}")
        frame(src(i), width(i), high(i))
      }
    }
  }

  /**
   * Forks a monitor that, on every cycle with `de` high, queues the output
   * pixel for the scoreboard and appends it as a text line to `destDut(0)`.
   *
   * The monitor loops forever, so the output stream is never closed
   * explicitly; writes go through the unbuffered FileOutputStream.
   */
  def dutOut: Unit = {
    val dutOutFile = new FileOutputStream(destDut(0))
    fork {
      while (true) {
        if (io.dataOut.c.de.toBoolean) {
          val pixel = io.dataOut.data(0).toInt
          dutData.enqueue(pixel)
          // One decimal value per line, matching the input/reference file format.
          dutOutFile.write(s"${pixel}\n".getBytes())
        }
        clockDomain.waitSampling()
      }
    }
  }

  /**
   * Forks a thread that queues the reference pixels of every frame, one per
   * clock cycle.
   *
   * Each reference file is read exactly once, mirroring the driver (which
   * sends each frame once), and is closed when done.
   */
  def refFun: Unit = {
    fork {
      for (i <- 0 until frameLen) {
        val file = Source.fromFile(destRef(i))
        try {
          val srcData = file.getLines()
          while (srcData.hasNext) {
            clockDomain.waitSampling()
            val data = srcData.next().toInt
            refData.enqueue(data)
          }
        } finally {
          file.close()
        }
      }
    }
  }

  /**
   * Forks the scoreboard: whenever both queues hold data, dequeues one pixel
   * from each and asserts they differ by less than 5.
   */
  def scoreBoard: Unit = {
    fork {
      // Running count of compared pixels; incremented before the check, so
      // the index in the failure message is 1-based.
      var index = 0
      while (true) {
        while (dutData.nonEmpty && refData.nonEmpty) {
          clockDomain.waitSampling()
          val dut = dutData.dequeue()
          val ref = refData.dequeue()
          index = index + 1
          assert(scala.math.abs(ref - dut) < 5, s"index:${index}, dutData:${dut} refData:${ref}")
        }
        clockDomain.waitSampling()
      }
    }
  }

  /**
   * Blocks until `width(0) * high(0)` output pixels have been seen with `de`
   * high (i.e. the size of the first frame only), waits 3000 more cycles,
   * then ends the simulation successfully.
   */
  def waitSimDone: Unit = {
    fork {
      var index = 0
      while (index < width(0) * high(0)) {
        clockDomain.waitSampling()
        if (io.dataOut.c.de.toBoolean) {
          index = index + 1
        }
      }
      clockDomain.waitSampling(3000)
      simSuccess()
    }.join()
  }
}


/**
 * Top-level Sobel test: compiles the simulation model with waveform dumping
 * and runs one 430 x 430 frame through it, comparing against a reference file.
 *
 * Constructing an instance runs the whole simulation.
 * NOTE(review): the bench threshold is 100; confirm the reference file was
 * generated with the same threshold.
 */
class tbSobel {
  val testFile = Array("testGray.txt")
  val dutFile = Array("testDut.txt")
  val refFile = Array("testSobel.txt")
  val imgShape = Array((430, 430))

  val dut = {
    val simSetup = SimConfig.withConfig(SpinalConfig(inlineRom = true)).withWave
    simSetup.compile(new tbSobelC(100))
  }

  dut.doSim { bench =>
    // Configure first, then start the background threads; waitSimDone blocks
    // until the run is complete.
    bench.init(testFile, dutFile, refFile, imgShape)
    bench.driver
    bench.refFun
    bench.dutOut
    bench.scoreBoard
    bench.waitSimDone
  }
}


/** Entry point: instantiating the test class runs the Sobel simulation. */
object tbSobel extends App {
  val tb: tbSobel = new tbSobel()
}

python

经过分析之后，该代码可以跑到238MHz，占用330LUT，312FF。

审核编辑：刘清

打开APP阅读更多精彩内容