可编程逻辑
在之前的推文中介绍了冒泡排序的实现,但当时分享的代码使用的是SpinalHDL,最近有很多小伙伴在后台私信询问有没有Verilog版的代码,今天就给大家贴出来。具体原理请参考之前的文章《FPGA排序--冒泡排序》。
仍然以8个8bit的数为例来介绍冒泡排序,因此数据的输入和输出位宽均为64bit(8*8bit),使用valid信号来标识数据有效,整个实现采用流水线的方式。
`timescale 1ns / 1ps

// bubble: fully pipelined sorter for eight unsigned 8-bit values.
//
// The eight values arrive packed in data_in (value n occupies bits
// [n*8 +: 8]). Every value is compared against every other value, the
// comparison results of each value are summed by a three-level adder tree,
// and the sum (0..7) is used as the position of that value in the output
// word. The smallest value ends up in data_out[7:0], the largest in
// data_out[63:56]. Equal values get distinct positions: of two equal
// values, the one with the lower input index is placed higher.
//
// Ports:
//   clk            - clock, all registers are rising-edge triggered
//   rst            - synchronous, active-high; clears only the valid pipeline
//   data_in        - eight packed 8-bit inputs
//   data_in_valid  - qualifies data_in; a new word may be presented every cycle
//   data_out       - eight packed 8-bit outputs in ascending order,
//                    all zero while data_out_valid is low
//   data_out_valid - high exactly 5 clock edges after the matching
//                    data_in_valid was sampled
module bubble(
    input         clk            ,
    input         rst            ,
    input  [63:0] data_in        ,
    input         data_in_valid  ,
    output [63:0] data_out       ,
    output        data_out_valid
);

    reg [ 3:0] data_in_valid_ff;          // valid delayed by 1..4 cycles
    reg [63:0] data_in_ff [3:0];          // data delayed by 1..4 cycles
    reg        v          [7:0][7:0];     // v[r][c] = 1 when value r ranks above value c
    reg [ 1:0] sum_1      [7:0][3:0];     // adder tree level 1: pairs of v
    reg [ 2:0] sum_2      [7:0][1:0];     // adder tree level 2
    reg [ 3:0] sum_3      [7:0];          // adder tree level 3: final rank 0..7
    reg [ 7:0] data_out_temp [7:0];       // sorted output lanes
    reg        data_out_valid_temp;

    genvar i;
    genvar j;

    // Valid pipeline: bit n is data_in_valid delayed by n+1 cycles.
    always @(posedge clk) begin
        if (rst == 1'b1) begin
            data_in_valid_ff <= 4'b0;
        end else begin
            data_in_valid_ff <= {data_in_valid_ff[2:0], data_in_valid};
        end
    end

    // Data pipeline: keeps the original values aligned with their ranks.
    always @(posedge clk) begin
        data_in_ff[0] <= data_in;
    end

    generate
        for (i = 0; i < 3; i = i + 1) begin : LOOP_DATA_IN
            always @(posedge clk) begin
                data_in_ff[i+1] <= data_in_ff[i];
            end
        end
    endgenerate

    // Stage 1: pairwise comparisons.
    // For i < j, ">=" is used one way and "<" the other, so exactly one of
    // v[i][j] / v[j][i] is set; this is what breaks ties between equal values.
    generate
        for (i = 0; i < 8; i = i + 1) begin : LOOP_V_I
            // A value never ranks above itself. Written explicitly instead
            // of relying on two conflicting assignments in one always block.
            always @(posedge clk) begin
                if (data_in_valid == 1'b1) begin
                    v[i][i] <= 1'b0;
                end
            end

            for (j = i + 1; j < 8; j = j + 1) begin : LOOP_V_J
                always @(posedge clk) begin
                    if (data_in_valid == 1'b1) begin
                        v[i][j] <= data_in[i*8 +: 8] >= data_in[j*8 +: 8];
                        v[j][i] <= data_in[i*8 +: 8] <  data_in[j*8 +: 8];
                    end
                end
            end
        end
    endgenerate

    // Stage 2: adder tree level 1 (8 one-bit flags -> 4 two-bit sums per value).
    generate
        for (i = 0; i < 8; i = i + 1) begin : LOOP_SUM_1_I
            for (j = 0; j < 4; j = j + 1) begin : LOOP_SUM_1_J
                always @(posedge clk) begin
                    if (data_in_valid_ff[0] == 1'b1) begin
                        sum_1[i][j] <= v[i][j*2] + v[i][j*2 + 1];
                    end
                end
            end
        end
    endgenerate

    // Stage 3: adder tree level 2 (4 sums -> 2 sums per value).
    generate
        for (i = 0; i < 8; i = i + 1) begin : LOOP_SUM_2_I
            for (j = 0; j < 2; j = j + 1) begin : LOOP_SUM_2_J
                always @(posedge clk) begin
                    if (data_in_valid_ff[1] == 1'b1) begin
                        sum_2[i][j] <= sum_1[i][j*2] + sum_1[i][j*2 + 1];
                    end
                end
            end
        end
    endgenerate

    // Stage 4: adder tree level 3; sum_3[i] is the output position of value i.
    generate
        for (i = 0; i < 8; i = i + 1) begin : LOOP_SUM_3_I
            always @(posedge clk) begin
                if (data_in_valid_ff[2] == 1'b1) begin
                    sum_3[i] <= sum_2[i][0] + sum_2[i][1];
                end
            end
        end
    endgenerate

    // Stage 5: scatter every delayed input value to the lane given by its rank.
    always @(posedge clk) begin : LOOP_DATA_OUT_TEMP_CLK
        integer k;
        if (data_in_valid_ff[3] == 1'b1) begin
            data_out_valid_temp <= 1'b1;
            for (k = 0; k < 8; k = k + 1) begin
                data_out_temp[sum_3[k]] <= data_in_ff[3][k*8 +: 8];
            end
        end else begin
            data_out_valid_temp <= 1'b0;
            // Clear by lane index rather than through sum_3, so the output
            // is zeroed even while sum_3 does not yet hold valid ranks.
            for (k = 0; k < 8; k = k + 1) begin
                data_out_temp[k] <= 8'd0;
            end
        end
    end

    // Pack the eight output lanes into the output word.
    generate
        for (i = 0; i < 8; i = i + 1) begin : LOOP_DATA_OUT
            assign data_out[i*8 +: 8] = data_out_temp[i];
        end
    endgenerate

    // Single driver for the valid output (kept outside the generate loop so
    // the port is not driven once per loop iteration).
    assign data_out_valid = data_out_valid_temp;

endmodule
代码中大量使用了generate生成语句,这样可以显著减少代码量:这些生成语句会帮我们生成大量重复的电路,与逐条手写的效果相同,因此不必担心。
仿真代码:
`timescale 1ns / 1ps

// tb_bubble: free-running, self-checking testbench for the bubble sorter.
//
// After reset is released, eight new random bytes are driven into the DUT
// on every clock cycle with data_in_valid held high. Whenever the DUT flags
// data_out_valid, the output bytes are checked to be in non-decreasing order
// from data_out[7:0] upwards; violations are reported and counted in
// error_count. The simulation has no built-in end; stop it from the simulator.
module tb_bubble(
);

    reg         clk;
    reg         rst;

    reg  [7:0]  data_in_0;
    reg  [7:0]  data_in_1;
    reg  [7:0]  data_in_2;
    reg  [7:0]  data_in_3;
    reg  [7:0]  data_in_4;
    reg  [7:0]  data_in_5;
    reg  [7:0]  data_in_6;
    reg  [7:0]  data_in_7;
    wire [63:0] data_in;
    reg         data_in_valid;

    wire [63:0] data_out;
    wire        data_out_valid;

    integer     pair_idx;      // index of the byte pair being checked
    integer     error_count;   // number of ordering violations seen so far

    // Clock starts low; reset is held for the first 50 ns.
    initial begin
        clk         = 1'b0;
        rst         = 1'b1;
        error_count = 0;
        #50 rst = 1'b0;
    end

    // 100 MHz clock (10 ns period).
    always #5 clk = !clk;

    // Stimulus: zeros during reset, then a fresh random word every cycle.
    // {$random} makes the value non-negative; % 256 covers the full
    // 8-bit range 0..255.
    always @(posedge clk) begin
        if (rst == 1'b1) begin
            data_in_0     <= 8'd0;
            data_in_1     <= 8'd0;
            data_in_2     <= 8'd0;
            data_in_3     <= 8'd0;
            data_in_4     <= 8'd0;
            data_in_5     <= 8'd0;
            data_in_6     <= 8'd0;
            data_in_7     <= 8'd0;
            data_in_valid <= 1'b0;
        end else begin
            data_in_0     <= {$random} % 256;
            data_in_1     <= {$random} % 256;
            data_in_2     <= {$random} % 256;
            data_in_3     <= {$random} % 256;
            data_in_4     <= {$random} % 256;
            data_in_5     <= {$random} % 256;
            data_in_6     <= {$random} % 256;
            data_in_7     <= {$random} % 256;
            data_in_valid <= 1'b1;
        end
    end

    // data_in_0 is the most significant byte, data_in_7 the least significant.
    assign data_in = {data_in_0, data_in_1, data_in_2, data_in_3,
                      data_in_4, data_in_5, data_in_6, data_in_7};

    bubble u_bubble(
        .clk            (clk            ),
        .rst            (rst            ),
        .data_in        (data_in        ),
        .data_in_valid  (data_in_valid  ),
        .data_out       (data_out       ),
        .data_out_valid (data_out_valid )
    );

    // Checker: every valid output word must be sorted in non-decreasing
    // order from the least significant byte upwards.
    always @(posedge clk) begin
        if (data_out_valid === 1'b1) begin
            for (pair_idx = 0; pair_idx < 7; pair_idx = pair_idx + 1) begin
                if (data_out[pair_idx*8 +: 8] > data_out[(pair_idx+1)*8 +: 8]) begin
                    error_count = error_count + 1;
                    $display("ERROR @%0t: data_out=%h is not sorted at byte %0d",
                             $time, data_out, pair_idx);
                end
            end
        end
    end

endmodule
仿真结果:
可以看到每个时钟周期输出8个排好序的数字。
消耗的资源如上。如果不需要流水输出(即不要求每个时钟周期都输出一组排序结果),使用的资源还可以进一步压缩,这需要根据具体需求在资源和性能之间进行权衡。
审核编辑:刘清
全部0条评论
快来发表一下你的评论吧 !