Verilog FIR filter using FPGA

Question

I am implementing an FIR filter in Verilog, using the DE2 board. For some reason the output out of the speakers is full of static, although it does appear to filter out some frequencies. Here is the code for the FIR:

// Local wires.
wire read_ready, write_ready, read, write;
wire [23:0] readdata_left, readdata_right;
wire [23:0] writedata_left, writedata_right;

assign writedata_left = output_sample;

assign writedata_right = output_sample;
assign read = 1;
assign write = 1;

wire [23:0] input_sample = readdata_left;

reg [23:0] output_sample;

The input sample is put through the FIR, and the output sample is put to both left and right speakers for simplicity.

//The FIR filter
parameter N = 40;
reg signed[23:0] coeffs[39:0];
reg [23:0] holderBefore[39:0];

wire [23:0] toAdd[39:0];

// -- 1000-1100
always @(*)
begin
    coeffs[0]=24'b100000000110101001111110; // -- 1
    coeffs[1]=24'b100000000110100011011011; // -- 2
    coeffs[2]=24'b100000000111000100001100; // -- 3
    coeffs[3]=24'b100000000111111000101000;// -- 4
    coeffs[4]=24'b100000001000011111111100;// -- 5
    coeffs[5]=24'b100000001000011001011001;// -- 6
    coeffs[6]=24'b100000000111010001010011;// -- 7
    coeffs[7]=24'b100000000100100110111010;// -- 8
    coeffs[8]=24'b100000000000011010001101;// -- 9
    coeffs[9]=24'b000000000101101111000000;// -- 10
    coeffs[10]=24'b000000001101100001000100;// -- 11
    coeffs[11]=24'b000000010110111100000000;// -- 12
    coeffs[12]=24'b000000100001011111000001;// -- 13
    coeffs[13]=24'b000000101100101001010111;// -- 14
    coeffs[14]=24'b000000111000000000110100;// -- 15
    coeffs[15]=24'b000001000010101010011001;// -- 16
    coeffs[16]=24'b000001001100001011111000;// -- 17
    coeffs[17]=24'b000001010011111101111100;// -- 18
    coeffs[18]=24'b000001011001011001010010;// -- 19
    coeffs[19]=24'b000001011100010000110010;// -- 20
    coeffs[20]=24'b000001011100010000110010;// -- 20
    coeffs[21]=24'b000001011001011001010010;// -- 19
    coeffs[22]=24'b000001001100001011111000;// -- 18
    coeffs[23]=24'b000001001100001011111000;// -- 17
    coeffs[24]=24'b000001000010101010011001;// -- 16
    coeffs[25]=24'b000000111000000000110100;// -- 15
    coeffs[26]=24'b000000101100101001010111;// -- 14
    coeffs[27]=24'b000000100001011111000001;// -- 13
    coeffs[28]=24'b000000010110111100000000;// -- 12
    coeffs[29]=24'b000000001101100001000100;// -- 11
    coeffs[30]=24'b000000000101101111000000;// -- 10
    coeffs[31]=24'b100000000000011010001101;// -- 9
    coeffs[32]=24'b100000000100100110111010;// -- 8
    coeffs[33]=24'b100000000111010001010011;// -- 7
    coeffs[34]=24'b100000001000011001011001;// -- 6
    coeffs[35]=24'b100000001000011111111100;// -- 5
    coeffs[36]=24'b100000000111111000101000;// -- 4
    coeffs[37]=24'b100000000111000100001100;// -- 3
    coeffs[38]=24'b100000000110100011011011;// -- 2
    coeffs[39]=24'b100000000110101001111110;// -- 1
end

genvar i;

generate
for (i=0; i<N; i=i+1)
    begin: mult
        multiplier mult1(
          .dataa(coeffs[i]),
          .datab(holderBefore[i]),
          .out(toAdd[i]));
    end
endgenerate

always @(posedge CLOCK_50 or posedge reset)
begin
    if(reset)
        begin
            holderBefore[39]     <= 0;
            holderBefore[38]     <= 0;
            holderBefore[37]     <= 0;
            holderBefore[36]     <= 0;
            holderBefore[35]     <= 0;
            holderBefore[34]     <= 0;
            holderBefore[33]     <= 0;
            holderBefore[32]     <= 0;
            holderBefore[31]     <= 0;
            holderBefore[30]     <= 0;
            holderBefore[29]     <= 0;
            holderBefore[28]     <= 0;
            holderBefore[27]     <= 0;
            holderBefore[26]     <= 0;
            holderBefore[25]     <= 0;
            holderBefore[24]     <= 0;
            holderBefore[23]     <= 0;
            holderBefore[22]     <= 0;
            holderBefore[21]     <= 0;
            holderBefore[20]     <= 0;
            holderBefore[19]     <= 0;
            holderBefore[18]     <= 0;
            holderBefore[17]     <= 0;
            holderBefore[16]     <= 0;
            holderBefore[15]     <= 0;
            holderBefore[14]     <= 0;
            holderBefore[13]     <= 0;
            holderBefore[12]     <= 0;
            holderBefore[11]     <= 0;
            holderBefore[10]     <= 0;
            holderBefore[9]      <= 0;
            holderBefore[8]      <= 0;
            holderBefore[7]      <= 0;
            holderBefore[6]      <= 0;
            holderBefore[5]      <= 0;
            holderBefore[4]      <= 0;
            holderBefore[3]      <= 0;
            holderBefore[2]      <= 0;
            holderBefore[1]      <= 0;
            holderBefore[0]      <= 0;
            output_sample        <= 0;
        end
    else
        begin
            holderBefore[39]     <= holderBefore[38];
            holderBefore[38]     <= holderBefore[37];
            holderBefore[37]     <= holderBefore[36];
            holderBefore[36]     <= holderBefore[35];
            holderBefore[35]     <= holderBefore[34];
            holderBefore[34]     <= holderBefore[33];
            holderBefore[33]     <= holderBefore[32];
            holderBefore[32]     <= holderBefore[31];
            holderBefore[31]     <= holderBefore[30];
            holderBefore[30]     <= holderBefore[29];
            holderBefore[29]     <= holderBefore[28];
            holderBefore[28]     <= holderBefore[27];
            holderBefore[27]     <= holderBefore[26];
            holderBefore[26]     <= holderBefore[25];
            holderBefore[25]     <= holderBefore[24];
            holderBefore[24]     <= holderBefore[23];
            holderBefore[23]     <= holderBefore[22];
            holderBefore[22]     <= holderBefore[21];
            holderBefore[21]     <= holderBefore[20];
            holderBefore[20]     <= holderBefore[19];
            holderBefore[19]     <= holderBefore[18];
            holderBefore[18]     <= holderBefore[17];
            holderBefore[17]     <= holderBefore[16];
            holderBefore[16]     <= holderBefore[15];
            holderBefore[15]     <= holderBefore[14];
            holderBefore[14]     <= holderBefore[13];
            holderBefore[13]     <= holderBefore[12];
            holderBefore[12]     <= holderBefore[11];
            holderBefore[11]     <= holderBefore[10];
            holderBefore[10]     <= holderBefore[9];
            holderBefore[9]      <= holderBefore[8];
            holderBefore[8]      <= holderBefore[7];
            holderBefore[7]      <= holderBefore[6];
            holderBefore[6]      <= holderBefore[5];
            holderBefore[5]      <= holderBefore[4];
            holderBefore[4]      <= holderBefore[3];
            holderBefore[3]      <= holderBefore[2];
            holderBefore[2]      <= holderBefore[1];
            holderBefore[1]      <= holderBefore[0];
            holderBefore[0]      <= input_sample;
            output_sample <= (input_sample + toAdd[0] + toAdd[1] + 
                              toAdd[2] + toAdd[3] + toAdd[4] + toAdd[5] +
                              toAdd[6] + toAdd[7] + toAdd[8] + toAdd[9] + 
                              toAdd[10] + toAdd[11] + toAdd[12]+ toAdd[13] + toAdd[14] + 
                              toAdd[15] + toAdd[16] + toAdd[17] + toAdd[18] +
                              toAdd[19] + toAdd[20] + toAdd[21] + toAdd[22] + 
                              toAdd[23] + toAdd[24] + toAdd[25] +toAdd[26] + toAdd[27] + toAdd[28] + toAdd[29] +
                              toAdd[19] + toAdd[20] + toAdd[21] + toAdd[22] + 
                              toAdd[30] + toAdd[31] + toAdd[32]+ toAdd[33] + toAdd[34] + toAdd[35] + toAdd[36] +
                              toAdd[37] + toAdd[38] + toAdd[39]);
        end
end

//The multiplier
module multiplier (dataa,datab,out);
input [23:0]dataa;
input [23:0]datab;
reg [47:0]result;
output[23:0]out;
always@(*)begin
    result = dataa*datab;
end
assign out = result[46:24]; 
endmodule

Granted that the coefficients are correct, is there something wrong with the code? I assume there is a problem with the representation of the coefficients in binary, or the multiplier is wrong but I can't figure it out.

What happens if you pass the input data straight through to the output without filtering it? Also, what sort of number representation are you using for the audio data, and how are you accounting for the scaling of the coefficients? — Dave Tweed, Nov 26 '14 at 22:41
without the filtering, the sound is exactly what is spoken into the mic. we are using signed representation; one of the problems was that the multiplier did not have have "signed" next to the reg result. so we fixed that but it still doesn't seem to work, maybe there is an overflow when we perform addition? — algoBaller, Nov 26 '14 at 23:45
You could try to set all, but one coefficient to zero and on to max (assume this is 24'b100000000000000000000000). Your filter would behave like a delay line then. — Roland Mieslinger, Jan 31 '15 at 20:27
I'm wondering about "assign out = result[46:24];" at the end of your multiplier shouldn't this be "assign out = result[47:24];" — Roland Mieslinger, Jan 31 '15 at 20:39
If you want to check for overflow, simulate with a sample sequence such that each pos coef is multiplied with +max and each neg coef with -max and vice versa (pos * -max and neg * max). This should give you the upper and lower bound of your output signal. — Roland Mieslinger, Jan 31 '15 at 21:31

Roland Mieslinger · Answer 1 · 2015-01-31T20:54:32.830

1

Your code you be a bit easier to read with a single tap as module like (verilog pseudo code, ignoring eg. bit shifts after mul, etc)

module tap(reset, clk, samplein, sampleout, coef, sumin, sumout) 
  always@(posedge clk) begin
    if(reset) begin
      sumout <= 0;
      sampleout <= 0;
    end else begin
      sampleout <= samplein;
      sumout <= sumin + coef * samplein;
    end
endmodule

use it like:

tap tap0(reset, clk, input,    buf[0],  coef[0],  0, sum[0]);
tap tap1(reset, clk, buf[0],   buf[1],  coef[0],  0, sum[1]);
...
tap tap39(reset, clk, buf[38], buf[39], coef[39], 0, output);

I think that this is a lot easier to use in a testbench

edited Jan 31 '15 at 20:54

answered Jan 31 '15 at 20:45

Roland Mieslinger

618
3
7

for audio stuff, this is still a very wasteful implementation because of your low clk. frequency. Multipliers are expensive, and here 40 multipliers are use at a very low frequency. A more efficient solution would use a clk that is at least 40 times higher than your audio sample frequency and have one multiplier and add module doing all the operations. – Roland Mieslinger Jan 31 '15 at 20:57

score 0 · Answer 2 · answered Dec 02 '14 at 15:55

Ideally you would make a recording of the input data with SignalTap or similar to see how the data arrives inside the FPGA, or failing that generate some test data in the expected format using a microphone and matlab. Then you can write Verilog to read your samples and actually debug the design using a simulator and waveform viewer tool. Icarus Verilog and GTKWave are good enough to do this work with at no cost.

Altera have a FIR core you might want to try.

Verilog FIR filter using FPGA

2 Answers2