0
votes

For an academic excercise, I have implemented a 32-bit Karatsuba multiplier which takes 17 cycles to run by doing parallel multiplication of 16 bits each and shifting them accordingly.

I am getting an issue where the partial products need to be unsigned, but booth multiplier is generating signed partial product for me, regardless of the input type I give, because of which I get incorrect partial products. How can I solve this?

For eg. my two signed inputs are 0xA000_000A and 0x000A_A000. So the first partial product of A000 * 000A should be 64000 but I get 0xFFFC4000 (FFFF_A000 * 0000_000A). I have shared my code here for the booth mult and its testbench.

module booth_multiplier 
(
    input logic clk,
    input logic rst,
    input logic valid,  
    input logic signed [15:0] Mul_X,
    input logic signed [15:0] Mul_Y,
    output logic signed [31:0] product,
    output logic result_ready
);

    logic unsigned Q_1;
    bit [4:0] count;
    logic signed [15:0] multiplier;

    logic signed [15:0] multiplicand;

    logic [15:0] A, temp_A;
    logic signed [32:0] partial_product;
    logic signed [32:0] partial_multiplier;


    typedef enum {IDLE=0, OPERATE} fsm;
    fsm state, next_state;

        parameter ADD = 2'b01, SUB = 2'b10;

    //assign product = multiplier[16:1];


    always@(posedge clk or negedge rst)
    begin
        if(~rst)
        begin
           count <= 0;
           state <= IDLE;
           multiplier <= 0;
               multiplicand <= 0;
        end
        else begin
           count <= count+1;
           state <= next_state;
        end
    end

    always@(*)
    begin
        case(state)

           IDLE : begin
                Q_1 = 0;
                A = 0;
                count = 0;
                product = 0;
                temp_A = 0;
                result_ready = 0;
                if(valid) begin
                       multiplicand   = Mul_X;
                   multiplier   =   Mul_Y;
                   partial_product = {A, multiplier, Q_1};
                   partial_multiplier = 0;
                   next_state = OPERATE;
                end

                  end

           OPERATE:   begin
                case(partial_product[1:0])

                     ADD:   begin
                        temp_A = A + multiplicand;
                        multiplier = partial_product[16:1];
                        partial_multiplier = {temp_A, multiplier, Q_1};
                        partial_product = partial_multiplier >>> 1; 
                        Q_1 = partial_product[0];
                        A = partial_product[32:17];
                        end
                     SUB:   begin
                        temp_A = A - multiplicand;
                        multiplier = partial_product[16:1];
                        partial_multiplier = {temp_A, multiplier, Q_1};
                        partial_product = partial_multiplier >>> 1;
                        Q_1 = partial_product[0];
                        A = partial_product[32:17];
                        end
                   default: begin
                        temp_A = A;
                        multiplier = partial_product[16:1];
                            partial_multiplier = {temp_A, multiplier, Q_1};
                        partial_product = partial_multiplier >>> 1;
                        Q_1 = multiplier[0];
                        A = partial_product[32:17];
                        end
                endcase

                if(count == 16) begin
                   next_state = IDLE;
                   product = partial_product >> 1;
                   result_ready = 1;
                end 
                else  next_state = OPERATE;  
              end


        endcase

    end

endmodule

This I am using to do 4 parallel multiplications in

module fast_multiplier
(
    input logic clk,
    input logic rst,
    input valid,
    input logic signed [31:0] multiplicand,
    input logic signed [31:0] multiplier,
    output logic signed [63:0] product,
    output logic ready);

    logic [15:0] X1;
    logic [15:0] Y1;
    logic [15:0] Xr;
    logic [15:0] Yr;
    logic [31:0] X1_Yr;
    logic [31:0] Xr_Yr;
    logic [31:0] X1_Y1;
    logic [31:0] Xr_Y1;
    logic ready1, ready2, ready3, ready4;


    assign X1 = multiplicand[31:16];
    assign Y1 = multiplier[31:16];
    assign Xr = multiplicand[15:0];
    assign Yr = multiplier[15:0];


    booth_multiplier X1Y1
    (
    .clk(clk),
    .rst(rst),
    .valid(valid),  
    .Mul_X(X1),
    .Mul_Y(Y1),
    .product(X1_Y1),
    .result_ready(ready1));

    booth_multiplier X1Yr
    (
    .clk(clk),
    .rst(rst),
    .valid(valid),  
    .Mul_X(X1),
    .Mul_Y(Yr),
    .product(X1_Yr),
    .result_ready(ready2));

    booth_multiplier XrY1
    (
    .clk(clk),
    .rst(rst),
    .valid(valid),  
    .Mul_X(Xr),
    .Mul_Y(Y1),
    .product(Xr_Y1),
    .result_ready(ready3));

    booth_multiplier XrYr
    (
    .clk(clk),
    .rst(rst),
    .valid(valid),  
    .Mul_X(Xr),
    .Mul_Y(Yr),
    .product(Xr_Yr),
    .result_ready(ready4));

    always@(posedge clk or negedge rst)
    begin
        if(~rst)
        begin
            product <= 0;
            ready <= 0;
            X1_Yr <= 0;
            X1_Y1 <= 0;
            Xr_Yr <= 0;
            Xr_Y1 <= 0;
        end
        else begin
            product <= ({32'b0,X1_Y1} << 32) + (({32'b0,X1_Yr} + {32'b0,Xr_Y1}) << 16) + {32'b0,Xr_Yr};
            ready <= ready1 & ready2 & ready3 & ready4;
        end
    end

endmodule

Also, sharing the testbench,

module top_booth_multiplier ();


    logic clk;
    logic rst;
    logic valid;    
    logic signed [31:0] multiplicand;
    logic signed [31:0] multiplier;
        logic signed [63:0] product;
    logic ready;

    fast_multiplier booth (.*);

    initial
    begin
        clk = 0;
        forever #10 clk = ~clk;
    end




    initial
    begin
        rst = 0;
        #7 rst = 1;

        @(posedge clk) valid <= 1;
        multiplier = 32'hA000000A;
        multiplicand = 32'h000AA000;
        @(posedge clk) valid <= 0;
        while(ready == 0)
        begin
            @(posedge clk);
        end


        repeat (20) @(posedge clk);
        $finish;
    end

endmodule
1

1 Answers

1
votes

You need to consider "signed" inputs in booth multiplier ONLY for X1Y1 instance. All other instances MUST use "unsigned" inputs. This change should help!