For an academic excercise, I have implemented a 32-bit Karatsuba multiplier which takes 17 cycles to run by doing parallel multiplication of 16 bits each and shifting them accordingly.
I am getting an issue where the partial products need to be unsigned, but booth multiplier is generating signed partial product for me, regardless of the input type I give, because of which I get incorrect partial products. How can I solve this?
For eg. my two signed inputs are 0xA000_000A and 0x000A_A000. So the first partial product of A000 * 000A should be 64000 but I get 0xFFFC4000 (FFFF_A000 * 0000_000A). I have shared my code here for the booth mult and its testbench.
module booth_multiplier
(
input logic clk,
input logic rst,
input logic valid,
input logic signed [15:0] Mul_X,
input logic signed [15:0] Mul_Y,
output logic signed [31:0] product,
output logic result_ready
);
logic unsigned Q_1;
bit [4:0] count;
logic signed [15:0] multiplier;
logic signed [15:0] multiplicand;
logic [15:0] A, temp_A;
logic signed [32:0] partial_product;
logic signed [32:0] partial_multiplier;
typedef enum {IDLE=0, OPERATE} fsm;
fsm state, next_state;
parameter ADD = 2'b01, SUB = 2'b10;
//assign product = multiplier[16:1];
always@(posedge clk or negedge rst)
begin
if(~rst)
begin
count <= 0;
state <= IDLE;
multiplier <= 0;
multiplicand <= 0;
end
else begin
count <= count+1;
state <= next_state;
end
end
always@(*)
begin
case(state)
IDLE : begin
Q_1 = 0;
A = 0;
count = 0;
product = 0;
temp_A = 0;
result_ready = 0;
if(valid) begin
multiplicand = Mul_X;
multiplier = Mul_Y;
partial_product = {A, multiplier, Q_1};
partial_multiplier = 0;
next_state = OPERATE;
end
end
OPERATE: begin
case(partial_product[1:0])
ADD: begin
temp_A = A + multiplicand;
multiplier = partial_product[16:1];
partial_multiplier = {temp_A, multiplier, Q_1};
partial_product = partial_multiplier >>> 1;
Q_1 = partial_product[0];
A = partial_product[32:17];
end
SUB: begin
temp_A = A - multiplicand;
multiplier = partial_product[16:1];
partial_multiplier = {temp_A, multiplier, Q_1};
partial_product = partial_multiplier >>> 1;
Q_1 = partial_product[0];
A = partial_product[32:17];
end
default: begin
temp_A = A;
multiplier = partial_product[16:1];
partial_multiplier = {temp_A, multiplier, Q_1};
partial_product = partial_multiplier >>> 1;
Q_1 = multiplier[0];
A = partial_product[32:17];
end
endcase
if(count == 16) begin
next_state = IDLE;
product = partial_product >> 1;
result_ready = 1;
end
else next_state = OPERATE;
end
endcase
end
endmodule
This I am using to do 4 parallel multiplications in
module fast_multiplier
(
input logic clk,
input logic rst,
input valid,
input logic signed [31:0] multiplicand,
input logic signed [31:0] multiplier,
output logic signed [63:0] product,
output logic ready);
logic [15:0] X1;
logic [15:0] Y1;
logic [15:0] Xr;
logic [15:0] Yr;
logic [31:0] X1_Yr;
logic [31:0] Xr_Yr;
logic [31:0] X1_Y1;
logic [31:0] Xr_Y1;
logic ready1, ready2, ready3, ready4;
assign X1 = multiplicand[31:16];
assign Y1 = multiplier[31:16];
assign Xr = multiplicand[15:0];
assign Yr = multiplier[15:0];
booth_multiplier X1Y1
(
.clk(clk),
.rst(rst),
.valid(valid),
.Mul_X(X1),
.Mul_Y(Y1),
.product(X1_Y1),
.result_ready(ready1));
booth_multiplier X1Yr
(
.clk(clk),
.rst(rst),
.valid(valid),
.Mul_X(X1),
.Mul_Y(Yr),
.product(X1_Yr),
.result_ready(ready2));
booth_multiplier XrY1
(
.clk(clk),
.rst(rst),
.valid(valid),
.Mul_X(Xr),
.Mul_Y(Y1),
.product(Xr_Y1),
.result_ready(ready3));
booth_multiplier XrYr
(
.clk(clk),
.rst(rst),
.valid(valid),
.Mul_X(Xr),
.Mul_Y(Yr),
.product(Xr_Yr),
.result_ready(ready4));
always@(posedge clk or negedge rst)
begin
if(~rst)
begin
product <= 0;
ready <= 0;
X1_Yr <= 0;
X1_Y1 <= 0;
Xr_Yr <= 0;
Xr_Y1 <= 0;
end
else begin
product <= ({32'b0,X1_Y1} << 32) + (({32'b0,X1_Yr} + {32'b0,Xr_Y1}) << 16) + {32'b0,Xr_Yr};
ready <= ready1 & ready2 & ready3 & ready4;
end
end
endmodule
Also, sharing the testbench,
module top_booth_multiplier ();
logic clk;
logic rst;
logic valid;
logic signed [31:0] multiplicand;
logic signed [31:0] multiplier;
logic signed [63:0] product;
logic ready;
fast_multiplier booth (.*);
initial
begin
clk = 0;
forever #10 clk = ~clk;
end
initial
begin
rst = 0;
#7 rst = 1;
@(posedge clk) valid <= 1;
multiplier = 32'hA000000A;
multiplicand = 32'h000AA000;
@(posedge clk) valid <= 0;
while(ready == 0)
begin
@(posedge clk);
end
repeat (20) @(posedge clk);
$finish;
end
endmodule