I am designing a code for SAO filtering and my code is taking too long to synthesize. I am taking a 66x66 pixels (1 CTU) and producing a 64x64 output for a whole frame. I am considering 8 CTus in 1 frame.
module saocalc(input clk,input[7:0]sao_offset1,input[7:0]sao_offset2,
input[7:0]sao_offset3,input[7:0]sao_offset4,
outputreg[7:0]saoval);
reg[7:0]mem[0:34855];
reg[7:0]mem1[0:32767];
reg[7:0]bu[0:65][0:65];
reg[7:0]sao_out[0:63][0:63];
integer i;
reg[7:0]k=8'd0;
integer j;
reg[7:0]sao_type;
initial
begin
$readmemh("0-7ctus.txt",mem);
end
always@(posedge clk)
begin
if(k<8)
begin
for(j=0;j<66;j=j+1)
begin
for(i=0;i<66;i=i+1)
bu[j][i]<=mem[i+(j*66)+4357*k+1];
end
sao_type<=mem[4357*k];
end
else
k<=0;
k<=k+1;
end
always@(posedge clk)
begin
if(sao_type==8'd0)
begin
for(j=0;j<64;j=j+1)
begin
for(i=1;i<64;i=i+1)
begin
if((bu[j][i]>bu[j][i-1])&&(bu[j][i]>bu[j][i+1]))
saoval=bu[j][i]-sao_offset4;
else if((bu[j][i]<bu[j][i-1])&&(bu[j][i]<bu[j][i+1]))
saoval=bu[j][i]+sao_offset1;
else if(((bu[j][i]<bu[j][i-1])&&(bu[j][i]==bu[j][i+1]))||
((bu[j][i]==bu[j][i-1])&&(bu[j][i]<bu[j][i+1])))
saoval=bu[j][i]+sao_offset2;
else if(((bu[j][i]==bu[j][i-1])&&(bu[j][i]>bu[j][i+1]))||
((bu[j][i]>bu[j][i-1])&&(bu[j][i]==bu[j][i+1])))
saoval=bu[j][i]-sao_offset3;
else
saoval=bu[j][i];
sao_out[j][i]=saoval;
end
sao_out[j][0]=bu[j][0];
end
end
else if(sao_type==8'd1)
begin
for(i=0;i<64;i=i+1)
begin
for(j=1;j<64;j=j+1)
begin
if((bu[j][i]>bu[j-1][i])&&(bu[j][i]>bu[j+1][i]))
saoval=bu[j][i]-sao_offset4;
else if((bu[j][i]<bu[j-1][i])&&(bu[j][i]<bu[j+1][i]))
saoval=bu[j][i]+sao_offset1;
else if(((bu[j][i]<bu[j-1][i])&&(bu[j][i]==bu[j+1][i]))||
((bu[j][i]==bu[j-1][i])&&(bu[j][i]<bu[j+1][i])))
saoval=bu[j][i]+sao_offset2;
else if(((bu[j][i]==bu[j-1][i])&&(bu[j][i]>bu[j+1][i]))||
((bu[j][i]>bu[j-1][i])&&(bu[j][i]==bu[j+1][i])))
saoval=bu[j][i]-sao_offset3;
else
saoval=bu[j][i];
sao_out[j][i]=saoval;
end
sao_out[0][i]=bu[0][i];
end
end
else if(sao_type==8'd2)
begin
for(j=1;j<64;j=j+1)
begin
for(i=1;i<64;i=i+1)
begin
if((bu[j][i]>bu[j-1][i-1])&&(bu[j][i]>bu[j+1][i+1]))
saoval=bu[j][i]-sao_offset4;
else if((bu[j][i]<bu[j-1][i-1])&&(bu[j][i]<bu[j+1][i+1]))
saoval=bu[j][i]+sao_offset1;
else if(((bu[j][i]<bu[j-1][i-1])&&(bu[j][i]==bu[j+1][i+1]))||
((bu[j][i]==bu[j-1][i-1])&&(bu[j][i]<bu[j+1][i+1])))
saoval=bu[j][i]+sao_offset2;
else if(((bu[j][i]==bu[j-1][i-1])&&(bu[j][i]>bu[j+1][i+1]))||
((bu[j][i]>bu[j-1][i-1])&&(bu[j][i]==bu[j+1][i+1])))
saoval=bu[j][i]-sao_offset3;
else
saoval=bu[j][i];
sao_out[j][i]=saoval;
sao_out[0][i]=bu[0][i];
end
sao_out[j][0]=bu[j][0];
sao_out[0][0]=bu[0][0];
end
end
else if(sao_type==8'd3)
begin
for(j=1;j<64;j=j+1)
begin
for(i=1;i<64;i=i+1)
begin
if((bu[j][i]>bu[j-1][i+1])&&(bu[j][i]>bu[j+1][i-1]))
saoval=bu[j][i]-sao_offset4;
else if((bu[j][i]<bu[j-1][i+1])&&(bu[j][i]<bu[j+1][i-1]))
saoval=bu[j][i]+sao_offset1;
else if(((bu[j][i]<bu[j-1][i+1])&&(bu[j][i]==bu[j+1][i-1]))||
((bu[j][i]==bu[j-1][i+1])&&(bu[j][i]<bu[j+1][i-1])))
saoval=bu[j][i]+sao_offset2;
else if(((bu[j][i]==bu[j-1][i+1])&&(bu[j][i]>bu[j+1][i-1]))||
((bu[j][i]>bu[j-1][i+1])&&(bu[j][i]==bu[j+1][i-1])))
saoval=bu[j][i]-sao_offset3;
else
saoval=bu[j][i];
sao_out[j][i]=saoval;
sao_out[0][i]=bu[0][i];
end
sao_out[j][0]=bu[j][0];
sao_out[0][0]=bu[0][0];
end
end
else if(sao_type==8'd4)
begin
for(i=0;i<64;i=i+1)
begin
for(j=0;j<64;j=j+1)
begin
if((bu[i][j]>7)&&(bu[i][j]<16))
saoval=bu[i][j]+sao_offset1;
else if((bu[i][j]>15)&&(bu[i][j]<24))
saoval=bu[i][j]+sao_offset2;
else if((bu[i][j]>23)&&(bu[i][j]<32))
saoval=bu[i][j]-sao_offset3;
else if((bu[i][j]>31)&&(bu[i][j]<40))
saoval=bu[i][j]-sao_offset4;
else
saoval=bu[i][j];
sao_out[i][j]=saoval;
end
end
end
else
saoval=3;
end
always@(posedge clk)
begin
for(j=0;j<64;j=j+1)
begin
for(i=0;i<64;i=i+1)
mem1[j+(i*64)+(4096*k)]=sao_out[j][i];
end
end
endmodule