"""
Generate VHDL or Verilog code for a signed multiplier.

Usage: genmul.py --lang=vhdl|verilog [--nolib] Xbits Ybits npipe
Usage: genmul.py --lang=vhdl|verilog --lib

  --lang=...    Specify VHDL or Verilog code
  --nolib       Do not generate library components
  --lib         Generate only library components
  Xbits         Length of input word in bits (Xbits >= 4)
  Ybits         Length of input word in bits (Ybits >= Xbits)
  npipe         Number of register stages (0 or 1 or 2)

See also:
  G. Knagge, "ASIC Design for Signal Processing",
    http://www.geoffknagge.com/fyp/booth.shtml, 2010.
  X. Xiong, M. Lin, "Low Power 8-bit Baugh-Wooley Multiplier Based on
    Wallace Tree Architecture", Lecture Notes in Electrical Engineering, 2012.
  L. Dadda, "Some schemes for parallel multipliers",
    Associazione Elettrotecnica et Elettronica Italiana, 1965.
  R. P. Brent, H. T. Kung, "A Regular Layout for Parallel Adders",
    IEEE Transactions on Computers, 1982.
"""

# NOTE: all output goes through single-argument print(...) calls and
# sys.stderr.write(), which behave identically on Python 2 and Python 3.

import sys
import argparse


class Expr:
    """Represent a node in the expression tree."""
    # Name of the output wire; assigned by gen_netlist().
    wire = None
    # Set to True by gen_netlist() once the node has been visited.
    done = False


class ConstBit(Expr):
    """Represent constant '0' or '1' bit."""
    def __init__(self, v):
        assert v in (0, 1)
        self.v = v


class InBit(Expr):
    """Represent an input bit in the expression tree."""
    def __init__(self, xy, p):
        assert xy in ('x', 'y')
        self.xy = xy
        self.p = p


class Reg(Expr):
    """Represent flip-flop."""
    def __init__(self, v):
        self.v = v


class NotBit(Expr):
    """Represent inverter."""
    def __init__(self, v):
        self.v = v


class BoothNeg(Expr):
    """Represent calculation of radix-4 Booth sign-inversion flag."""
    def __init__(self, pat):
        assert len(pat) == 3
        self.pat = pat


class BoothProd(Expr):
    """Represent calculation of partial product bit with radix-4 Booth."""
    def __init__(self, pat, b):
        assert len(pat) == 3
        assert len(b) == 2
        self.pat = pat
        self.b = b


class AddBitD(Expr):
    """Represent selection of data bit from adder."""
    def __init__(self, v):
        self.v = v


class AddBitC(Expr):
    """Represent selection of carry bit from adder."""
    def __init__(self, v):
        self.v = v


class HalfAdd(Expr):
    """Represent half adder."""
    def __init__(self, a, b):
        self.a = a
        self.b = b


class FullAdd(Expr):
    """Represent full adder."""
    def __init__(self, a, b, c):
        self.a = a
        self.b = b
        self.c = c


class CarryProp(Expr):
    """Represent base node of carry propagation tree."""
    def __init__(self, a, b):
        self.a = a
        self.b = b


class CarryMerge(Expr):
    """Represent internal node of carry propagation tree."""
    def __init__(self, p0, p1):
        self.p0 = p0
        self.p1 = p1


class CarryEval(Expr):
    """Represent logic to calculate carry-out."""
    def __init__(self, p, c):
        self.p = p
        self.c = c


def gen_partial_products(xvec, yvec):
    """Generate list of partial products using radix-4 Booth algorithm.

    xvec and yvec are lists of expression nodes, LSB first.

    Return [ (exponent, bit), ... ].
    """

    partial_products = []
    ylen = len(yvec)

    # Append zero on LSB side of xvec, sign-extend on MSB side of xvec.
    xtmp = [ConstBit(0)] + xvec + xvec[-1:]

    # Append zero on LSB side of yvec, sign-extend on MSB side of yvec.
    ytmp = [ConstBit(0)] + yvec + yvec[-1:]

    # Step through xvec, 2 bits at a time.
    for i in range(0, len(xvec), 2):

        # Select group of 3 bits from xvec (one bit overlap with last group).
        pat = xtmp[i:i+3]

        # Add either 0, +1, +2, -1 or -2 times yvec according to Booth method.

        # Step through the bits of yvec.
        for j in range(ylen + 1):

            # Use Booth encoder to choose between 0, yvec[j], yvec[j-1] or
            # inverted bits yvec[j] or yvec[j-1].
            t = BoothProd(pat, ytmp[j:j+2])

            # Invert the MSB bit, except on first row.
            if i > 0 and j == ylen:
                t = NotBit(t)

            # Add result as partial product.
            partial_products.append((i + j, t))

        # At this point "t" is the MSB bit of the current row.
        if i == 0:
            # For first row, sign-extend by two bits.
            # Apply sign inversion on the new MSB bit.
            partial_products.append((i + ylen + 1, t))
            partial_products.append((i + ylen + 2, NotBit(t)))
        else:
            # For each row except the first row, add constant 1
            # in the next column.
            partial_products.append((i + ylen + 1, ConstBit(1)))

        # Use Booth encoder to add 1 in case of negative factor (-1 or -2).
        partial_products.append((i, BoothNeg(pat)))

    return partial_products


def gen_dadda_tree(partial_products, nbits):
    """Generate carry save adder based on Dadda tree.

    partial_products is a list of (exponent, bit) tuples; entries with
    exponent >= nbits are discarded.

    Return (avec, bvec), two lists of nbits expression nodes whose sum
    (modulo 2**nbits) equals the sum of the partial products.
    """

    # Sort partial products by bit position.
    tvec = [[] for _ in range(nbits)]
    for (p, b) in partial_products:
        if p < nbits:
            tvec[p].append(b)

    # Build Dadda tree.
    while any(len(t) > 3 for t in tvec):

        # New layer (one extra column to catch the carry out of the MSB).
        nvec = [[] for _ in range(nbits + 1)]

        for p in range(nbits):
            t = tvec[p]
            i = 0

            while i + 2 < len(t):
                # build full adder
                a = FullAdd(t[i], t[i+1], t[i+2])
                nvec[p].append(AddBitD(a))
                nvec[p+1].append(AddBitC(a))
                i += 3

            if i + 1 < len(t) and len(nvec[p]) % 3 == 2:
                # build half adder
                a = HalfAdd(t[i], t[i+1])
                nvec[p].append(AddBitD(a))
                nvec[p+1].append(AddBitC(a))
                i += 2

            if i < len(t):
                # pass through
                nvec[p] += t[i:]

        # Drop the carry out of the MSB column.
        tvec = nvec[:nbits]

    # Last layer: reduce every column to at most two bits.
    nvec = [[] for _ in range(nbits + 1)]
    for p in range(nbits):
        t = tvec[p]
        if len(t) == 3:
            # full adder
            a = FullAdd(t[0], t[1], t[2])
            nvec[p].append(AddBitD(a))
            nvec[p+1].append(AddBitC(a))
        elif len(t) == 2 and len(nvec[p]) > 0:
            # half adder (needed because a carry already occupies a slot)
            a = HalfAdd(t[0], t[1])
            nvec[p].append(AddBitD(a))
            nvec[p+1].append(AddBitC(a))
        else:
            # pass through
            nvec[p] += t
    tvec = nvec[:nbits]

    # Extract remaining two rows of bits.
    avec = [(t[0] if len(t) > 0 else ConstBit(0)) for t in tvec]
    bvec = [(t[1] if len(t) > 1 else ConstBit(0)) for t in tvec]

    return (avec, bvec)


def gen_adder(avec, bvec):
    """Generate carry-lookahead adder.

    avec and bvec are equally long lists of expression nodes, LSB first.
    Return the list of sum bits (same length, carry-out discarded).
    """

    def carry_lookahead(pvec, cin):
        """Recursively determine carry propagation.

        Return (prop, cvec) where prop is the generate/propagate node
        covering all of pvec and cvec is the carry-in for each position.
        """
        if len(pvec) == 1:
            prop = pvec[0]
            cvec = [cin]
        else:
            k = (len(pvec) + 1) // 2
            (p0, c0) = carry_lookahead(pvec[:k], cin)
            # Carry out of the lower half is the carry into the upper half.
            ctmp = CarryEval(p0, cin)
            (p1, c1) = carry_lookahead(pvec[k:], ctmp)
            prop = CarryMerge(p0, p1)
            cvec = c0 + c1
        return (prop, cvec)

    assert len(avec) == len(bvec)

    # Determine carry-generate and carry-propagate for each position.
    pvec = [CarryProp(a, b) for (a, b) in zip(avec, bvec)]

    # Determine carry-in for each position.
    (_, cvec) = carry_lookahead(pvec, ConstBit(0))

    # Array of full adders.
    sumvec = [AddBitD(FullAdd(a, b, c))
              for (a, b, c) in zip(avec, bvec, cvec)]

    return sumvec


def gen_multiplier(xbits, ybits, npipe):
    """Generate expression tree describing multiplier logic.

    Return a list of xbits+ybits expression nodes (LSB first).
    Registers are inserted after the adder tree when npipe > 0 and
    after the final adder when npipe > 1.
    """

    xvec = [InBit('x', p) for p in range(xbits)]
    yvec = [InBit('y', p) for p in range(ybits)]

    partial_products = gen_partial_products(xvec, yvec)

    (avec, bvec) = gen_dadda_tree(partial_products, xbits + ybits)

    if npipe > 0:
        avec = [Reg(a) for a in avec]
        bvec = [Reg(b) for b in bvec]

    zvec = gen_adder(avec, bvec)

    if npipe > 1:
        zvec = [Reg(z) for z in zvec]

    return zvec


def gen_netlist(node, wires, insts):
    """Generate netlist consisting of wires and component instances.

    Walk the expression tree rooted at node, assign node.wire for every
    node reached, append new wire names to wires and append component
    nodes to insts. A node is appended to insts only after all of its
    inputs have been processed.
    """

    if node.done:
        # already processed this node
        return

    node.done = True

    if isinstance(node, (ConstBit, InBit)):
        # resolve during code generation
        node.wire = node

    elif isinstance(node, Reg):
        # create output wire
        node.wire = 'wreg%d' % len(wires)
        wires.append(node.wire)
        # recurse
        gen_netlist(node.v, wires, insts)
        # create instance
        insts.append(node)

    elif isinstance(node, NotBit):
        # create output wire
        node.wire = 'winv%d' % len(wires)
        wires.append(node.wire)
        # recurse
        gen_netlist(node.v, wires, insts)
        # create instance
        insts.append(node)

    elif isinstance(node, BoothNeg):
        # create output wire
        node.wire = 'wboothneg%d' % len(wires)
        wires.append(node.wire)
        # recurse
        for v in node.pat:
            gen_netlist(v, wires, insts)
        # create instance
        insts.append(node)

    elif isinstance(node, BoothProd):
        # create output wire
        node.wire = 'wboothprod%d' % len(wires)
        wires.append(node.wire)
        # recurse
        for v in node.pat:
            gen_netlist(v, wires, insts)
        for v in node.b:
            gen_netlist(v, wires, insts)
        # create instance
        insts.append(node)

    elif isinstance(node, AddBitD):
        # recurse; the wire is the data output of the underlying adder
        gen_netlist(node.v, wires, insts)
        node.wire = node.v.wire + 'd'

    elif isinstance(node, AddBitC):
        # recurse; the wire is the carry output of the underlying adder
        gen_netlist(node.v, wires, insts)
        node.wire = node.v.wire + 'c'

    elif isinstance(node, HalfAdd):
        # create output wires
        node.wire = 'wadd%d' % len(wires)
        wires.append(node.wire + 'd')
        wires.append(node.wire + 'c')
        # recurse
        gen_netlist(node.a, wires, insts)
        gen_netlist(node.b, wires, insts)
        # create instance
        insts.append(node)

    elif isinstance(node, FullAdd):
        # create output wires
        node.wire = 'wadd%d' % len(wires)
        wires.append(node.wire + 'd')
        wires.append(node.wire + 'c')
        # recurse
        gen_netlist(node.a, wires, insts)
        gen_netlist(node.b, wires, insts)
        gen_netlist(node.c, wires, insts)
        # create instance
        insts.append(node)

    elif isinstance(node, CarryProp):
        # create output wires
        node.wire = 'wcarry%d' % len(wires)
        wires.append(node.wire + 'g')
        wires.append(node.wire + 'p')
        # recurse
        gen_netlist(node.a, wires, insts)
        gen_netlist(node.b, wires, insts)
        # create instance
        insts.append(node)

    elif isinstance(node, CarryMerge):
        # create output wires
        node.wire = 'wcarry%d' % len(wires)
        wires.append(node.wire + 'g')
        wires.append(node.wire + 'p')
        # recurse
        gen_netlist(node.p0, wires, insts)
        gen_netlist(node.p1, wires, insts)
        # create instance
        insts.append(node)

    elif isinstance(node, CarryEval):
        # create output wire
        node.wire = 'wcarry%d' % len(wires)
        wires.append(node.wire)
        # recurse
        gen_netlist(node.p, wires, insts)
        gen_netlist(node.c, wires, insts)
        # create instance
        insts.append(node)

    else:
        # Explicit raise so the check survives "python -O".
        raise AssertionError("unknown node type %r" % (node,))


def vhdl_inst(node):
    """Return (name, ports) for a given instance.

    ports lists the input wires followed by the output wires, in the
    positional order of the library component. The result is language
    independent and is also used for Verilog output.
    """

    if isinstance(node, Reg):
        name = 'smul_flipflop'
        ports = ('clk', 'clken', node.v.wire, node.wire)

    elif isinstance(node, NotBit):
        name = 'smul_inverter'
        ports = (node.v.wire, node.wire)

    elif isinstance(node, BoothNeg):
        name = 'smul_booth_neg'
        ports = (node.pat[0].wire, node.pat[1].wire, node.pat[2].wire,
                 node.wire)

    elif isinstance(node, BoothProd):
        name = 'smul_booth_prod'
        ports = (node.pat[0].wire, node.pat[1].wire, node.pat[2].wire,
                 node.b[0].wire, node.b[1].wire,
                 node.wire)

    elif isinstance(node, HalfAdd):
        name = 'smul_half_add'
        ports = (node.a.wire, node.b.wire,
                 node.wire + 'd', node.wire + 'c')

    elif isinstance(node, FullAdd):
        name = 'smul_full_add'
        ports = (node.a.wire, node.b.wire, node.c.wire,
                 node.wire + 'd', node.wire + 'c')

    elif isinstance(node, CarryProp):
        name = 'smul_carry_prop'
        ports = (node.a.wire, node.b.wire,
                 node.wire + 'g', node.wire + 'p')

    elif isinstance(node, CarryMerge):
        name = 'smul_carry_merge'
        ports = (node.p0.wire + 'g', node.p0.wire + 'p',
                 node.p1.wire + 'g', node.p1.wire + 'p',
                 node.wire + 'g', node.wire + 'p')

    elif isinstance(node, CarryEval):
        name = 'smul_carry_eval'
        ports = (node.p.wire + 'g', node.p.wire + 'p',
                 node.c.wire,
                 node.wire)

    else:
        # Explicit raise so the check survives "python -O".
        raise AssertionError("unknown instance type %r" % (node,))

    return (name, ports)


def vhdl_wire(wire):
    """Resolve wire to VHDL expression string."""

    if isinstance(wire, ConstBit):
        return "'%d'" % wire.v
    elif isinstance(wire, InBit):
        return "%sin(%d)" % (wire.xy, wire.p)
    else:
        assert isinstance(wire, str)
        return wire


def gen_vhdl_lib():
    """Generate VHDL code for library components and write to stdout."""

    print("""
--
-- Flip-flop.
--

library ieee;
use ieee.std_logic_1164.all;

entity smul_flipflop is
    port (
        clk: in std_ulogic;
        clken: in std_ulogic;
        d: in std_ulogic;
        q: out std_ulogic );
end entity;

architecture smul_flipflop_arch of smul_flipflop is
begin
    process (clk) is
    begin
        if rising_edge(clk) then
            if to_x01(clken) = '1' then
                q <= d;
            end if;
        end if;
    end process;
end architecture;


--
-- Inverter.
--

library ieee;
use ieee.std_logic_1164.all;

entity smul_inverter is
    port (
        d: in std_ulogic;
        q: out std_ulogic );
end entity;

architecture smul_inverter_arch of smul_inverter is
begin
    q <= not d;
end architecture;


--
-- Half-adder.
--

library ieee;
use ieee.std_logic_1164.all;

entity smul_half_add is
    port (
        x: in std_ulogic;
        y: in std_ulogic;
        d: out std_ulogic;
        c: out std_ulogic );
end entity;

architecture smul_half_add_arch of smul_half_add is
begin
    d <= x xor y;
    c <= x and y;
end architecture;


--
-- Full-adder.
--

library ieee;
use ieee.std_logic_1164.all;

entity smul_full_add is
    port (
        x: in std_ulogic;
        y: in std_ulogic;
        z: in std_ulogic;
        d: out std_ulogic;
        c: out std_ulogic );
end entity;

architecture smul_full_add_arch of smul_full_add is
begin
    d <= x xor y xor z;
    c <= (x and y) or (y and z) or (x and z);
end architecture;


--
-- Booth negative flag.
--

library ieee;
use ieee.std_logic_1164.all;

entity smul_booth_neg is
    port (
        p0: in std_ulogic;
        p1: in std_ulogic;
        p2: in std_ulogic;
        f: out std_ulogic );
end entity;

architecture smul_booth_neg_arch of smul_booth_neg is
begin
    f <= p2 and ((not p1) or (not p0));
end architecture;


--
-- Booth partial product generation.
--

library ieee;
use ieee.std_logic_1164.all;

entity smul_booth_prod is
    port (
        p0: in std_ulogic;
        p1: in std_ulogic;
        p2: in std_ulogic;
        b0: in std_ulogic;
        b1: in std_ulogic;
        y: out std_ulogic );
end entity;

architecture smul_booth_prod_arch of smul_booth_prod is
begin
    process (p0, p1, p2, b0, b1) is
        variable p: std_ulogic_vector(2 downto 0);
    begin
        p := (p2, p1, p0);
        case p is
            when "000" => y <= '0'; -- factor 0
            when "001" => y <= b1; -- factor 1
            when "010" => y <= b1; -- factor 1
            when "011" => y <= b0; -- factor 2
            when "100" => y <= not b0; -- factor -2
            when "101" => y <= not b1; -- factor -1
            when "110" => y <= not b1; -- factor -1
            when others => y <= '0'; -- factor 0
        end case;
    end process;
end architecture;


--
-- Determine carry generate and carry propagate.
--

library ieee;
use ieee.std_logic_1164.all;

entity smul_carry_prop is
    port (
        a: in std_ulogic;
        b: in std_ulogic;
        g: out std_ulogic;
        p: out std_ulogic );
end entity;

architecture smul_carry_prop of smul_carry_prop is
begin
    g <= a and b;
    p <= a xor b;
end architecture;


--
-- Merge two carry propagation trees.
--

library ieee;
use ieee.std_logic_1164.all;

entity smul_carry_merge is
    port (
        g0: in std_ulogic;
        p0: in std_ulogic;
        g1: in std_ulogic;
        p1: in std_ulogic;
        g: out std_ulogic;
        p: out std_ulogic );
end entity;

architecture smul_carry_merge of smul_carry_merge is
begin
    g <= g1 or (g0 and p1);
    p <= p0 and p1;
end architecture;


--
-- Calculate carry-out through a carry propagation tree.
--

library ieee;
use ieee.std_logic_1164.all;

entity smul_carry_eval is
    port (
        g: in std_ulogic;
        p: in std_ulogic;
        cin: in std_ulogic;
        cout: out std_ulogic );
end entity;

architecture smul_carry_eval of smul_carry_eval is
begin
    cout <= g or (p and cin);
end architecture;
""")


def gen_vhdl_mul(xbits, ybits, npipe, wires, insts, outputs):
    """Generate VHDL code and write to stdout.

    wires and insts are the lists filled in by gen_netlist();
    outputs is the list of wires driving zout, LSB first.
    """

    # Declaration.
    print("""
---
--- %(xbits)d x %(ybits)d bit signed multiplier
---
--- %(npipe)d cycles pipeline delay
---

library ieee;
use ieee.std_logic_1164.all;

entity smul_%(xbits)d_%(ybits)d is
    port (
        clk: in std_ulogic;
        clken: in std_ulogic;
        xin: in std_logic_vector(%(xleft)d downto 0);
        yin: in std_logic_vector(%(yleft)d downto 0);
        zout: out std_logic_vector(%(zleft)d downto 0) );
end entity;

architecture arch of smul_%(xbits)d_%(ybits)d is
""" % {'xbits': xbits,
       'ybits': ybits,
       'npipe': npipe,
       'xleft': xbits - 1,
       'yleft': ybits - 1,
       'zleft': xbits + ybits - 1})

    # Declare signals.
    for w in wires:
        print("signal %s: std_ulogic;" % w)

    # Start architecture body.
    print("")
    print("begin")
    print("")

    # Instantiate components.
    for (i, node) in enumerate(insts):
        (name, ports) = vhdl_inst(node)
        print("u%d: entity work.%s port map ( %s );"
              % (i, name, ", ".join([vhdl_wire(p) for p in ports])))
    print("")

    # Drive output signals.
    for (i, wire) in enumerate(outputs):
        print("zout(%d) <= %s;" % (i, vhdl_wire(wire)))

    # End architecture.
    print("")
    print("end architecture;")


def verilog_wire(wire):
    """Resolve wire to Verilog expression string."""

    if isinstance(wire, ConstBit):
        return "1'b%d" % wire.v
    elif isinstance(wire, InBit):
        return "%sin[%d]" % (wire.xy, wire.p)
    else:
        assert isinstance(wire, str)
        return wire


def gen_verilog_lib():
    """Generate Verilog code for library components and write to stdout."""

    print("""
// Flip-flop.
module smul_flipflop (
    input wire clk,
    input wire clken,
    input wire d,
    output reg q );
always @(posedge clk) begin
    if (clken) q <= d;
end
endmodule

// Inverter.
module smul_inverter (
    input wire d,
    output wire q );
assign q = ~d;
endmodule

// Half-adder.
module smul_half_add (
    input wire x,
    input wire y,
    output wire d,
    output wire c );
assign d = x ^ y;
assign c = x & y;
endmodule

// Full-adder.
module smul_full_add (
    input wire x,
    input wire y,
    input wire z,
    output wire d,
    output wire c );
assign d = x ^ y ^ z;
assign c = (x & y) | (y & z) | (x & z);
endmodule

// Booth negative flag.
module smul_booth_neg (
    input wire p0,
    input wire p1,
    input wire p2,
    output wire f );
assign f = p2 & ((~p1) | (~p0));
endmodule

// Booth partial product generator.
module smul_booth_prod (
    input wire p0,
    input wire p1,
    input wire p2,
    input wire u0,
    input wire u1,
    output reg y );
always @ (*)
begin
    case ({p2, p1, p0})
        3'b000 : y = 1'b0;
        3'b001 : y = u1;
        3'b010 : y = u1;
        3'b011 : y = u0;
        3'b100 : y = ~u0;
        3'b101 : y = ~u1;
        3'b110 : y = ~u1;
        default : y = 1'b0;
    endcase
end
endmodule

// Deterimine carry generate and carry propagate.
module smul_carry_prop (
    input wire a,
    input wire b,
    output wire g,
    output wire p );
assign g = a & b;
assign p = a ^ b;
endmodule

// Merge two carry propagation trees.
module smul_carry_merge (
    input wire g0,
    input wire p0,
    input wire g1,
    input wire p1,
    output wire g,
    output wire p );
assign g = g1 | (g0 & p1);
assign p = p0 & p1;
endmodule

// Calculate carry-out through a carry propagation tree.
module smul_carry_eval (
    input wire g,
    input wire p,
    input wire cin,
    output wire cout );
assign cout = g | (p & cin);
endmodule
""")


def gen_verilog_mul(xbits, ybits, npipe, wires, insts, outputs):
    """Generate Verilog code and write to stdout.

    wires and insts are the lists filled in by gen_netlist();
    outputs is the list of wires driving zout, LSB first.
    """

    # Preamble.
    print("""
/*
 * %(xbits)d x %(ybits)d bit signed multiplier
 *
 * %(npipe)d cycles pipeline delay
 */

module smul_%(xbits)d_%(ybits)d (
    input wire clk,
    input wire clken,
    input wire [%(xleft)d:0] xin,
    input wire [%(yleft)d:0] yin,
    output wire [%(zleft)d:0] zout );
""" % {'xbits': xbits,
       'ybits': ybits,
       'npipe': npipe,
       'xleft': xbits - 1,
       'yleft': ybits - 1,
       'zleft': xbits + ybits - 1})

    # Declare signals.
    for w in wires:
        print("wire %s;" % w)

    # Instantiate components.
    # vhdl_inst() only yields component names and positional port lists,
    # so it serves Verilog as well.
    for (i, node) in enumerate(insts):
        (name, ports) = vhdl_inst(node)
        print("%s u%d ( %s );"
              % (name, i, ", ".join([verilog_wire(p) for p in ports])))
    print("")

    # Drive output signals.
    for (i, wire) in enumerate(outputs):
        print("assign zout[%d] = %s;" % (i, verilog_wire(wire)))

    # End module.
    print("")
    print("endmodule")


def main():
    """Parse the command line and write the requested code to stdout.

    Exits with status 1 after writing a message to stderr when the
    command line arguments are invalid.
    """

    def fail(message, show_usage):
        """Write an error (optionally preceded by usage) and exit."""
        if show_usage:
            sys.stderr.write(__doc__ + "\n")
        sys.stderr.write(message + "\n")
        sys.exit(1)

    parser = argparse.ArgumentParser()
    parser.format_help = lambda: __doc__
    parser.format_usage = lambda: __doc__
    parser.add_argument('--lang', action='store', type=str)
    parser.add_argument('--nolib', action='store_true', default=False)
    parser.add_argument('--lib', action='store_true', default=False)
    parser.add_argument('Xbits', action='store', type=int, nargs='?')
    parser.add_argument('Ybits', action='store', type=int, nargs='?')
    parser.add_argument('npipe', action='store', type=int, nargs='?')

    args = parser.parse_args()

    if args.lang is None or args.lang.upper() not in ('VHDL', 'VERILOG'):
        fail("ERROR: Must specify --lang=vhdl or --lang=verilog", True)
    lang = args.lang.upper()

    if args.lib:
        if (args.nolib or
                args.Xbits is not None or
                args.Ybits is not None or
                args.npipe is not None):
            fail("ERROR: Must specify either --lib or Xbits, Ybits, npipe",
                 True)
    else:
        if (args.Xbits is None or
                args.Ybits is None or
                args.npipe is None):
            fail("ERROR: Must specify either --lib or Xbits, Ybits, npipe",
                 True)
        if args.Xbits < 4 or args.Ybits < args.Xbits:
            fail("ERROR: invalid word lengths", False)
        if args.npipe < 0 or args.npipe > 2:
            fail("ERROR: invalid number of register stages", False)

    if not args.lib:

        # Generate expression tree.
        zvec = gen_multiplier(args.Xbits, args.Ybits, args.npipe)

        # Generate wires and instances.
        wires = []
        insts = []
        for node in zvec:
            gen_netlist(node, wires, insts)
        outputs = [node.wire for node in zvec]

    # Write library components.
    if not args.nolib:
        if lang == 'VHDL':
            gen_vhdl_lib()
        elif lang == 'VERILOG':
            gen_verilog_lib()

    # Write multiplier.
    if not args.lib:
        if lang == 'VHDL':
            gen_vhdl_mul(args.Xbits, args.Ybits, args.npipe,
                         wires, insts, outputs)
        elif lang == 'VERILOG':
            gen_verilog_mul(args.Xbits, args.Ybits, args.npipe,
                            wires, insts, outputs)


if __name__ == '__main__':
    main()

# end