From 5303c34431d61a3c1bdd0a17bf644f925646ea93 Mon Sep 17 00:00:00 2001 From: Joris van Rantwijk Date: Fri, 21 Oct 2016 11:31:26 +0200 Subject: [PATCH] Initial commit: * Xoroshiro128+ (works, tested) * MT19337 (work in progress) * Test benches * Reference C code. --- refimpl/ref_mt19937.cpp | 48 ++++++ refimpl/ref_xoroshiro.c | 93 ++++++++++++ rtl/rng_mt19937.vhdl | 275 +++++++++++++++++++++++++++++++++++ rtl/xoroshiro128plus.vhdl | 139 ++++++++++++++++++ sim/tb_mt19937.vhdl | 141 ++++++++++++++++++ sim/tb_xoroshiro128plus.vhdl | 118 +++++++++++++++ 6 files changed, 814 insertions(+) create mode 100644 refimpl/ref_mt19937.cpp create mode 100644 refimpl/ref_xoroshiro.c create mode 100644 rtl/rng_mt19937.vhdl create mode 100644 rtl/xoroshiro128plus.vhdl create mode 100644 sim/tb_mt19937.vhdl create mode 100644 sim/tb_xoroshiro128plus.vhdl diff --git a/refimpl/ref_mt19937.cpp b/refimpl/ref_mt19937.cpp new file mode 100644 index 0000000..767e87b --- /dev/null +++ b/refimpl/ref_mt19937.cpp @@ -0,0 +1,48 @@ +/* + * Reference implementation of Mersenne Twister MT19937 in C++11. + * + * Test bench by Joris van Rantwijk. + */ + +#include +#include +#include + +using namespace std; + + +int main(int argc, const char **argv) +{ + if (argc != 3) { + fprintf(stderr, "Reference implementation of Mersenne Twister MT19937\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Usage: ref_mt19937 SEED NUMVALUE\n"); + fprintf(stderr, " SEED seed value in range 0 .. (2**31-1)\n"); + fprintf(stderr, " NUMVALUE number of values to get from generator\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Example: ref_mt19937 0x31415926 100\n"); + exit(1); + } + + char *p; + unsigned long seed = strtoul(argv[1], &p, 0); + if (p == argv[1] || *p != '\0') { + fprintf(stderr, "ERROR: Invalid value for SEED0\n"); + exit(1); + } + + unsigned long numval = strtoul(argv[2], &p, 0); + if (p == argv[3] || *p != '\0') { + fprintf(stderr, "ERROR: Invalid value for NUMVALUE\n"); + exit(1); + } + + mt19937 rng(seed); + + for (unsigned long k = 0; k < numval; k++) { + printf("0x%08lx\n", (unsigned long) rng()); + } + + return 0; +} + diff --git a/refimpl/ref_xoroshiro.c b/refimpl/ref_xoroshiro.c new file mode 100644 index 0000000..c3337af --- /dev/null +++ b/refimpl/ref_xoroshiro.c @@ -0,0 +1,93 @@ +/* + * Reference implementation of "xoroshiro128+" in C. + * + * Algorithm by David Blackman and Sebastiano Vigna. + * Test bench by Joris van Rantwijk. + * + * To the extent possible under law, the author has dedicated all copyright + * and related and neighboring rights to this software to the public domain + * worldwide. This software is distributed without any warranty. + */ + +#include +#include +#include + + +/* ========== BEGIN of reference implementation of xoroshiro128+ ========== + * See also http://xoroshiro.di.unimi.it/ + */ + +/* Written in 2016 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See . */ + +int64_t s[2]; + +static inline uint64_t rotl(const uint64_t x, int k) { + return (x << k) | (x >> (64 - k)); +} + +uint64_t next(void) { + const uint64_t s0 = s[0]; + uint64_t s1 = s[1]; + const uint64_t result = s0 + s1; + + s1 ^= s0; + s[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14); // a, b + s[1] = rotl(s1, 36); // c + + return result; +} + +/* ========== END of reference implementation of xoroshiro128+ ========== */ + + +int main(int argc, const char **argv) +{ + char *p; + unsigned long numval; + unsigned long k; + + if (argc != 4) { + fprintf(stderr, "Reference implementation of RNG xoroshiro128+\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Usage: ref_xoroshiro SEED0 SEED1 NUMVALUE\n"); + fprintf(stderr, " SEED0 seed value in range 0 .. (2**64-1)\n"); + fprintf(stderr, " SEED1 seed value in range 0 .. (2**64-1)\n"); + fprintf(stderr, " NUMVALUE number of values to get from generator\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Example: ref_xoroshiro 0x3141592653589793 " + "0x0123456789abcdef 100\n"); + exit(1); + } + + s[0] = strtoull(argv[1], &p, 0); + if (p == argv[1] || *p != '\0') { + fprintf(stderr, "ERROR: Invalid value for SEED0\n"); + exit(1); + } + + s[1] = strtoull(argv[2], &p, 0); + if (p == argv[2] || *p != '\0') { + fprintf(stderr, "ERROR: Invalid value for SEED1\n"); + exit(1); + } + + numval = strtoul(argv[3], &p, 0); + if (p == argv[3] || *p != '\0') { + fprintf(stderr, "ERROR: Invalid value for NUMVALUE\n"); + exit(1); + } + + for (k = 0; k < numval; k++) { + printf("0x%016llx\n", (unsigned long long) next()); + } + + return 0; +} + diff --git a/rtl/rng_mt19937.vhdl b/rtl/rng_mt19937.vhdl new file mode 100644 index 0000000..9090cb0 --- /dev/null +++ b/rtl/rng_mt19937.vhdl @@ -0,0 +1,275 @@ +-- +-- Pseudo Random Number Generator based on Mersenne Twister MT19937. +-- +-- Author: Joris van Rantwijk +-- +-- This is a 32-bit random number generator in synthesizable VHDL. +-- The generator produces 32 new random bits on every (enabled) clock cycle. +-- +-- See also M. Matsumoto, T. Nishimura, "Mersenne Twister: +-- a 623-dimensionally equidistributed uniform pseudorandom number generator", +-- ACM TOMACS, vol. 8, no. 1, 1998. +-- +-- The generator requires a 32-bit seed value. +-- A default seed must be supplied at compile time and will be used +-- to initialize the generator at reset. The generator also supports +-- re-seeded at run time. +-- +-- After reset, and after re-seeding, the generator needs 625 clock +-- cycles to initialize its internal state. During this time, the generator +-- is unable to provide correct output. +-- +-- NOTE: This is not a cryptographic random number generator. +-- + +-- TODO : Multiplication in reseeding severely limits the maximum frequency +-- for this design. +-- Add pipelining and increase the number of clock cycles for reseeding. + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + + +entity rng_mt19937 is + + generic ( + -- Default seed value. + init_seed: std_logic_vector(31 downto 0) ); + + port ( + + -- Clock, rising edge active. + clk: in std_logic; + + -- Synchronous reset, active high. + rst: in std_logic; + + -- High to generate new output value. + enable: in std_logic; + + -- High to re-seed the generator (works regardless of enable signal). + reseed: in std_logic; + + -- New seed value (must be valid when reseed = '1'). + newseed: in std_logic_vector(31 downto 0); + + -- Output value. + -- A new value appears on every rising clock edge where enable = '1'. + output: out std_logic_vector(31 downto 0); + + -- High while re-seeding (normal function not available). + busy: out std_logic ); + +end entity; + + +architecture rng_mt19937_arch of rng_mt19937 is + + -- Constants. + constant const_a: std_logic_vector(31 downto 0) := x"9908b0df"; + constant const_b: std_logic_vector(31 downto 0) := x"9d2c5680"; + constant const_c: std_logic_vector(31 downto 0) := x"efc60000"; + constant const_f: natural := 1812433253; + + -- Block RAM for generator state. + type mem_t is array(0 to 620) of std_logic_vector(31 downto 0); + signal mem: mem_t; + + -- RAM access registers. + signal reg_a_addr: std_logic_vector(9 downto 0); + signal reg_b_addr: std_logic_vector(9 downto 0); + signal reg_a_wdata: std_logic_vector(31 downto 0); + signal reg_a_rdata: std_logic_vector(31 downto 0); + signal reg_b_rdata: std_logic_vector(31 downto 0); + + -- Internal registers. + signal reg_enable: std_logic; + signal reg_reseeding1: std_logic; + signal reg_reseeding2: std_logic; + signal reg_a_wdata_p: std_logic_vector(31 downto 0); + signal reg_a_rdata_p: std_logic_vector(31 downto 0); + signal reg_reseed_cnt: std_logic_vector(9 downto 0); + + -- Output register. + signal reg_output: std_logic_vector(31 downto 0) := (others => '0'); + signal reg_busy: std_logic; + +-- -- Multiply unsigned number with constant and discard overflowing bits. +-- function mulconst(x: unsigned) +-- return unsigned +-- is +-- variable t: unsigned(2*x'length-1 downto 0); +-- begin +-- t := x * const_f; +-- return t(x'length-1 downto 0); +-- end function; + + -- Multiply unsigned number with constant and discard overflowing bits. + function mulconst(x: unsigned) + return unsigned + is + begin + return x + + shift_left(x, 2) + + shift_left(x, 5) + + shift_left(x, 6) + + shift_left(x, 8) + + shift_left(x, 11) + - shift_left(x, 15) + + shift_left(x, 19) + - shift_left(x, 26) + - shift_left(x, 28) + + shift_left(x, 31); + end function; + +begin + + -- Drive output signal. + output <= reg_output; + busy <= reg_busy; + + -- Main synchronous process. + process (clk) is + variable y: std_logic_vector(31 downto 0); + begin + if rising_edge(clk) then + + -- Update memory pointers. + if reg_enable = '1' then + + if unsigned(reg_a_addr) = 620 then + reg_a_addr <= (others => '0'); + else + reg_a_addr <= std_logic_vector(unsigned(reg_a_addr) + 1); + end if; + + if unsigned(reg_b_addr) = 620 then + reg_b_addr <= (others => '0'); + else + reg_b_addr <= std_logic_vector(unsigned(reg_b_addr) + 1); + end if; + + end if; + + -- Keep previous values of registers. + if reg_enable = '1' then + reg_a_rdata_p <= reg_a_rdata; + reg_a_wdata_p <= reg_a_wdata; + end if; + + -- Update reseeding counter. + reg_reseed_cnt <= std_logic_vector(unsigned(reg_reseed_cnt) + 1); + + -- Determine end of reseeding. + reg_busy <= reg_reseeding2; + reg_reseeding2 <= reg_reseeding1; + if unsigned(reg_reseed_cnt) = 623 then + reg_reseeding1 <= '0'; + end if; + + -- Enable state machine on next cycle + -- a) during initialization, and + -- b) on-demand for new output. + reg_enable <= reg_reseeding2 or enable; + + -- Update internal RNG state. + if reg_enable = '1' then + + if reg_reseeding1 = '1' then + + -- Continue re-seeding loop. + y := reg_a_wdata; + y(1 downto 0) := y(1 downto 0) xor y(31 downto 30); + reg_a_wdata <= std_logic_vector( + mulconst(unsigned(y)) + + unsigned(reg_reseed_cnt) ); + + else + + -- Normal operation. + -- Perform one step of the "twist" function. + + y := reg_a_rdata_p(31 downto 31) & + reg_a_rdata(30 downto 0); + + if y(0) = '1' then + y := "0" & y(31 downto 1); + y := y xor const_a; + else + y := "0" & y(31 downto 1); + end if; + + reg_a_wdata_p <= reg_a_wdata; + reg_a_wdata <= reg_b_rdata xor y; + + end if; + end if; + + -- Produce output value (when enabled). + if enable = '1' then + + if reg_enable = '1' then + y := reg_a_wdata; + else + y := reg_a_wdata_p; + end if; + + y(20 downto 0) := y(20 downto 0) xor y(31 downto 11); + y(31 downto 7) := y(31 downto 7) xor + (y(24 downto 0) and const_b(31 downto 7)); + y(31 downto 15) := y(31 downto 15) xor + (y(16 downto 0) and const_c(31 downto 15)); + y(13 downto 0) := y(13 downto 0) xor y(31 downto 18); + + reg_output <= y; + + end if; + + -- Start re-seeding. + if reseed = '1' then + reg_reseeding1 <= '1'; + reg_reseeding2 <= '1'; + reg_reseed_cnt <= std_logic_vector(to_unsigned(1, 10)); + reg_enable <= '1'; + reg_a_wdata <= newseed; + reg_busy <= '1'; + end if; + + -- Synchronous reset. + if rst = '1' then + reg_a_addr <= std_logic_vector(to_unsigned(0, 10)); + reg_b_addr <= std_logic_vector(to_unsigned(396, 10)); + reg_reseeding1 <= '1'; + reg_reseeding2 <= '1'; + reg_reseed_cnt <= std_logic_vector(to_unsigned(1, 10)); + reg_enable <= '1'; + reg_a_wdata <= init_seed; + reg_output <= (others => '0'); + reg_busy <= '1'; + end if; + + end if; + end process; + + -- Synchronous process for block RAM. + process (clk) is + begin + if rising_edge(clk) then + if reg_enable = '1' then + + -- Read from port A. + reg_a_rdata <= mem(to_integer(unsigned(reg_a_addr))); + + -- Read from port B. + reg_b_rdata <= mem(to_integer(unsigned(reg_b_addr))); + + -- Write to port A. + mem(to_integer(unsigned(reg_a_addr))) <= reg_a_wdata; + + end if; + end if; + end process; + +end architecture; + diff --git a/rtl/xoroshiro128plus.vhdl b/rtl/xoroshiro128plus.vhdl new file mode 100644 index 0000000..575b29b --- /dev/null +++ b/rtl/xoroshiro128plus.vhdl @@ -0,0 +1,139 @@ +-- +-- Pseudo Random Number Generator "xoroshiro128+". +-- +-- Author: Joris van Rantwijk +-- +-- This is a 64-bit random number generator in synthesizable VHDL. +-- The generator produces 64 new random bits on every (enabled) clock cycle. +-- +-- The algorithm "xoroshiro128+" is by David Blackman and Sebastiano Vigna. +-- See also http://xoroshiro.di.unimi.it/ +-- +-- The generator requires a 128-bit seed value, not equal to zero. +-- A default seed must be supplied at compile time and will be used +-- to initialize the generator at reset. The generator also supports +-- re-seeding at run time. +-- +-- After reset, at least one enabled clock cycle is needed before +-- a random number appears on the output. +-- +-- NOTE: This is not a cryptographic random number generator. +-- +-- NOTE: The least significant output bit is less random than +-- all other output bits. +-- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + + +entity xoroshiro128plus is + + generic ( + -- Default seed value. + init_seed: std_logic_vector(127 downto 0) ); + + port ( + + -- Clock, rising edge active. + clk: in std_logic; + + -- Synchronous reset, active high. + rst: in std_logic; + + -- Clock enable, active high. + enable: in std_logic; + + -- High to re-seed the generator (requires enable = '1'). + reseed: in std_logic; + + -- New seed value (must be valid when reseed = '1'). + newseed: in std_logic_vector(127 downto 0); + + -- Output value. + -- A new value appears on every rising clock edge where enable = '1'. + output: out std_logic_vector(63 downto 0) ); + +end entity; + + +architecture xoroshiro128plus_arch of xoroshiro128plus is + + -- Internal state of RNG. + signal reg_state_s0: std_logic_vector(63 downto 0) := init_seed(63 downto 0); + signal reg_state_s1: std_logic_vector(63 downto 0) := init_seed(127 downto 64); + + -- Output register. + signal reg_output: std_logic_vector(63 downto 0) := (others => '0'); + + -- Shift left. + function shiftl(x: std_logic_vector; b: integer) + return std_logic_vector + is + constant n: integer := x'length; + variable y: std_logic_vector(n-1 downto 0); + begin + y(n-1 downto b) := x(x'high-b downto x'low); + y(b-1 downto 0) := (others => '0'); + return y; + end function; + + -- Rotate left. + function rotl(x: std_logic_vector; b: integer) + return std_logic_vector + is + constant n: integer := x'length; + variable y: std_logic_vector(n-1 downto 0); + begin + y(n-1 downto b) := x(x'high-b downto x'low); + y(b-1 downto 0) := x(x'high downto x'high-b+1); + return y; + end function; + +begin + + -- Drive output signal. + output <= reg_output; + + -- Synchronous process. + process (clk) is + begin + if rising_edge(clk) then + + if enable = '1' then + + -- Prepare output word. + reg_output <= std_logic_vector(unsigned(reg_state_s0) + + unsigned(reg_state_s1)); + + -- Update internal state. + reg_state_s0 <= reg_state_s0 xor + reg_state_s1 xor + shiftl(reg_state_s0, 14) xor + shiftl(reg_state_s1, 14) xor + rotl(reg_state_s0, 55); + + reg_state_s1 <= rotl(reg_state_s0, 36) xor + rotl(reg_state_s1, 36); + + -- Re-seed function. + if reseed = '1' then + reg_state_s0 <= newseed(63 downto 0); + reg_state_s1 <= newseed(127 downto 64); + end if; + + end if; + + -- Synchronous reset. + if rst = '1' then + reg_state_s0 <= init_seed(63 downto 0); + reg_state_s1 <= init_seed(127 downto 64); + reg_output <= (others => '0'); + end if; + + end if; + end process; + +end architecture; + diff --git a/sim/tb_mt19937.vhdl b/sim/tb_mt19937.vhdl new file mode 100644 index 0000000..c208882 --- /dev/null +++ b/sim/tb_mt19937.vhdl @@ -0,0 +1,141 @@ +-- +-- Test bench for PRNG MT19937. +-- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +entity tb_mt19937 is +end entity; + +architecture arch of tb_mt19937 is + + signal clk: std_logic; + signal clock_active: boolean := false; + + signal s_rst: std_logic; + signal s_enable: std_logic; + signal s_reseed: std_logic; + signal s_newseed: std_logic_vector(31 downto 0); + signal s_output: std_logic_vector(31 downto 0); + signal s_busy: std_logic; + + function to_hex_string(s: std_logic_vector) + return string + is + constant alphabet: string(1 to 16) := "0123456789abcdef"; + variable y: string(1 to s'length/4); + begin + for i in y'range loop + y(i) := alphabet(to_integer(unsigned(s(s'high+4-4*i downto s'high+1-4*i))) + 1); + end loop; + return y; + end function; + +begin + + -- Instantiate PRNG. + inst_prng: entity work.rng_mt19937 + generic map ( + init_seed => x"31415926" ) + port map ( + clk => clk, + rst => s_rst, + enable => s_enable, + reseed => s_reseed, + newseed => s_newseed, + output => s_output, + busy => s_busy ); + + -- Generate clock. + clk <= (not clk) after 10 ns when clock_active else '0'; + + -- Main simulation process. + process is + begin + + report "Start test bench"; + + -- Reset. + s_rst <= '1'; + s_enable <= '0'; + s_reseed <= '0'; + s_newseed <= (others => '0'); + + -- Start clock. + clock_active <= true; + + -- Wait 2 clock cycles, then end reset. + wait for 30 ns; + wait until falling_edge(clk); + s_rst <= '0'; + + -- Check that generator is initializing. + assert s_busy = '1' report "Generator fails to indicate BUSY"; + wait until falling_edge(clk); + assert s_busy = '1' report "Generator fails to indicate BUSY"; + + -- Give generator time to complete initialization. + for i in 0 to 623 loop + wait until falling_edge(clk); + end loop; + assert s_busy = '0' report "Generator should be ready but still indicates BUSY"; + + -- Produce numbers + for i in 0 to 1500 loop + + if i mod 5 = 0 or i mod 7 = 0 then + s_enable <= '0'; + wait until falling_edge(clk); + else + s_enable <= '1'; + wait until falling_edge(clk); + report "Got 0x" & to_hex_string(s_output); + end if; + + end loop; + + -- Re-seed generator. + report "Re-seed generator"; + s_enable <= '0'; + s_reseed <= '1'; + s_newseed <= x"fedcba98"; + wait until falling_edge(clk); + s_reseed <= '0'; + s_newseed <= (others => '0'); + + -- Check that generator is initializing. + assert s_busy = '1' report "Generator fails to indicate BUSY"; + wait until falling_edge(clk); + assert s_busy = '1' report "Generator fails to indicate BUSY"; + + -- Give generator time to complete initialization. + for i in 0 to 623 loop + wait until falling_edge(clk); + end loop; + assert s_busy = '0' report "Generator should be ready but still indicates BUSY"; + + -- Produce numbers + for i in 0 to 1500 loop + + if i mod 5 = 1 or i mod 7 = 1 then + s_enable <= '0'; + wait until falling_edge(clk); + else + s_enable <= '1'; + wait until falling_edge(clk); + report "Got 0x" & to_hex_string(s_output); + end if; + + end loop; + + -- End simulation. + report "End testbench"; + + clock_active <= false; + wait; + + end process; + +end architecture; diff --git a/sim/tb_xoroshiro128plus.vhdl b/sim/tb_xoroshiro128plus.vhdl new file mode 100644 index 0000000..2720b27 --- /dev/null +++ b/sim/tb_xoroshiro128plus.vhdl @@ -0,0 +1,118 @@ +-- +-- Test bench for PRNG "xoroshiro128+". +-- + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +entity tb_xoroshiro128plus is +end entity; + +architecture arch of tb_xoroshiro128plus is + + signal clk: std_logic; + signal clock_active: boolean := false; + + signal s_rst: std_logic; + signal s_enable: std_logic; + signal s_reseed: std_logic; + signal s_newseed: std_logic_vector(127 downto 0); + signal s_output: std_logic_vector(63 downto 0); + + function to_hex_string(s: std_logic_vector) + return string + is + constant alphabet: string(1 to 16) := "0123456789abcdef"; + variable y: string(1 to s'length/4); + begin + for i in y'range loop + y(i) := alphabet(to_integer(unsigned(s(s'high+4-4*i downto s'high+1-4*i))) + 1); + end loop; + return y; + end function; + +begin + + -- Instantiate PRNG. + inst_prng: entity work.xoroshiro128plus + generic map ( + init_seed => x"0123456789abcdef3141592653589793" ) + port map ( + clk => clk, + rst => s_rst, + enable => s_enable, + reseed => s_reseed, + newseed => s_newseed, + output => s_output ); + + -- Generate clock. + clk <= (not clk) after 10 ns when clock_active else '0'; + + -- Main simulation process. + process is + begin + + report "Start test bench"; + + -- Reset. + s_rst <= '1'; + s_enable <= '0'; + s_reseed <= '0'; + s_newseed <= (others => '0'); + + -- Start clock. + clock_active <= true; + + -- Wait 2 clock cycles, then end reset. + wait for 30 ns; + wait until falling_edge(clk); + s_rst <= '0'; + + -- Produce numbers + for i in 0 to 150 loop + + if i mod 5 = 0 or i mod 7 = 0 then + s_enable <= '0'; + wait until falling_edge(clk); + else + s_enable <= '1'; + wait until falling_edge(clk); + report "Got 0x" & to_hex_string(s_output); + end if; + + end loop; + + -- Re-seed generator. + report "Re-seed generator"; + s_enable <= '1'; + s_reseed <= '1'; + s_newseed <= x"3141592653589793fedcba9876543210"; + wait until falling_edge(clk); + + s_reseed <= '0'; + s_newseed <= (others => '0'); + + -- Produce numbers + for i in 0 to 150 loop + + if i mod 5 = 0 or i mod 7 = 0 then + s_enable <= '0'; + wait until falling_edge(clk); + else + s_enable <= '1'; + wait until falling_edge(clk); + report "Got 0x" & to_hex_string(s_output); + end if; + + end loop; + + -- End simulation. + report "End testbench"; + + clock_active <= false; + wait; + + end process; + +end architecture;