From fc70f906b34673fda9f0c56fb7d9fa0b565a4697 Mon Sep 17 00:00:00 2001 From: Joris van Rantwijk Date: Thu, 13 Aug 2020 16:24:04 +0200 Subject: [PATCH] Add xoshiro128++ generator --- README.txt | 29 ++++- refimpl/Makefile | 11 +- refimpl/ref_xoshiro128plusplus.c | 119 ++++++++++++++++++++ rtl/rng_xoshiro128plusplus.vhdl | 150 +++++++++++++++++++++++++ sim/Makefile | 9 +- sim/tb_xoshiro128plusplus.vhdl | 176 ++++++++++++++++++++++++++++++ synth/top_xoshiro128plusplus.vhdl | 30 +++++ 7 files changed, 520 insertions(+), 4 deletions(-) create mode 100644 refimpl/ref_xoshiro128plusplus.c create mode 100644 rtl/rng_xoshiro128plusplus.vhdl create mode 100644 sim/tb_xoshiro128plusplus.vhdl create mode 100644 synth/top_xoshiro128plusplus.vhdl diff --git a/README.txt b/README.txt index 1b732c9..34fa1bf 100644 --- a/README.txt +++ b/README.txt @@ -21,6 +21,33 @@ NOTE: This library is not designed for cryptographic applications Most of the RNGs in this library are cryptographically weak. + Xoshiro128++ RNG + ---------------- + +Xoshiro128++ is a random number generator developed in 2019 by +David Blackman and Sebastiano Vigna. The Xoshiro construction is +based on the Xorshift concept invented by George Marsaglia. + +See also http://prng.di.unimi.it/ + +This RNG produces a sequence of 32-bit words. It passes all known +statistical tests and has a relatively long period (2**128 - 1). + +The VHDL implementation produces 32 new random bits on every (enabled) +clock cycle. It is quite efficient in terms of FPGA resources, but it +requires two cascaded 32-bit adders which limits its speed. 
+ +Output word length: 32 bits +Seed length: 128 bits +Period: 2**128 - 1 + +FPGA resources: general logic and two 32-bit adders +Synthesis results: 148 LUTs, 161 registers on Spartan-6 + 148 LUTs, 161 registers on Spartan-7 +Timing results: 250 MHz on Spartan-6 LX45-3 + 200 MHz on Spartan-7 S25-1 + + Xoroshiro128+ RNG ------------------ @@ -122,7 +149,7 @@ FPGA resources: only general logic (AND, XOR ports, registers) Synthesis results: 202 LUTs, 332 registers on Spartan-6 (32 bits output) 145 LUTs, 332 registers on Spartan-7 (32 bits output) Timing results: 380 MHz on Spartan-6 LX45-3 (32 bits output) - 440 MHz on Spartan-7 S25 (32 bits output) + 440 MHz on Spartan-7 S25-1 (32 bits output) Code organization diff --git a/refimpl/Makefile b/refimpl/Makefile index 7eecf17..1db54a7 100644 --- a/refimpl/Makefile +++ b/refimpl/Makefile @@ -10,8 +10,12 @@ CFLAGS = -std=c11 -Wall -O2 CXXFLAGS = -std=c++11 -Wall -O2 .PHONY: all -all: ref_xoroshiro128plus ref_mt19937 ref_trivium +all: ref_xoshiro128plusplus \ + ref_xoroshiro128plus \ + ref_mt19937 \ + ref_trivium +ref_xoshiro128plusplus: ref_xoshiro128plusplus.c ref_xoroshiro128plus: ref_xoroshiro128plus.c ref_mt19937: ref_mt19937.cpp @@ -20,5 +24,8 @@ ref_trivium: ref_trivium.cpp .PHONY: clean clean: - $(RM) ref_xoroshiro128plus ref_mt19937 ref_trivium + $(RM) ref_xoshiro128plusplus + $(RM) ref_xoroshiro128plus + $(RM) ref_mt19937 + $(RM) ref_trivium diff --git a/refimpl/ref_xoshiro128plusplus.c b/refimpl/ref_xoshiro128plusplus.c new file mode 100644 index 0000000..563ae28 --- /dev/null +++ b/refimpl/ref_xoshiro128plusplus.c @@ -0,0 +1,119 @@ +/* + * Reference implementation of "xoshiro128++" in C. + * + * Algorithm code by David Blackman and Sebastiano Vigna + * Main program wrapper by Joris van Rantwijk + * + * To the extent possible under law, the author has dedicated all copyright + * and related and neighboring rights to this software to the public domain + * worldwide. 
+ *  This software is distributed without any warranty.
+ *
+ *  See <http://creativecommons.org/publicdomain/zero/1.0/>
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+
+/* ========== BEGIN of reference implementation of xoshiro128++ ==========
+ * Source: http://prng.di.unimi.it/
+ */
+
+/*  Written in 2019 by David Blackman and Sebastiano Vigna (vigna@acm.org)
+
+To the extent possible under law, the author has dedicated all copyright
+and related and neighboring rights to this software to the public domain
+worldwide. This software is distributed without any warranty.
+
+See <http://creativecommons.org/publicdomain/zero/1.0/>. */
+
+#include <stdint.h>
+
+/* This is xoshiro128++ 1.0, one of our 32-bit all-purpose, rock-solid
+   generators. It has excellent speed, a state size (128 bits) that is
+   large enough for mild parallelism, and it passes all tests we are aware
+   of.
+
+   For generating just single-precision (i.e., 32-bit) floating-point
+   numbers, xoshiro128+ is even faster.
+
+   The state must be seeded so that it is not everywhere zero. */
+
+
+static inline uint32_t rotl(const uint32_t x, int k) {
+    return (x << k) | (x >> (32 - k));
+}
+
+
+static uint32_t s[4];
+
+uint32_t next(void) {
+    const uint32_t result = rotl(s[0] + s[3], 7) + s[0];
+
+    const uint32_t t = s[1] << 9;
+
+    s[2] ^= s[0];
+    s[3] ^= s[1];
+    s[1] ^= s[2];
+    s[0] ^= s[3];
+
+    s[2] ^= t;
+
+    s[3] = rotl(s[3], 11);
+
+    return result;
+}
+
+/* ========== END of reference implementation of xoshiro128++ ========== */
+
+
+/*
+ * Parse a complete command-line argument as an unsigned integer in the
+ * range 0 .. (2**64-1). The notation (decimal, octal with leading "0",
+ * hexadecimal with leading "0x") is selected by strtoull() with base 0.
+ *
+ * Returns 0 and stores the result in *value on success.
+ * Returns -1 and leaves *value untouched if the text is empty, has
+ * trailing characters, contains a minus sign or does not fit in 64 bits.
+ */
+static int parse_u64(const char *text, uint64_t *value)
+{
+    char *end;
+    unsigned long long v;
+
+    /* strtoull() accepts "-N" and returns the negated value without
+       reporting an error; a minus sign is never valid in these arguments. */
+    if (strchr(text, '-') != NULL) {
+        return -1;
+    }
+
+    /* On overflow strtoull() returns ULLONG_MAX and sets errno to ERANGE;
+       errno must be cleared first to distinguish this from a valid result. */
+    errno = 0;
+    v = strtoull(text, &end, 0);
+    if (end == text || *end != '\0' || errno == ERANGE || v > UINT64_MAX) {
+        return -1;
+    }
+
+    *value = (uint64_t)v;
+    return 0;
+}
+
+
+/*
+ * Command-line wrapper around the reference generator.
+ *
+ * Usage: ref_xoshiro128plusplus SEED0 SEED1 NVALUE
+ *
+ * The 128-bit state is loaded from the two 64-bit seed arguments:
+ *   s[0] = low  32 bits of SEED0,  s[1] = high 32 bits of SEED0,
+ *   s[2] = low  32 bits of SEED1,  s[3] = high 32 bits of SEED1.
+ * NVALUE successive outputs of next() are then written to stdout, one per
+ * line, formatted as "0x" followed by 8 lower-case hexadecimal digits.
+ *
+ * Returns 0 on success. Exits with status 1 on a usage error or an invalid
+ * argument, and returns 1 if writing to stdout failed.
+ */
+int main(int argc, char **argv)
+{
+    uint64_t seed0;
+    uint64_t seed1;
+    uint64_t numval;
+    uint64_t k;
+
+    if (argc != 4) {
+        fprintf(stderr, "Reference implementation of RNG xoshiro128++\n");
+        fprintf(stderr, "\n");
+        fprintf(stderr, "Usage: ref_xoshiro128plusplus SEED0 SEED1 NVALUE\n");
+        fprintf(stderr, " SEEDn seed value in range 0 .. (2**64-1)\n");
+        fprintf(stderr, " NVALUE number of values to get from generator\n");
+        fprintf(stderr, "\n");
+        fprintf(stderr, "Example: ref_xoshiro128plusplus 0x3141592653589793 "
+                        "0x0123456789abcdef 100\n");
+        exit(1);
+    }
+
+    if (parse_u64(argv[1], &seed0) != 0) {
+        fprintf(stderr, "ERROR: Invalid value for SEED0\n");
+        exit(1);
+    }
+
+    if (parse_u64(argv[2], &seed1) != 0) {
+        fprintf(stderr, "ERROR: Invalid value for SEED1\n");
+        exit(1);
+    }
+
+    if (parse_u64(argv[3], &numval) != 0) {
+        fprintf(stderr, "ERROR: Invalid value for NVALUE\n");
+        exit(1);
+    }
+
+    /* The all-zero state is a fixed point of next(): every output is 0.
+       Only warn, so that this case can still be reproduced on purpose. */
+    if (seed0 == 0 && seed1 == 0) {
+        fprintf(stderr, "WARNING: All-zero seed; generator will only output zeros\n");
+    }
+
+    s[0] = (uint32_t)seed0;
+    s[1] = (uint32_t)(seed0 >> 32);
+    s[2] = (uint32_t)seed1;
+    s[3] = (uint32_t)(seed1 >> 32);
+
+    for (k = 0; k < numval; k++) {
+        printf("0x%08lx\n", (unsigned long) next());
+    }
+
+    /* Report a truncated result file (full disk, closed pipe) instead of
+       exiting with a success status. */
+    if (fflush(stdout) != 0 || ferror(stdout)) {
+        fprintf(stderr, "ERROR: Failed to write output\n");
+        return 1;
+    }
+
+    return 0;
+}
+
diff --git a/rtl/rng_xoshiro128plusplus.vhdl b/rtl/rng_xoshiro128plusplus.vhdl
new file mode 100644
index 0000000..7d36bc4
--- /dev/null
+++ b/rtl/rng_xoshiro128plusplus.vhdl
@@ -0,0 +1,150 @@
+--
+--  Pseudo Random Number Generator "xoshiro128++ 1.0".
+--
+--  Author: Joris van Rantwijk
+--
+--  This is a 32-bit random number generator in synthesizable VHDL.
+--  The generator can produce 32 new random bits on every clock cycle.
+--
+--  The algorithm "xoshiro128++" is by David Blackman and Sebastiano Vigna.
+--  See also http://prng.di.unimi.it/
+--
+--  The generator requires a 128-bit seed value, not equal to all zeros.
+--  A default seed must be supplied at compile time and will be used
+--  to initialize the generator at reset. The generator also supports
+--  re-seeding at run time.
+--
+--  After reset and after re-seeding, at least one clock cycle is needed
+--  before valid random data appears on the output.
+--
+--  NOTE: This is not a cryptographic random number generator.
+--
+
+--
+--  Copyright (C) 2020 Joris van Rantwijk
+--
+--  This code is free software; you can redistribute it and/or
+--  modify it under the terms of the GNU Lesser General Public
+--  License as published by the Free Software Foundation; either
+--  version 2.1 of the License, or (at your option) any later version.
+--
+--  See <https://www.gnu.org/licenses/old-licenses/lgpl-2.1.html>
+--
+
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+
+-- 32-bit xoshiro128++ generator with a ready/valid output interface.
+entity rng_xoshiro128plusplus is
+
+    generic (
+        -- Default seed value.
+        -- Bits 31..0 initialize state word s0, bits 63..32 word s1,
+        -- bits 95..64 word s2 and bits 127..96 word s3.
+        init_seed:  std_logic_vector(127 downto 0) );
+
+    port (
+
+        -- Clock, rising edge active.
+        clk:        in  std_logic;
+
+        -- Synchronous reset, active high.
+        -- Reloads the state from init_seed and clears the output register.
+        rst:        in  std_logic;
+
+        -- High to request re-seeding of the generator.
+        reseed:     in  std_logic;
+
+        -- New seed value (must be valid when reseed = '1').
+        -- Uses the same bit-to-state-word mapping as init_seed.
+        newseed:    in  std_logic_vector(127 downto 0);
+
+        -- High when the user accepts the current random data word
+        -- and requests new random data for the next clock cycle.
+        out_ready:  in  std_logic;
+
+        -- High when valid random data is available on the output.
+        -- This signal is low during the first clock cycle after reset and
+        -- after re-seeding, and high in all other cases.
+        out_valid:  out std_logic;
+
+        -- Random output data (valid when out_valid = '1').
+        -- A new random word appears after every rising clock edge
+        -- where out_ready = '1'.
+        out_data:   out std_logic_vector(31 downto 0) );
+
+end entity;
+
+
+architecture xoshiro128plusplus_arch of rng_xoshiro128plusplus is
+
+    -- Internal state of RNG.
+    -- The four words correspond to s[0] .. s[3] of the C reference code.
+    signal reg_state_s0:    std_logic_vector(31 downto 0) := init_seed(31 downto 0);
+    signal reg_state_s1:    std_logic_vector(31 downto 0) := init_seed(63 downto 32);
+    signal reg_state_s2:    std_logic_vector(31 downto 0) := init_seed(95 downto 64);
+    signal reg_state_s3:    std_logic_vector(31 downto 0) := init_seed(127 downto 96);
+
+    -- Output register.
+    -- Holds the word computed from the state as it was before the most
+    -- recent state update; reg_valid tells whether that word may be used.
+    signal reg_valid:       std_logic := '0';
+    signal reg_output:      std_logic_vector(31 downto 0) := (others => '0');
+
+begin
+
+    -- Drive output signal.
+    out_valid   <= reg_valid;
+    out_data    <= reg_output;
+
+    -- Synchronous process.
+    --
+    -- The three "if" blocks below assign the same registers. Within a
+    -- process the last signal assignment wins, so the effective priority
+    -- is: reset (highest), then re-seed, then the normal state update.
+    process (clk) is
+    begin
+        if rising_edge(clk) then
+
+            -- Advance when the consumer takes the current word, or when
+            -- no valid word is present yet (after reset or re-seed).
+            if out_ready = '1' or reg_valid = '0' then
+
+                -- Prepare output word.
+                -- result = rotl(s0 + s3, 7) + s0, using 32-bit wrapping
+                -- additions (these are the two cascaded adders).
+                reg_valid   <= '1';
+                reg_output  <= std_logic_vector(
+                    rotate_left(unsigned(reg_state_s0) +
+                                unsigned(reg_state_s3), 7) +
+                    unsigned(reg_state_s0));
+
+                -- Update internal state.
+                -- All right-hand sides read the old register values, so
+                -- each expression is the fully expanded form of the
+                -- sequential XOR steps of the reference code:
+                --   s0' = s0 xor s1 xor s3
+                --   s1' = s0 xor s1 xor s2
+                --   s2' = s0 xor s2 xor (s1 sll 9)
+                --   s3' = rotl(s1 xor s3, 11)
+                reg_state_s0 <= reg_state_s0 xor
+                                reg_state_s1 xor
+                                reg_state_s3;
+
+                reg_state_s1 <= reg_state_s0 xor
+                                reg_state_s1 xor
+                                reg_state_s2;
+
+                reg_state_s2 <= reg_state_s0 xor
+                                reg_state_s2 xor
+                                std_logic_vector(
+                                  shift_left(unsigned(reg_state_s1), 9));
+
+                reg_state_s3 <= std_logic_vector(
+                    rotate_left(unsigned(reg_state_s1 xor reg_state_s3), 11));
+
+            end if;
+
+            -- Re-seed function.
+            -- Overrides the state update above and marks the output as
+            -- invalid. reg_output itself is not cleared here; it is
+            -- recomputed from the new state on the next clock edge.
+            if reseed = '1' then
+                reg_state_s0 <= newseed(31 downto 0);
+                reg_state_s1 <= newseed(63 downto 32);
+                reg_state_s2 <= newseed(95 downto 64);
+                reg_state_s3 <= newseed(127 downto 96);
+                reg_valid    <= '0';
+            end if;
+
+            -- Synchronous reset.
+            -- Placed last so that it overrides both blocks above.
+            if rst = '1' then
+                reg_state_s0 <= init_seed(31 downto 0);
+                reg_state_s1 <= init_seed(63 downto 32);
+                reg_state_s2 <= init_seed(95 downto 64);
+                reg_state_s3 <= init_seed(127 downto 96);
+                reg_valid    <= '0';
+                reg_output   <= (others => '0');
+            end if;
+
+        end if;
+    end process;
+
+end architecture;
+
diff --git a/sim/Makefile b/sim/Makefile
index f909e2b..fcc9000 100644
--- a/sim/Makefile
+++ b/sim/Makefile
@@ -6,7 +6,14 @@ GHDL = ghdl
 GHDLFLAGS =
 
 .PHONY: all
-all: tb_xoroshiro128plus tb_mt19937 tb_trivium
+all: tb_xoshiro128plusplus \
+     tb_xoroshiro128plus \
+     tb_mt19937 \
+     tb_trivium
+
+tb_xoshiro128plusplus: tb_xoshiro128plusplus.o rng_xoshiro128plusplus.o
+tb_xoshiro128plusplus.o: tb_xoshiro128plusplus.vhdl rng_xoshiro128plusplus.o
+rng_xoshiro128plusplus.o: ../rtl/rng_xoshiro128plusplus.vhdl
 
 tb_xoroshiro128plus: tb_xoroshiro128plus.o rng_xoroshiro128plus.o
 tb_xoroshiro128plus.o: tb_xoroshiro128plus.vhdl rng_xoroshiro128plus.o
diff --git a/sim/tb_xoshiro128plusplus.vhdl b/sim/tb_xoshiro128plusplus.vhdl
new file mode 100644
index 0000000..2599c47
--- /dev/null
+++ b/sim/tb_xoshiro128plusplus.vhdl
@@ -0,0 +1,176 @@
+--
+--  Test bench for PRNG "xoshiro128++".
+--
+--  The test bench runs the generator twice (default seed, then a run-time
+--  re-seed) and writes 1000 output words of each run to a text file,
+--  one word per line as "0x" followed by 8 lower-case hex digits.
+--
+
+use std.textio.all;
+
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+entity tb_xoshiro128plusplus is
+end entity;
+
+architecture arch of tb_xoshiro128plusplus is
+
+    signal clk:             std_logic;
+    signal clock_active:    boolean := false;
+
+    signal s_rst:           std_logic;
+    signal s_reseed:        std_logic;
+    signal s_newseed:       std_logic_vector(127 downto 0);
+    signal s_ready:         std_logic;
+    signal s_valid:         std_logic;
+    signal s_data:          std_logic_vector(31 downto 0);
+
+    -- Convert a bit vector to a lower-case hexadecimal string,
+    -- most significant digit first, one character per 4 bits.
+    -- NOTE(review): the slices count down from s'high, so this presumably
+    -- expects a "downto" vector whose length is a multiple of 4 - confirm
+    -- before reusing it with other vector shapes.
+    function to_hex_string(s: std_logic_vector)
+        return string
+    is
+        constant alphabet: string(1 to 16) := "0123456789abcdef";
+        variable y: string(1 to s'length/4);
+    begin
+        for i in y'range loop
+            -- Digit i covers bits (s'high+4-4*i downto s'high+1-4*i);
+            -- "+ 1" because the alphabet string is indexed from 1.
+            y(i) := alphabet(to_integer(unsigned(s(s'high+4-4*i downto s'high+1-4*i))) + 1);
+        end loop;
+        return y;
+    end function;
+
+begin
+
+    -- Instantiate PRNG.
+    -- With the seed mapping of the generator (s0 = bits 31..0, ...,
+    -- s3 = bits 127..96) this default seed equals the state that the C
+    -- reference program loads for
+    --   SEED0 = 0x3141592653589793, SEED1 = 0x0123456789abcdef.
+    inst_prng: entity work.rng_xoshiro128plusplus
+        generic map (
+            init_seed => x"0123456789abcdef3141592653589793" )
+        port map (
+            clk         => clk,
+            rst         => s_rst,
+            reseed      => s_reseed,
+            newseed     => s_newseed,
+            out_ready   => s_ready,
+            out_valid   => s_valid,
+            out_data    => s_data );
+
+    -- Generate clock.
+    -- Toggles every 10 ns (20 ns period) while clock_active is true;
+    -- held at '0' otherwise.
+    clk <= (not clk) after 10 ns when clock_active else '0';
+
+    -- Main simulation process.
+    -- All stimulus changes and all output checks happen on falling clock
+    -- edges, i.e. half a period away from the active (rising) edge.
+    process is
+        file outf1: text is out "sim_xoshiro128plusplus_seed1.dat";
+        file outf2: text is out "sim_xoshiro128plusplus_seed2.dat";
+        variable lin: line;
+        variable nskip: integer;    -- number of cycles to hold s_ready low
+        variable v: std_logic_vector(31 downto 0);  -- word expected to be held
+    begin
+
+        report "Start test bench";
+
+        -- Reset.
+        s_rst       <= '1';
+        s_reseed    <= '0';
+        s_newseed   <= (others => '0');
+        s_ready     <= '0';
+
+        -- Start clock.
+        clock_active    <= true;
+
+        -- Wait 2 clock cycles, then end reset.
+        wait for 30 ns;
+        wait until falling_edge(clk);
+        s_rst       <= '0';
+
+        -- Wait 1 clock cycle to initialize generator.
+        wait until falling_edge(clk);
+        s_ready     <= '1';
+
+        -- Produce numbers
+        for i in 0 to 999 loop
+
+            -- Check that output is valid.
+            assert s_valid = '1' report "Output not valid";
+
+            -- Write output to file.
+            write(lin, "0x" & to_hex_string(s_data));
+            writeline(outf1, lin);
+
+            -- Sometimes skip cycles.
+            -- Every 5th word (i mod 5 = 1) the consumer stalls for
+            -- 1 to 3 cycles to exercise the out_ready handshake.
+            if i mod 5 = 1 then
+                nskip := 1;
+                if i mod 3 = 0 then
+                    nskip := nskip + 1;
+                end if;
+                if i mod 11 = 0 then
+                    nskip := nskip + 1;
+                end if;
+
+                -- While stalled, the word must stay valid and unchanged.
+                v := s_data;
+                s_ready <= '0';
+                for t in 1 to nskip loop
+                    wait until falling_edge(clk);
+                    assert s_valid = '1' report "Output not valid";
+                    assert s_data = v report "Output changed while not ready";
+                end loop;
+                s_ready <= '1';
+            end if;
+
+            -- Go to next cycle.
+            wait until falling_edge(clk);
+
+        end loop;
+
+        -- Re-seed generator.
+        -- The new seed equals the C reference program state for
+        --   SEED0 = 0xfedcba9876543210, SEED1 = 0x3141592653589793.
+        report "Re-seed generator";
+        s_reseed    <= '1';
+        s_newseed   <= x"3141592653589793fedcba9876543210";
+        s_ready     <= '0';
+        wait until falling_edge(clk);
+        s_reseed    <= '0';
+        s_newseed   <= (others => '0');
+
+        -- Wait 1 clock cycle to re-seed generator.
+        wait until falling_edge(clk);
+        s_ready     <= '1';
+
+        -- Produce numbers
+        for i in 0 to 999 loop
+
+            -- Check that output is valid.
+            assert s_valid = '1' report "Output not valid";
+
+            -- Write output to file.
+            write(lin, "0x" & to_hex_string(s_data));
+            writeline(outf2, lin);
+
+            -- Sometimes skip cycles.
+            -- Same stall pattern as in the first run, but shifted to
+            -- the words with i mod 5 = 2.
+            if i mod 5 = 2 then
+                nskip := 1;
+                if i mod 3 = 0 then
+                    nskip := nskip + 1;
+                end if;
+                if i mod 11 = 0 then
+                    nskip := nskip + 1;
+                end if;
+
+                -- While stalled, the word must stay valid and unchanged.
+                v := s_data;
+                s_ready <= '0';
+                for t in 1 to nskip loop
+                    wait until falling_edge(clk);
+                    assert s_valid = '1' report "Output not valid";
+                    assert s_data = v report "Output changed while not ready";
+                end loop;
+                s_ready <= '1';
+            end if;
+
+            -- Go to next cycle.
+            wait until falling_edge(clk);
+
+        end loop;
+
+        -- End simulation.
+        -- Stopping the clock leaves no pending events, and the final
+        -- "wait" suspends this process forever.
+        report "End testbench";
+
+        clock_active    <= false;
+        wait;
+
+    end process;
+
+end architecture;
diff --git a/synth/top_xoshiro128plusplus.vhdl b/synth/top_xoshiro128plusplus.vhdl
new file mode 100644
index 0000000..715dcbc
--- /dev/null
+++ b/synth/top_xoshiro128plusplus.vhdl
@@ -0,0 +1,30 @@
+
+-- Top-level wrapper around rng_xoshiro128plusplus.
+-- It exposes only clock, reset and the ready/valid output interface;
+-- the seed is fixed and run-time re-seeding is tied off.
+
+library ieee;
+use ieee.std_logic_1164.all;
+
+entity top_xoshiro128plusplus is
+    port (
+        clk  : in  std_logic;
+        rst  : in  std_logic;
+        ready: in  std_logic;
+        valid: out std_logic;
+        data:  out std_logic_vector(31 downto 0) );
+end top_xoshiro128plusplus;
+
+architecture arch of top_xoshiro128plusplus is
+begin
+
+    inst_prng: entity work.rng_xoshiro128plusplus
+        generic map (
+            init_seed => x"0123456789abcdef3141592653589793" )
+        port map (
+            clk       => clk,
+            rst       => rst,
+            -- Re-seeding is not used: request held low, seed input zero.
+            reseed    => '0',
+            newseed   => (others => '0'),
+            out_ready => ready,
+            out_valid => valid,
+            out_data  => data );
+
+end arch;
+