Add optional pipeline stage in xoshiro128++

This commit is contained in:
Joris van Rantwijk 2020-08-14 11:48:26 +02:00
parent d8acfbe985
commit 9f5c69c9cc
3 changed files with 79 additions and 15 deletions

View File

@ -35,17 +35,19 @@ statistical tests and has a relatively long period (2**128 - 1).
The VHDL implementation produces 32 new random bits on every (enabled)
clock cycle. It is quite efficient in terms of FPGA resources, but it
requires two cascaded 32-bit adders which limits its speed.
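requires two cascaded 32-bit adders, which limit its speed. An optional
pipeline stage can be inserted between the adders to improve the timing
performance of the circuit, at the cost of one extra 32-bit register and
one extra clock cycle of latency after reset and after re-seeding.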
Output word length: 32 bits
Seed length: 128 bits
Period: 2**128 - 1
FPGA resources: general logic and two 32-bit adders
Synthesis results: 148 LUTs, 161 registers on Spartan-6
148 LUTs, 161 registers on Spartan-7
Timing results: 250 MHz on Spartan-6 LX45-3
200 MHz on Spartan-7 S25-1
Synthesis results: 201 LUTs, 194 registers on Spartan-6
149 LUTs, 194 registers on Spartan-7
Timing results: 400 MHz on Spartan-6 LX45-3
350 MHz on Spartan-7 S25-1
Xoroshiro128+ RNG

View File

@ -14,8 +14,9 @@
-- to initialize the generator at reset. The generator also supports
-- re-seeding at run time.
--
-- After reset and after re-seeding, at least one clock cycle is needed
-- before valid random data appears on the output.
-- After reset and after re-seeding, one or two clock cycles are needed
-- before valid random data appears on the output: one cycle when the
-- "pipeline" generic is false, two cycles when it is true.
--
-- NOTE: This is not a cryptographic random number generator.
--
@ -40,7 +41,15 @@ entity rng_xoshiro128plusplus is
generic (
-- Default seed value.
init_seed: std_logic_vector(127 downto 0) );
init_seed: std_logic_vector(127 downto 0);
-- Enable optional pipeline stage in output calculation.
-- This uses an extra 32-bit register but tends to improve
-- the timing performance of the circuit.
-- If the pipeline stage is enabled, two clock cycles are needed
-- before valid output appears after reset and after re-seeding.
-- If the pipeline stage is disabled, just one clock cycle is needed.
pipeline: boolean := true );
port (
@ -61,7 +70,7 @@ entity rng_xoshiro128plusplus is
out_ready: in std_logic;
-- High when valid random data is available on the output.
-- This signal is low during the first clock cycle after reset and
-- This signal is low for 1 clock cycle (pipeline disabled) or 2 clock
-- cycles (pipeline enabled) after reset and after re-seeding, and high
-- in all other cases.
out_valid: out std_logic;
@ -81,8 +90,12 @@ architecture xoshiro128plusplus_arch of rng_xoshiro128plusplus is
signal reg_state_s2: std_logic_vector(31 downto 0) := init_seed(95 downto 64);
signal reg_state_s3: std_logic_vector(31 downto 0) := init_seed(127 downto 96);
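-- Optional pipeline register: holds the intermediate sum (s0 + s3)
-- between the two adders. Only updated when "pipeline" is true.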
signal reg_sum_s0s3: std_logic_vector(31 downto 0) := (others => '0');
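-- Output register and valid flags.
-- In pipelined mode, reg_nvalid goes high one cycle before reg_valid:
-- it marks that the pipeline register has been loaded, and its value is
-- copied into reg_valid on the next enabled clock cycle.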
signal reg_valid: std_logic := '0';
signal reg_nvalid: std_logic := '0';
signal reg_output: std_logic_vector(31 downto 0) := (others => '0');
begin
@ -93,18 +106,51 @@ begin
-- Synchronous process.
process (clk) is
variable v_prev_s0: std_logic_vector(31 downto 0) := (others => '0');
begin
if rising_edge(clk) then
if out_ready = '1' or reg_valid = '0' then
-- Prepare output word.
if pipeline then
-- Use a pipelined output stage.
reg_valid <= reg_nvalid;
reg_nvalid <= '1';
-- Calculate the previous value of s0.
v_prev_s0 := reg_state_s0 xor
std_logic_vector(
rotate_right(unsigned(reg_state_s3),
11));
-- Derive output from prev_s0 and intermediate result
-- (prev_s0 + prev_s3) calculated in the previous cycle.
reg_output <= std_logic_vector(
unsigned(v_prev_s0) +
rotate_left(unsigned(reg_sum_s0s3),
7));
-- Update the intermediate register (s0 + s3).
reg_sum_s0s3 <= std_logic_vector(
unsigned(reg_state_s0) +
unsigned(reg_state_s3));
else
-- Derive output directly from s0 and s3.
-- This requires two cascaded 32-bit adders and
-- may limit the timing performance of the circuit.
reg_valid <= '1';
reg_output <= std_logic_vector(
rotate_left(unsigned(reg_state_s0) +
rotate_left(
unsigned(reg_state_s0) +
unsigned(reg_state_s3), 7) +
unsigned(reg_state_s0));
end if;
-- Update internal state.
reg_state_s0 <= reg_state_s0 xor
reg_state_s1 xor
@ -120,7 +166,9 @@ begin
shift_left(unsigned(reg_state_s1), 9));
reg_state_s3 <= std_logic_vector(
rotate_left(unsigned(reg_state_s1 xor reg_state_s3), 11));
rotate_left(
unsigned(reg_state_s1 xor
reg_state_s3), 11));
end if;
@ -131,6 +179,7 @@ begin
reg_state_s2 <= newseed(95 downto 64);
reg_state_s3 <= newseed(127 downto 96);
reg_valid <= '0';
reg_nvalid <= '0';
end if;
-- Synchronous reset.
@ -140,6 +189,7 @@ begin
reg_state_s2 <= init_seed(95 downto 64);
reg_state_s3 <= init_seed(127 downto 96);
reg_valid <= '0';
reg_nvalid <= '0';
reg_output <= (others => '0');
end if;

View File

@ -82,6 +82,12 @@ begin
wait until falling_edge(clk);
s_ready <= '1';
-- If the output is not valid yet, the pipeline stage is enabled:
-- wait one additional clock cycle for the first valid output word.
if s_valid = '0' then
report "Detected pipeline delay";
wait until falling_edge(clk);
end if;
-- Produce numbers
for i in 0 to 999 loop
@ -130,6 +136,12 @@ begin
wait until falling_edge(clk);
s_ready <= '1';
-- If the output is not valid yet, the pipeline stage is enabled:
-- wait one additional clock cycle for the first valid output word.
if s_valid = '0' then
report "Detected pipeline delay";
wait until falling_edge(clk);
end if;
-- Produce numbers
for i in 0 to 999 loop