Add optional pipeline stage in xoshiro128++

This commit is contained in:
Joris van Rantwijk 2020-08-14 11:48:26 +02:00
parent d8acfbe985
commit 9f5c69c9cc
3 changed files with 79 additions and 15 deletions

View File

@ -35,17 +35,19 @@ statistical tests and has a relatively long period (2**128 - 1).
The VHDL implementation produces 32 new random bits on every (enabled) The VHDL implementation produces 32 new random bits on every (enabled)
clock cycle. It is quite efficient in terms of FPGA resources, but it clock cycle. It is quite efficient in terms of FPGA resources, but it
requires two cascaded 32-bit adders which limits its speed. requires two cascaded 32-bit adders which limit its speed. An optional
pipeline stage can be inserted between the adders to improve the timing
performance of the circuit.
Output word length: 32 bits Output word length: 32 bits
Seed length: 128 bits Seed length: 128 bits
Period: 2**128 - 1 Period: 2**128 - 1
FPGA resources: general logic and two 32-bit adders FPGA resources: general logic and two 32-bit adders
Synthesis results: 148 LUTs, 161 registers on Spartan-6 Synthesis results: 201 LUTs, 194 registers on Spartan-6
148 LUTs, 161 registers on Spartan-7 149 LUTs, 194 registers on Spartan-7
Timing results: 250 MHz on Spartan-6 LX45-3 Timing results: 400 MHz on Spartan-6 LX45-3
200 MHz on Spartan-7 S25-1 350 MHz on Spartan-7 S25-1
Xoroshiro128+ RNG Xoroshiro128+ RNG

View File

@ -14,8 +14,9 @@
-- to initialize the generator at reset. The generator also supports -- to initialize the generator at reset. The generator also supports
-- re-seeding at run time. -- re-seeding at run time.
-- --
-- After reset and after re-seeding, at least one clock cycle is needed -- After reset and after re-seeding, one or two clock cycles are needed
-- before valid random data appears on the output. -- before valid random data appears on the output. The exact delay
-- depends on the setting of the "pipeline" parameter.
-- --
-- NOTE: This is not a cryptographic random number generator. -- NOTE: This is not a cryptographic random number generator.
-- --
@ -40,7 +41,15 @@ entity rng_xoshiro128plusplus is
generic ( generic (
-- Default seed value. -- Default seed value.
init_seed: std_logic_vector(127 downto 0) ); init_seed: std_logic_vector(127 downto 0);
-- Enable optional pipeline stage in output calculation.
-- This uses an extra 32-bit register but tends to improve
-- the timing performance of the circuit.
-- If the pipeline stage is enabled, two clock cycles are needed
-- before valid output appears after reset and after re-seeding.
-- If the pipeline stage is disabled, just one clock cycle is needed.
pipeline: boolean := true );
port ( port (
@ -61,7 +70,7 @@ entity rng_xoshiro128plusplus is
out_ready: in std_logic; out_ready: in std_logic;
-- High when valid random data is available on the output. -- High when valid random data is available on the output.
-- This signal is low during the first clock cycle after reset and -- This signal is low for 1 or 2 clock cycles after reset and
-- after re-seeding, and high in all other cases. -- after re-seeding, and high in all other cases.
out_valid: out std_logic; out_valid: out std_logic;
@ -81,8 +90,12 @@ architecture xoshiro128plusplus_arch of rng_xoshiro128plusplus is
signal reg_state_s2: std_logic_vector(31 downto 0) := init_seed(95 downto 64); signal reg_state_s2: std_logic_vector(31 downto 0) := init_seed(95 downto 64);
signal reg_state_s3: std_logic_vector(31 downto 0) := init_seed(127 downto 96); signal reg_state_s3: std_logic_vector(31 downto 0) := init_seed(127 downto 96);
-- Optional pipeline register.
signal reg_sum_s0s3: std_logic_vector(31 downto 0) := (others => '0');
-- Output register. -- Output register.
signal reg_valid: std_logic := '0'; signal reg_valid: std_logic := '0';
signal reg_nvalid: std_logic := '0';
signal reg_output: std_logic_vector(31 downto 0) := (others => '0'); signal reg_output: std_logic_vector(31 downto 0) := (others => '0');
begin begin
@ -93,17 +106,50 @@ begin
-- Synchronous process. -- Synchronous process.
process (clk) is process (clk) is
variable v_prev_s0: std_logic_vector(31 downto 0) := (others => '0');
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if out_ready = '1' or reg_valid = '0' then if out_ready = '1' or reg_valid = '0' then
-- Prepare output word. -- Prepare output word.
reg_valid <= '1'; if pipeline then
reg_output <= std_logic_vector(
rotate_left(unsigned(reg_state_s0) + -- Use a pipelined output stage.
unsigned(reg_state_s3), 7) + reg_valid <= reg_nvalid;
unsigned(reg_state_s0)); reg_nvalid <= '1';
-- Calculate the previous value of s0.
v_prev_s0 := reg_state_s0 xor
std_logic_vector(
rotate_right(unsigned(reg_state_s3),
11));
-- Derive output from prev_s0 and intermediate result
-- (prev_s0 + prev_s3) calculated in the previous cycle.
reg_output <= std_logic_vector(
unsigned(v_prev_s0) +
rotate_left(unsigned(reg_sum_s0s3),
7));
-- Update the intermediate register (s0 + s3).
reg_sum_s0s3 <= std_logic_vector(
unsigned(reg_state_s0) +
unsigned(reg_state_s3));
else
-- Derive output directly from s0 and s3.
-- This requires two cascaded 32-bit adders and
-- may limit the timing performance of the circuit.
reg_valid <= '1';
reg_output <= std_logic_vector(
rotate_left(
unsigned(reg_state_s0) +
unsigned(reg_state_s3), 7) +
unsigned(reg_state_s0));
end if;
-- Update internal state. -- Update internal state.
reg_state_s0 <= reg_state_s0 xor reg_state_s0 <= reg_state_s0 xor
@ -120,7 +166,9 @@ begin
shift_left(unsigned(reg_state_s1), 9)); shift_left(unsigned(reg_state_s1), 9));
reg_state_s3 <= std_logic_vector( reg_state_s3 <= std_logic_vector(
rotate_left(unsigned(reg_state_s1 xor reg_state_s3), 11)); rotate_left(
unsigned(reg_state_s1 xor
reg_state_s3), 11));
end if; end if;
@ -131,6 +179,7 @@ begin
reg_state_s2 <= newseed(95 downto 64); reg_state_s2 <= newseed(95 downto 64);
reg_state_s3 <= newseed(127 downto 96); reg_state_s3 <= newseed(127 downto 96);
reg_valid <= '0'; reg_valid <= '0';
reg_nvalid <= '0';
end if; end if;
-- Synchronous reset. -- Synchronous reset.
@ -140,6 +189,7 @@ begin
reg_state_s2 <= init_seed(95 downto 64); reg_state_s2 <= init_seed(95 downto 64);
reg_state_s3 <= init_seed(127 downto 96); reg_state_s3 <= init_seed(127 downto 96);
reg_valid <= '0'; reg_valid <= '0';
reg_nvalid <= '0';
reg_output <= (others => '0'); reg_output <= (others => '0');
end if; end if;

View File

@ -82,6 +82,12 @@ begin
wait until falling_edge(clk); wait until falling_edge(clk);
s_ready <= '1'; s_ready <= '1';
-- Optionally wait an additional pipeline cycle.
if s_valid = '0' then
report "Detected pipeline delay";
wait until falling_edge(clk);
end if;
-- Produce numbers -- Produce numbers
for i in 0 to 999 loop for i in 0 to 999 loop
@ -130,6 +136,12 @@ begin
wait until falling_edge(clk); wait until falling_edge(clk);
s_ready <= '1'; s_ready <= '1';
-- Optionally wait an additional pipeline cycle.
if s_valid = '0' then
report "Detected pipeline delay";
wait until falling_edge(clk);
end if;
-- Produce numbers -- Produce numbers
for i in 0 to 999 loop for i in 0 to 999 loop