Add optional pipeline stage in xoshiro128++
This commit is contained in:
parent
d8acfbe985
commit
9f5c69c9cc
12
README.txt
12
README.txt
|
@@ -35,17 +35,19 @@ statistical tests and has a relatively long period (2**128 - 1).
|
||||||
|
|
||||||
The VHDL implementation produces 32 new random bits on every (enabled)
|
The VHDL implementation produces 32 new random bits on every (enabled)
|
||||||
clock cycle. It is quite efficient in terms of FPGA resources, but it
|
clock cycle. It is quite efficient in terms of FPGA resources, but it
|
||||||
requires two cascaded 32-bit adders which limits its speed.
|
requires two cascaded 32-bit adders which limit its speed. An optional
|
||||||
|
pipeline stage can be inserted between the adders to improve the timing
|
||||||
|
performance of the circuit.
|
||||||
|
|
||||||
Output word length: 32 bits
|
Output word length: 32 bits
|
||||||
Seed length: 128 bits
|
Seed length: 128 bits
|
||||||
Period: 2**128 - 1
|
Period: 2**128 - 1
|
||||||
|
|
||||||
FPGA resources: general logic and two 32-bit adders
|
FPGA resources: general logic and two 32-bit adders
|
||||||
Synthesis results: 148 LUTs, 161 registers on Spartan-6
|
Synthesis results: 201 LUTs, 194 registers on Spartan-6
|
||||||
148 LUTs, 161 registers on Spartan-7
|
149 LUTs, 194 registers on Spartan-7
|
||||||
Timing results: 250 MHz on Spartan-6 LX45-3
|
Timing results: 400 MHz on Spartan-6 LX45-3
|
||||||
200 MHz on Spartan-7 S25-1
|
350 MHz on Spartan-7 S25-1
|
||||||
|
|
||||||
|
|
||||||
Xoroshiro128+ RNG
|
Xoroshiro128+ RNG
|
||||||
|
|
|
@@ -14,8 +14,9 @@
|
||||||
-- to initialize the generator at reset. The generator also supports
|
-- to initialize the generator at reset. The generator also supports
|
||||||
-- re-seeding at run time.
|
-- re-seeding at run time.
|
||||||
--
|
--
|
||||||
-- After reset and after re-seeding, at least one clock cycle is needed
|
-- After reset and after re-seeding, one or two clock cycles are needed
|
||||||
-- before valid random data appears on the output.
|
-- before valid random data appears on the output. The exact delay
|
||||||
|
-- depends on the setting of the "pipeline" parameter.
|
||||||
--
|
--
|
||||||
-- NOTE: This is not a cryptographic random number generator.
|
-- NOTE: This is not a cryptographic random number generator.
|
||||||
--
|
--
|
||||||
|
@@ -40,7 +41,15 @@ entity rng_xoshiro128plusplus is
|
||||||
|
|
||||||
generic (
|
generic (
|
||||||
-- Default seed value.
|
-- Default seed value.
|
||||||
init_seed: std_logic_vector(127 downto 0) );
|
init_seed: std_logic_vector(127 downto 0);
|
||||||
|
|
||||||
|
-- Enable optional pipeline stage in output calculation.
|
||||||
|
-- This uses an extra 32-bit register but tends to improve
|
||||||
|
-- the timing performance of the circuit.
|
||||||
|
-- If the pipeline stage is enabled, two clock cycles are needed
|
||||||
|
-- before valid output appears after reset and after re-seeding.
|
||||||
|
-- If the pipeline stage is disabled, just one clock cycle is needed.
|
||||||
|
pipeline: boolean := true );
|
||||||
|
|
||||||
port (
|
port (
|
||||||
|
|
||||||
|
@@ -61,7 +70,7 @@ entity rng_xoshiro128plusplus is
|
||||||
out_ready: in std_logic;
|
out_ready: in std_logic;
|
||||||
|
|
||||||
-- High when valid random data is available on the output.
|
-- High when valid random data is available on the output.
|
||||||
-- This signal is low during the first clock cycle after reset and
|
-- This signal is low for 1 or 2 clock cycles after reset and
|
||||||
-- after re-seeding, and high in all other cases.
|
-- after re-seeding, and high in all other cases.
|
||||||
out_valid: out std_logic;
|
out_valid: out std_logic;
|
||||||
|
|
||||||
|
@@ -81,8 +90,12 @@ architecture xoshiro128plusplus_arch of rng_xoshiro128plusplus is
|
||||||
signal reg_state_s2: std_logic_vector(31 downto 0) := init_seed(95 downto 64);
|
signal reg_state_s2: std_logic_vector(31 downto 0) := init_seed(95 downto 64);
|
||||||
signal reg_state_s3: std_logic_vector(31 downto 0) := init_seed(127 downto 96);
|
signal reg_state_s3: std_logic_vector(31 downto 0) := init_seed(127 downto 96);
|
||||||
|
|
||||||
|
-- Optional pipeline register.
|
||||||
|
signal reg_sum_s0s3: std_logic_vector(31 downto 0) := (others => '0');
|
||||||
|
|
||||||
-- Output register.
|
-- Output register.
|
||||||
signal reg_valid: std_logic := '0';
|
signal reg_valid: std_logic := '0';
|
||||||
|
signal reg_nvalid: std_logic := '0';
|
||||||
signal reg_output: std_logic_vector(31 downto 0) := (others => '0');
|
signal reg_output: std_logic_vector(31 downto 0) := (others => '0');
|
||||||
|
|
||||||
begin
|
begin
|
||||||
|
@@ -93,17 +106,50 @@ begin
|
||||||
|
|
||||||
-- Synchronous process.
|
-- Synchronous process.
|
||||||
process (clk) is
|
process (clk) is
|
||||||
|
variable v_prev_s0: std_logic_vector(31 downto 0) := (others => '0');
|
||||||
begin
|
begin
|
||||||
if rising_edge(clk) then
|
if rising_edge(clk) then
|
||||||
|
|
||||||
if out_ready = '1' or reg_valid = '0' then
|
if out_ready = '1' or reg_valid = '0' then
|
||||||
|
|
||||||
-- Prepare output word.
|
-- Prepare output word.
|
||||||
reg_valid <= '1';
|
if pipeline then
|
||||||
reg_output <= std_logic_vector(
|
|
||||||
rotate_left(unsigned(reg_state_s0) +
|
-- Use a pipelined output stage.
|
||||||
unsigned(reg_state_s3), 7) +
|
reg_valid <= reg_nvalid;
|
||||||
unsigned(reg_state_s0));
|
reg_nvalid <= '1';
|
||||||
|
|
||||||
|
-- Calculate the previous value of s0.
|
||||||
|
v_prev_s0 := reg_state_s0 xor
|
||||||
|
std_logic_vector(
|
||||||
|
rotate_right(unsigned(reg_state_s3),
|
||||||
|
11));
|
||||||
|
|
||||||
|
-- Derive output from prev_s0 and intermediate result
|
||||||
|
-- (prev_s0 + prev_s3) calculated in the previous cycle.
|
||||||
|
reg_output <= std_logic_vector(
|
||||||
|
unsigned(v_prev_s0) +
|
||||||
|
rotate_left(unsigned(reg_sum_s0s3),
|
||||||
|
7));
|
||||||
|
|
||||||
|
-- Update the intermediate register (s0 + s3).
|
||||||
|
reg_sum_s0s3 <= std_logic_vector(
|
||||||
|
unsigned(reg_state_s0) +
|
||||||
|
unsigned(reg_state_s3));
|
||||||
|
|
||||||
|
else
|
||||||
|
|
||||||
|
-- Derive output directly from s0 and s3.
|
||||||
|
-- This requires two cascaded 32-bit adders and
|
||||||
|
-- may limit the timing performance of the circuit.
|
||||||
|
reg_valid <= '1';
|
||||||
|
reg_output <= std_logic_vector(
|
||||||
|
rotate_left(
|
||||||
|
unsigned(reg_state_s0) +
|
||||||
|
unsigned(reg_state_s3), 7) +
|
||||||
|
unsigned(reg_state_s0));
|
||||||
|
|
||||||
|
end if;
|
||||||
|
|
||||||
-- Update internal state.
|
-- Update internal state.
|
||||||
reg_state_s0 <= reg_state_s0 xor
|
reg_state_s0 <= reg_state_s0 xor
|
||||||
|
@@ -120,7 +166,9 @@ begin
|
||||||
shift_left(unsigned(reg_state_s1), 9));
|
shift_left(unsigned(reg_state_s1), 9));
|
||||||
|
|
||||||
reg_state_s3 <= std_logic_vector(
|
reg_state_s3 <= std_logic_vector(
|
||||||
rotate_left(unsigned(reg_state_s1 xor reg_state_s3), 11));
|
rotate_left(
|
||||||
|
unsigned(reg_state_s1 xor
|
||||||
|
reg_state_s3), 11));
|
||||||
|
|
||||||
end if;
|
end if;
|
||||||
|
|
||||||
|
@@ -131,6 +179,7 @@ begin
|
||||||
reg_state_s2 <= newseed(95 downto 64);
|
reg_state_s2 <= newseed(95 downto 64);
|
||||||
reg_state_s3 <= newseed(127 downto 96);
|
reg_state_s3 <= newseed(127 downto 96);
|
||||||
reg_valid <= '0';
|
reg_valid <= '0';
|
||||||
|
reg_nvalid <= '0';
|
||||||
end if;
|
end if;
|
||||||
|
|
||||||
-- Synchronous reset.
|
-- Synchronous reset.
|
||||||
|
@@ -140,6 +189,7 @@ begin
|
||||||
reg_state_s2 <= init_seed(95 downto 64);
|
reg_state_s2 <= init_seed(95 downto 64);
|
||||||
reg_state_s3 <= init_seed(127 downto 96);
|
reg_state_s3 <= init_seed(127 downto 96);
|
||||||
reg_valid <= '0';
|
reg_valid <= '0';
|
||||||
|
reg_nvalid <= '0';
|
||||||
reg_output <= (others => '0');
|
reg_output <= (others => '0');
|
||||||
end if;
|
end if;
|
||||||
|
|
||||||
|
|
|
@@ -82,6 +82,12 @@ begin
|
||||||
wait until falling_edge(clk);
|
wait until falling_edge(clk);
|
||||||
s_ready <= '1';
|
s_ready <= '1';
|
||||||
|
|
||||||
|
-- Optionally wait an additional pipeline cycle.
|
||||||
|
if s_valid = '0' then
|
||||||
|
report "Detected pipeline delay";
|
||||||
|
wait until falling_edge(clk);
|
||||||
|
end if;
|
||||||
|
|
||||||
-- Produce numbers
|
-- Produce numbers
|
||||||
for i in 0 to 999 loop
|
for i in 0 to 999 loop
|
||||||
|
|
||||||
|
@@ -130,6 +136,12 @@ begin
|
||||||
wait until falling_edge(clk);
|
wait until falling_edge(clk);
|
||||||
s_ready <= '1';
|
s_ready <= '1';
|
||||||
|
|
||||||
|
-- Optionally wait an additional pipeline cycle.
|
||||||
|
if s_valid = '0' then
|
||||||
|
report "Detected pipeline delay";
|
||||||
|
wait until falling_edge(clk);
|
||||||
|
end if;
|
||||||
|
|
||||||
-- Produce numbers
|
-- Produce numbers
|
||||||
for i in 0 to 999 loop
|
for i in 0 to 999 loop
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue