Refactor MT19937 RNG:

* Change the port interface to a valid/ready stream concept.
* Build a 3-stage pipeline into initialization to hopefully improve timing.
* TODO: carefully test whether initialization is correct in all scenarios.
* TODO: do a synthesis run to see whether timing is now reasonable.
This commit is contained in:
Joris van Rantwijk 2016-10-21 22:39:51 +02:00
parent 404c307f62
commit c2142a0c09
3 changed files with 213 additions and 113 deletions

View File

@ -4,7 +4,7 @@
-- Author: Joris van Rantwijk <joris@jorisvr.nl> -- Author: Joris van Rantwijk <joris@jorisvr.nl>
-- --
-- This is a 32-bit random number generator in synthesizable VHDL. -- This is a 32-bit random number generator in synthesizable VHDL.
-- The generator produces 32 new random bits on every (enabled) clock cycle. -- The generator can produce 32 new random bits on every clock cycle.
-- --
-- See also M. Matsumoto, T. Nishimura, "Mersenne Twister: -- See also M. Matsumoto, T. Nishimura, "Mersenne Twister:
-- a 623-dimensionally equidistributed uniform pseudorandom number generator", -- a 623-dimensionally equidistributed uniform pseudorandom number generator",
@ -15,6 +15,7 @@
-- to initialize the generator at reset. The generator also supports -- to initialize the generator at reset. The generator also supports
-- re-seeding at run time. -- re-seeding at run time.
-- --
-- TODO : rewrite this thing about initialization
-- After reset, and after re-seeding, the generator needs 625 clock -- After reset, and after re-seeding, the generator needs 625 clock
-- cycles to initialize its internal state. During this time, the generator -- cycles to initialize its internal state. During this time, the generator
-- is unable to provide correct output. -- is unable to provide correct output.
@ -22,6 +23,17 @@
-- NOTE: This is not a cryptographic random number generator. -- NOTE: This is not a cryptographic random number generator.
-- --
--
-- Copyright (C) 2016 Joris van Rantwijk
--
-- This code is free software; you can redistribute it and/or
-- modify it under the terms of the GNU Lesser General Public
-- License as published by the Free Software Foundation; either
-- version 2.1 of the License, or (at your option) any later version.
--
-- See <https://www.gnu.org/licenses/old-licenses/lgpl-2.1.html>
--
-- TODO : Multiplication in reseeding severely limits the maximum frequency -- TODO : Multiplication in reseeding severely limits the maximum frequency
-- for this design. -- for this design.
-- Add pipelining and increase the number of clock cycles for reseeding. -- Add pipelining and increase the number of clock cycles for reseeding.
@ -35,7 +47,12 @@ entity rng_mt19937 is
generic ( generic (
-- Default seed value. -- Default seed value.
init_seed: std_logic_vector(31 downto 0) ); init_seed: std_logic_vector(31 downto 0);
-- Set to TRUE to force implementation of the constant multiplier
-- as a fixed adder tree; set to FALSE to allow the synthesizer
-- to choose an implementation.
force_const_mul: boolean );
port ( port (
@ -45,21 +62,25 @@ entity rng_mt19937 is
-- Synchronous reset, active high. -- Synchronous reset, active high.
rst: in std_logic; rst: in std_logic;
-- High to generate new output value.
enable: in std_logic;
-- High to re-seed the generator (works regardless of enable signal). -- High to re-seed the generator (works regardless of enable signal).
reseed: in std_logic; reseed: in std_logic;
-- New seed value (must be valid when reseed = '1'). -- New seed value (must be valid when reseed = '1').
newseed: in std_logic_vector(31 downto 0); newseed: in std_logic_vector(31 downto 0);
-- Output value. -- High when the user accepts the current random data word
-- A new value appears on every rising clock edge where enable = '1'. -- and requests new random data for the next clock cycle.
output: out std_logic_vector(31 downto 0); out_ready: in std_logic;
-- High while re-seeding (normal function not available). -- High when valid random data is available on the output.
busy: out std_logic ); -- This signal is low while the generator initializes after reset
-- or re-seeding, and high in all other cases.
out_valid: out std_logic;
-- Random output data (valid when out_valid = '1').
-- A new random word appears after every rising clock edge
-- where out_ready = '1'.
out_data: out std_logic_vector(31 downto 0) );
end entity; end entity;
@ -85,31 +106,27 @@ architecture rng_mt19937_arch of rng_mt19937 is
-- Internal registers. -- Internal registers.
signal reg_enable: std_logic; signal reg_enable: std_logic;
signal reg_reseeding1: std_logic; signal reg_reseeding: std_logic;
signal reg_reseeding2: std_logic; signal reg_reseedstate: std_logic_vector(2 downto 0);
signal reg_a_wdata_p: std_logic_vector(31 downto 0); signal reg_validwait: std_logic;
signal reg_a_rdata_p: std_logic_vector(31 downto 0); signal reg_a_rdata_p: std_logic_vector(31 downto 0);
signal reg_reseed_cnt: std_logic_vector(9 downto 0); signal reg_reseed_cnt: std_logic_vector(9 downto 0);
signal reg_output_buf: std_logic_vector(31 downto 0);
signal reg_seed_a: std_logic_vector(31 downto 0);
signal reg_seed_b: std_logic_vector(31 downto 0);
-- Output register. -- Output register.
signal reg_valid: std_logic;
signal reg_output: std_logic_vector(31 downto 0) := (others => '0'); signal reg_output: std_logic_vector(31 downto 0) := (others => '0');
signal reg_busy: std_logic;
-- -- Multiply unsigned number with constant and discard overflowing bits.
-- function mulconst(x: unsigned)
-- return unsigned
-- is
-- variable t: unsigned(2*x'length-1 downto 0);
-- begin
-- t := x * const_f;
-- return t(x'length-1 downto 0);
-- end function;
-- Multiply unsigned number with constant and discard overflowing bits. -- Multiply unsigned number with constant and discard overflowing bits.
function mulconst(x: unsigned) function mulconst(x: unsigned)
return unsigned return unsigned
is is
variable t: unsigned(2*x'length-1 downto 0);
begin begin
if force_const_mul then
-- Force multiplication via repeated shifts and adds.
return x return x
+ shift_left(x, 2) + shift_left(x, 2)
+ shift_left(x, 5) + shift_left(x, 5)
@ -121,15 +138,24 @@ architecture rng_mt19937_arch of rng_mt19937 is
- shift_left(x, 26) - shift_left(x, 26)
- shift_left(x, 28) - shift_left(x, 28)
+ shift_left(x, 31); + shift_left(x, 31);
else
-- Let synthesizer choose a multiplier implementation.
t := x * const_f;
return t(x'length-1 downto 0);
end if;
end function; end function;
begin begin
--
-- Drive output signal. -- Drive output signal.
output <= reg_output; --
busy <= reg_busy; out_valid <= reg_valid;
out_data <= reg_output;
--
-- Main synchronous process. -- Main synchronous process.
--
process (clk) is process (clk) is
variable y: std_logic_vector(31 downto 0); variable y: std_logic_vector(31 downto 0);
begin begin
@ -152,37 +178,50 @@ begin
end if; end if;
-- Keep previous values of registers. -- Keep previous value from read port A.
if reg_enable = '1' then if reg_enable = '1' then
reg_a_rdata_p <= reg_a_rdata; reg_a_rdata_p <= reg_a_rdata;
reg_a_wdata_p <= reg_a_wdata;
end if; end if;
-- Update reseeding state (3 cycles per address step).
reg_reseedstate(2 downto 1) <= reg_reseedstate(1 downto 0);
reg_reseedstate(0) <= reg_reseedstate(2) and reg_reseeding;
-- Update reseeding counter. -- Update reseeding counter.
reg_reseed_cnt <= std_logic_vector(unsigned(reg_reseed_cnt) + 1); if reg_enable = '1' then
reg_reseed_cnt <=
std_logic_vector(unsigned(reg_reseed_cnt) + 1);
end if;
-- Determine end of reseeding. -- Determine end of reseeding.
reg_busy <= reg_reseeding2; if unsigned(reg_reseed_cnt) = 624 then
reg_reseeding2 <= reg_reseeding1; reg_reseeding <= '0';
if unsigned(reg_reseed_cnt) = 623 then
reg_reseeding1 <= '0';
end if; end if;
-- Enable state machine on next cycle -- Enable state machine on next cycle
-- a) during initialization, and -- a) during initialization, and
-- b) on-demand for new output. -- b) on-demand for new output.
reg_enable <= reg_reseeding2 or enable; reg_enable <= reg_reseedstate(1) or
(not reg_reseeding and
(out_ready or not reg_valid));
-- Reseed state 1: XOR and shift previous state element.
if reg_reseedstate(0) = '1' then
y := reg_a_wdata;
y(1 downto 0) := y(1 downto 0) xor y(31 downto 30);
reg_seed_a <= y;
end if;
-- Reseed state 2: Multiply by constant.
reg_seed_b <= std_logic_vector(mulconst(unsigned(reg_seed_a)));
-- Update internal RNG state. -- Update internal RNG state.
if reg_enable = '1' then if reg_enable = '1' then
if reg_reseeding1 = '1' then if reg_reseeding = '1' then
-- Continue re-seeding loop. -- Reseed state 3: Write next state element.
y := reg_a_wdata; reg_a_wdata <= std_logic_vector(unsigned(reg_seed_b) +
y(1 downto 0) := y(1 downto 0) xor y(31 downto 30);
reg_a_wdata <= std_logic_vector(
mulconst(unsigned(y)) +
unsigned(reg_reseed_cnt)); unsigned(reg_reseed_cnt));
else else
@ -200,20 +239,15 @@ begin
y := "0" & y(31 downto 1); y := "0" & y(31 downto 1);
end if; end if;
reg_a_wdata_p <= reg_a_wdata;
reg_a_wdata <= reg_b_rdata xor y; reg_a_wdata <= reg_b_rdata xor y;
end if; end if;
end if; end if;
-- Produce output value (when enabled). -- Prepare output value.
if enable = '1' then
if reg_enable = '1' then if reg_enable = '1' then
y := reg_a_wdata; y := reg_a_wdata;
else
y := reg_a_wdata_p;
end if;
y(20 downto 0) := y(20 downto 0) xor y(31 downto 11); y(20 downto 0) := y(20 downto 0) xor y(31 downto 11);
y(31 downto 7) := y(31 downto 7) xor y(31 downto 7) := y(31 downto 7) xor
@ -222,37 +256,55 @@ begin
(y(16 downto 0) and const_c(31 downto 15)); (y(16 downto 0) and const_c(31 downto 15));
y(13 downto 0) := y(13 downto 0) xor y(31 downto 18); y(13 downto 0) := y(13 downto 0) xor y(31 downto 18);
reg_output <= y; reg_output_buf <= y;
-- Conditionally push to final output register.
if out_ready = '1' or reg_valid = '0' then
reg_output <= y;
end if;
end if;
-- Use buffered value when restarting after pause.
if out_ready = '1' and reg_enable = '0' then
reg_output <= reg_output_buf;
end if;
-- Indicate valid data at end of initialization.
if reg_enable = '1' then
reg_validwait <= not reg_reseeding;
reg_valid <= reg_validwait and not reg_reseeding;
end if; end if;
-- Start re-seeding. -- Start re-seeding.
if reseed = '1' then if reseed = '1' then
reg_reseeding1 <= '1'; reg_reseeding <= '1';
reg_reseeding2 <= '1'; reg_reseedstate <= "001";
reg_reseed_cnt <= std_logic_vector(to_unsigned(1, 10)); reg_reseed_cnt <= std_logic_vector(to_unsigned(1, 10));
reg_enable <= '1'; reg_enable <= '0';
reg_a_wdata <= newseed; reg_a_wdata <= newseed;
reg_busy <= '1'; reg_valid <= '0';
end if; end if;
-- Synchronous reset. -- Synchronous reset.
if rst = '1' then if rst = '1' then
reg_a_addr <= std_logic_vector(to_unsigned(0, 10)); reg_a_addr <= std_logic_vector(to_unsigned(0, 10));
reg_b_addr <= std_logic_vector(to_unsigned(396, 10)); reg_b_addr <= std_logic_vector(to_unsigned(396, 10));
reg_reseeding1 <= '1'; reg_reseeding <= '1';
reg_reseeding2 <= '1'; reg_reseedstate <= "001";
reg_reseed_cnt <= std_logic_vector(to_unsigned(1, 10)); reg_reseed_cnt <= std_logic_vector(to_unsigned(1, 10));
reg_enable <= '1'; reg_enable <= '0';
reg_a_wdata <= init_seed; reg_a_wdata <= init_seed;
reg_valid <= '0';
reg_output <= (others => '0'); reg_output <= (others => '0');
reg_busy <= '1';
end if; end if;
end if; end if;
end process; end process;
--
-- Synchronous process for block RAM. -- Synchronous process for block RAM.
--
process (clk) is process (clk) is
begin begin
if rising_edge(clk) then if rising_edge(clk) then

View File

@ -6,12 +6,16 @@ GHDL = ghdl
GHDLFLAGS = GHDLFLAGS =
.PHONY: all .PHONY: all
all: tb_xoroshiro128plus all: tb_xoroshiro128plus tb_mt19937
tb_xoroshiro128plus: tb_xoroshiro128plus.o rng_xoroshiro128plus.o tb_xoroshiro128plus: tb_xoroshiro128plus.o rng_xoroshiro128plus.o
tb_xoroshiro128plus.o: tb_xoroshiro128plus.vhdl rng_xoroshiro128plus.o tb_xoroshiro128plus.o: tb_xoroshiro128plus.vhdl rng_xoroshiro128plus.o
rng_xoroshiro128plus.o: ../rtl/rng_xoroshiro128plus.vhdl rng_xoroshiro128plus.o: ../rtl/rng_xoroshiro128plus.vhdl
tb_mt19937: tb_mt19937.o rng_mt19937.o
tb_mt19937.o: tb_mt19937.vhdl rng_mt19937.o
rng_mt19937.o: ../rtl/rng_mt19937.vhdl
tb_%: tb_%.o tb_%: tb_%.o
$(GHDL) $(GHDLFLAGS) -e $@ $(GHDL) $(GHDLFLAGS) -e $@

View File

@ -2,6 +2,8 @@
-- Test bench for PRNG MT19937. -- Test bench for PRNG MT19937.
-- --
use std.textio.all;
library ieee; library ieee;
use ieee.std_logic_1164.all; use ieee.std_logic_1164.all;
use ieee.numeric_std.all; use ieee.numeric_std.all;
@ -15,11 +17,11 @@ architecture arch of tb_mt19937 is
signal clock_active: boolean := false; signal clock_active: boolean := false;
signal s_rst: std_logic; signal s_rst: std_logic;
signal s_enable: std_logic;
signal s_reseed: std_logic; signal s_reseed: std_logic;
signal s_newseed: std_logic_vector(31 downto 0); signal s_newseed: std_logic_vector(31 downto 0);
signal s_output: std_logic_vector(31 downto 0); signal s_ready: std_logic;
signal s_busy: std_logic; signal s_valid: std_logic;
signal s_data: std_logic_vector(31 downto 0);
function to_hex_string(s: std_logic_vector) function to_hex_string(s: std_logic_vector)
return string return string
@ -38,30 +40,36 @@ begin
-- Instantiate PRNG. -- Instantiate PRNG.
inst_prng: entity work.rng_mt19937 inst_prng: entity work.rng_mt19937
generic map ( generic map (
init_seed => x"31415926" ) init_seed => x"31415926",
force_const_mul => false )
port map ( port map (
clk => clk, clk => clk,
rst => s_rst, rst => s_rst,
enable => s_enable,
reseed => s_reseed, reseed => s_reseed,
newseed => s_newseed, newseed => s_newseed,
output => s_output, out_ready => s_ready,
busy => s_busy ); out_valid => s_valid,
out_data => s_data );
-- Generate clock. -- Generate clock.
clk <= (not clk) after 10 ns when clock_active else '0'; clk <= (not clk) after 10 ns when clock_active else '0';
-- Main simulation process. -- Main simulation process.
process is process is
file outf1: text is out "sim_mt19937_seed1.dat";
file outf2: text is out "sim_mt19937_seed2.dat";
variable lin: line;
variable nskip: integer;
variable v: std_logic_vector(31 downto 0);
begin begin
report "Start test bench"; report "Start test bench";
-- Reset. -- Reset.
s_rst <= '1'; s_rst <= '1';
s_enable <= '0';
s_reseed <= '0'; s_reseed <= '0';
s_newseed <= (others => '0'); s_newseed <= (others => '0');
s_ready <= '0';
-- Start clock. -- Start clock.
clock_active <= true; clock_active <= true;
@ -71,62 +79,98 @@ begin
wait until falling_edge(clk); wait until falling_edge(clk);
s_rst <= '0'; s_rst <= '0';
-- Check that generator is initializing.
assert s_busy = '1' report "Generator fails to indicate BUSY";
wait until falling_edge(clk);
assert s_busy = '1' report "Generator fails to indicate BUSY";
-- Give generator time to complete initialization. -- Give generator time to complete initialization.
for i in 0 to 623 loop for i in 0 to 3*624 loop
assert s_valid = '0' report "Generator indicates VALID too early";
wait until falling_edge(clk); wait until falling_edge(clk);
end loop; end loop;
assert s_busy = '0' report "Generator should be ready but still indicates BUSY";
-- Start producing numbers.
s_ready <= '1';
-- Produce numbers -- Produce numbers
for i in 0 to 1500 loop for i in 0 to 999 loop
if i mod 5 = 0 or i mod 7 = 0 then -- Check that output is valid.
s_enable <= '0'; assert s_valid = '1' report "Output not VALID";
wait until falling_edge(clk);
else -- Write output to file.
s_enable <= '1'; write(lin, "0x" & to_hex_string(s_data));
wait until falling_edge(clk); writeline(outf1, lin);
report "Got 0x" & to_hex_string(s_output);
-- Sometimes skip cycles.
if i mod 5 = 1 then
nskip := 1;
if i mod 3 = 0 then
nskip := nskip + 1;
end if; end if;
if i mod 11 = 0 then
nskip := nskip + 1;
end if;
v := s_data;
s_ready <= '0';
for t in 1 to nskip loop
wait until falling_edge(clk);
assert s_valid = '1' report "Output not valid";
assert s_data = v report "Output changed while not ready";
end loop;
s_ready <= '1';
end if;
-- Go to next cycle.
wait until falling_edge(clk);
end loop; end loop;
-- Re-seed generator. -- Re-seed generator.
report "Re-seed generator"; report "Re-seed generator";
s_enable <= '0';
s_reseed <= '1'; s_reseed <= '1';
s_newseed <= x"fedcba98"; s_newseed <= x"fedcba98";
s_ready <= '0';
wait until falling_edge(clk); wait until falling_edge(clk);
s_reseed <= '0'; s_reseed <= '0';
s_newseed <= (others => '0'); s_newseed <= (others => '0');
-- Check that generator is initializing.
assert s_busy = '1' report "Generator fails to indicate BUSY";
wait until falling_edge(clk);
assert s_busy = '1' report "Generator fails to indicate BUSY";
-- Give generator time to complete initialization. -- Give generator time to complete initialization.
for i in 0 to 623 loop for i in 0 to 3*624 loop
assert s_valid = '0' report "Generator indicates VALID too early";
wait until falling_edge(clk); wait until falling_edge(clk);
s_ready <= '1';
end loop; end loop;
assert s_busy = '0' report "Generator should be ready but still indicates BUSY";
-- Produce numbers -- Produce numbers
for i in 0 to 1500 loop for i in 0 to 999 loop
if i mod 5 = 1 or i mod 7 = 1 then -- Check that output is valid.
s_enable <= '0'; assert s_valid = '1' report "Output not VALID";
wait until falling_edge(clk);
else -- Write output to file.
s_enable <= '1'; write(lin, "0x" & to_hex_string(s_data));
wait until falling_edge(clk); writeline(outf2, lin);
report "Got 0x" & to_hex_string(s_output);
-- Sometimes skip cycles.
if i mod 5 = 2 then
nskip := 1;
if i mod 3 = 0 then
nskip := nskip + 1;
end if; end if;
if i mod 11 = 0 then
nskip := nskip + 1;
end if;
v := s_data;
s_ready <= '0';
for t in 1 to nskip loop
wait until falling_edge(clk);
assert s_valid = '1' report "Output not valid";
assert s_data = v report "Output changed while not ready";
end loop;
s_ready <= '1';
end if;
-- Go to next cycle.
wait until falling_edge(clk);
end loop; end loop;