Refactor MT19937 RNG:

* Change port interface towards valid/ready stream concept.
 * Build 3-stage pipeline into initialization to hopefully improve timing.
 * TODO : carefully test if initialization is correct in all scenarios
 * TODO : synthesis run to see if timing is now reasonable
This commit is contained in:
Joris van Rantwijk 2016-10-21 22:39:51 +02:00
parent 404c307f62
commit c2142a0c09
3 changed files with 213 additions and 113 deletions

View File

@ -4,7 +4,7 @@
-- Author: Joris van Rantwijk <joris@jorisvr.nl>
--
-- This is a 32-bit random number generator in synthesizable VHDL.
-- The generator produces 32 new random bits on every (enabled) clock cycle.
-- The generator can produce 32 new random bits on every clock cycle.
--
-- See also M. Matsumoto, T. Nishimura, "Mersenne Twister:
-- a 623-dimensionally equidistributed uniform pseudorandom number generator",
@ -15,6 +15,7 @@
-- to initialize the generator at reset. The generator also supports
-- re-seeding at run time.
--
-- TODO : rewrite this thing about initialization
-- After reset, and after re-seeding, the generator needs 625 clock
-- cycles to initialize its internal state. During this time, the generator
-- is unable to provide correct output.
@ -22,6 +23,17 @@
-- NOTE: This is not a cryptographic random number generator.
--
--
-- Copyright (C) 2016 Joris van Rantwijk
--
-- This code is free software; you can redistribute it and/or
-- modify it under the terms of the GNU Lesser General Public
-- License as published by the Free Software Foundation; either
-- version 2.1 of the License, or (at your option) any later version.
--
-- See <https://www.gnu.org/licenses/old-licenses/lgpl-2.1.html>
--
-- TODO : Multiplication in reseeding severely limits the maximum frequency
-- for this design.
-- Add pipelining and increase the number of clock cycles for reseeding.
@ -35,7 +47,12 @@ entity rng_mt19937 is
generic (
-- Default seed value.
init_seed: std_logic_vector(31 downto 0) );
init_seed: std_logic_vector(31 downto 0);
-- Set to TRUE to force implementation of the constant multiplier
-- as a fixed adder tree; set to FALSE to allow the synthesizer
-- to choose an implementation.
force_const_mul: boolean );
port (
@ -45,21 +62,25 @@ entity rng_mt19937 is
-- Synchronous reset, active high.
rst: in std_logic;
-- High to generate new output value.
enable: in std_logic;
-- High to re-seed the generator (works regardless of enable signal).
reseed: in std_logic;
-- New seed value (must be valid when reseed = '1').
newseed: in std_logic_vector(31 downto 0);
-- Output value.
-- A new value appears on every rising clock edge where enable = '1'.
output: out std_logic_vector(31 downto 0);
-- High when the user accepts the current random data word
-- and requests new random data for the next clock cycle.
out_ready: in std_logic;
-- High while re-seeding (normal function not available).
busy: out std_logic );
-- High when valid random data is available on the output.
-- This signal is low while the generator is initializing after reset
-- and after re-seeding, and high in all other cases.
out_valid: out std_logic;
-- Random output data (valid when out_valid = '1').
-- A new random word appears after every rising clock edge
-- where out_ready = '1'.
out_data: out std_logic_vector(31 downto 0) );
end entity;
@ -85,31 +106,27 @@ architecture rng_mt19937_arch of rng_mt19937 is
-- Internal registers.
signal reg_enable: std_logic;
signal reg_reseeding1: std_logic;
signal reg_reseeding2: std_logic;
signal reg_a_wdata_p: std_logic_vector(31 downto 0);
signal reg_reseeding: std_logic;
signal reg_reseedstate: std_logic_vector(2 downto 0);
signal reg_validwait: std_logic;
signal reg_a_rdata_p: std_logic_vector(31 downto 0);
signal reg_reseed_cnt: std_logic_vector(9 downto 0);
signal reg_output_buf: std_logic_vector(31 downto 0);
signal reg_seed_a: std_logic_vector(31 downto 0);
signal reg_seed_b: std_logic_vector(31 downto 0);
-- Output register.
signal reg_valid: std_logic;
signal reg_output: std_logic_vector(31 downto 0) := (others => '0');
signal reg_busy: std_logic;
-- -- Multiply unsigned number with constant and discard overflowing bits.
-- function mulconst(x: unsigned)
-- return unsigned
-- is
-- variable t: unsigned(2*x'length-1 downto 0);
-- begin
-- t := x * const_f;
-- return t(x'length-1 downto 0);
-- end function;
-- Multiply unsigned number with constant and discard overflowing bits.
function mulconst(x: unsigned)
return unsigned
is
variable t: unsigned(2*x'length-1 downto 0);
begin
if force_const_mul then
-- Force multiplication via repeated shifts and adds.
return x
+ shift_left(x, 2)
+ shift_left(x, 5)
@ -121,15 +138,24 @@ architecture rng_mt19937_arch of rng_mt19937 is
- shift_left(x, 26)
- shift_left(x, 28)
+ shift_left(x, 31);
else
-- Let synthesizer choose a multiplier implementation.
t := x * const_f;
return t(x'length-1 downto 0);
end if;
end function;
begin
--
-- Drive output signal.
output <= reg_output;
busy <= reg_busy;
--
out_valid <= reg_valid;
out_data <= reg_output;
--
-- Main synchronous process.
--
process (clk) is
variable y: std_logic_vector(31 downto 0);
begin
@ -152,38 +178,51 @@ begin
end if;
-- Keep previous values of registers.
-- Keep previous value from read port A.
if reg_enable = '1' then
reg_a_rdata_p <= reg_a_rdata;
reg_a_wdata_p <= reg_a_wdata;
end if;
-- Update reseeding state (3 cycles per address step).
reg_reseedstate(2 downto 1) <= reg_reseedstate(1 downto 0);
reg_reseedstate(0) <= reg_reseedstate(2) and reg_reseeding;
-- Update reseeding counter.
reg_reseed_cnt <= std_logic_vector(unsigned(reg_reseed_cnt) + 1);
if reg_enable = '1' then
reg_reseed_cnt <=
std_logic_vector(unsigned(reg_reseed_cnt) + 1);
end if;
-- Determine end of reseeding.
reg_busy <= reg_reseeding2;
reg_reseeding2 <= reg_reseeding1;
if unsigned(reg_reseed_cnt) = 623 then
reg_reseeding1 <= '0';
if unsigned(reg_reseed_cnt) = 624 then
reg_reseeding <= '0';
end if;
-- Enable state machine on next cycle
-- a) during initialization, and
-- b) on-demand for new output.
reg_enable <= reg_reseeding2 or enable;
reg_enable <= reg_reseedstate(1) or
(not reg_reseeding and
(out_ready or not reg_valid));
-- Reseed state 1: XOR and shift previous state element.
if reg_reseedstate(0) = '1' then
y := reg_a_wdata;
y(1 downto 0) := y(1 downto 0) xor y(31 downto 30);
reg_seed_a <= y;
end if;
-- Reseed state 2: Multiply by constant.
reg_seed_b <= std_logic_vector(mulconst(unsigned(reg_seed_a)));
-- Update internal RNG state.
if reg_enable = '1' then
if reg_reseeding1 = '1' then
if reg_reseeding = '1' then
-- Continue re-seeding loop.
y := reg_a_wdata;
y(1 downto 0) := y(1 downto 0) xor y(31 downto 30);
reg_a_wdata <= std_logic_vector(
mulconst(unsigned(y)) +
unsigned(reg_reseed_cnt) );
-- Reseed state 3: Write next state element.
reg_a_wdata <= std_logic_vector(unsigned(reg_seed_b) +
unsigned(reg_reseed_cnt));
else
@ -200,20 +239,15 @@ begin
y := "0" & y(31 downto 1);
end if;
reg_a_wdata_p <= reg_a_wdata;
reg_a_wdata <= reg_b_rdata xor y;
end if;
end if;
-- Produce output value (when enabled).
if enable = '1' then
-- Prepare output value.
if reg_enable = '1' then
y := reg_a_wdata;
else
y := reg_a_wdata_p;
end if;
y(20 downto 0) := y(20 downto 0) xor y(31 downto 11);
y(31 downto 7) := y(31 downto 7) xor
@ -222,37 +256,55 @@ begin
(y(16 downto 0) and const_c(31 downto 15));
y(13 downto 0) := y(13 downto 0) xor y(31 downto 18);
reg_output <= y;
reg_output_buf <= y;
-- Conditionally push to final output register.
if out_ready = '1' or reg_valid = '0' then
reg_output <= y;
end if;
end if;
-- Use buffered value when restarting after pause.
if out_ready = '1' and reg_enable = '0' then
reg_output <= reg_output_buf;
end if;
-- Indicate valid data at end of initialization.
if reg_enable = '1' then
reg_validwait <= not reg_reseeding;
reg_valid <= reg_validwait and not reg_reseeding;
end if;
-- Start re-seeding.
if reseed = '1' then
reg_reseeding1 <= '1';
reg_reseeding2 <= '1';
reg_reseeding <= '1';
reg_reseedstate <= "001";
reg_reseed_cnt <= std_logic_vector(to_unsigned(1, 10));
reg_enable <= '1';
reg_enable <= '0';
reg_a_wdata <= newseed;
reg_busy <= '1';
reg_valid <= '0';
end if;
-- Synchronous reset.
if rst = '1' then
reg_a_addr <= std_logic_vector(to_unsigned(0, 10));
reg_b_addr <= std_logic_vector(to_unsigned(396, 10));
reg_reseeding1 <= '1';
reg_reseeding2 <= '1';
reg_reseeding <= '1';
reg_reseedstate <= "001";
reg_reseed_cnt <= std_logic_vector(to_unsigned(1, 10));
reg_enable <= '1';
reg_enable <= '0';
reg_a_wdata <= init_seed;
reg_valid <= '0';
reg_output <= (others => '0');
reg_busy <= '1';
end if;
end if;
end process;
--
-- Synchronous process for block RAM.
--
process (clk) is
begin
if rising_edge(clk) then

View File

@ -6,12 +6,16 @@ GHDL = ghdl
GHDLFLAGS =
.PHONY: all
all: tb_xoroshiro128plus
all: tb_xoroshiro128plus tb_mt19937
tb_xoroshiro128plus: tb_xoroshiro128plus.o rng_xoroshiro128plus.o
tb_xoroshiro128plus.o: tb_xoroshiro128plus.vhdl rng_xoroshiro128plus.o
rng_xoroshiro128plus.o: ../rtl/rng_xoroshiro128plus.vhdl
tb_mt19937: tb_mt19937.o rng_mt19937.o
tb_mt19937.o: tb_mt19937.vhdl rng_mt19937.o
rng_mt19937.o: ../rtl/rng_mt19937.vhdl
tb_%: tb_%.o
$(GHDL) $(GHDLFLAGS) -e $@

View File

@ -2,6 +2,8 @@
-- Test bench for PRNG MT19937.
--
use std.textio.all;
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
@ -15,11 +17,11 @@ architecture arch of tb_mt19937 is
signal clock_active: boolean := false;
signal s_rst: std_logic;
signal s_enable: std_logic;
signal s_reseed: std_logic;
signal s_newseed: std_logic_vector(31 downto 0);
signal s_output: std_logic_vector(31 downto 0);
signal s_busy: std_logic;
signal s_ready: std_logic;
signal s_valid: std_logic;
signal s_data: std_logic_vector(31 downto 0);
function to_hex_string(s: std_logic_vector)
return string
@ -38,30 +40,36 @@ begin
-- Instantiate PRNG.
inst_prng: entity work.rng_mt19937
generic map (
init_seed => x"31415926" )
init_seed => x"31415926",
force_const_mul => false )
port map (
clk => clk,
rst => s_rst,
enable => s_enable,
reseed => s_reseed,
newseed => s_newseed,
output => s_output,
busy => s_busy );
out_ready => s_ready,
out_valid => s_valid,
out_data => s_data );
-- Generate clock.
clk <= (not clk) after 10 ns when clock_active else '0';
-- Main simulation process.
process is
file outf1: text is out "sim_mt19937_seed1.dat";
file outf2: text is out "sim_mt19937_seed2.dat";
variable lin: line;
variable nskip: integer;
variable v: std_logic_vector(31 downto 0);
begin
report "Start test bench";
-- Reset.
s_rst <= '1';
s_enable <= '0';
s_reseed <= '0';
s_newseed <= (others => '0');
s_ready <= '0';
-- Start clock.
clock_active <= true;
@ -71,62 +79,98 @@ begin
wait until falling_edge(clk);
s_rst <= '0';
-- Check that generator is initializing.
assert s_busy = '1' report "Generator fails to indicate BUSY";
wait until falling_edge(clk);
assert s_busy = '1' report "Generator fails to indicate BUSY";
-- Give generator time to complete initialization.
for i in 0 to 623 loop
for i in 0 to 3*624 loop
assert s_valid = '0' report "Generator indicates VALID too early";
wait until falling_edge(clk);
end loop;
assert s_busy = '0' report "Generator should be ready but still indicates BUSY";
-- Start producing numbers.
s_ready <= '1';
-- Produce numbers
for i in 0 to 1500 loop
for i in 0 to 999 loop
if i mod 5 = 0 or i mod 7 = 0 then
s_enable <= '0';
wait until falling_edge(clk);
else
s_enable <= '1';
wait until falling_edge(clk);
report "Got 0x" & to_hex_string(s_output);
-- Check that output is valid.
assert s_valid = '1' report "Output not VALID";
-- Write output to file.
write(lin, "0x" & to_hex_string(s_data));
writeline(outf1, lin);
-- Sometimes skip cycles.
if i mod 5 = 1 then
nskip := 1;
if i mod 3 = 0 then
nskip := nskip + 1;
end if;
if i mod 11 = 0 then
nskip := nskip + 1;
end if;
v := s_data;
s_ready <= '0';
for t in 1 to nskip loop
wait until falling_edge(clk);
assert s_valid = '1' report "Output not valid";
assert s_data = v report "Output changed while not ready";
end loop;
s_ready <= '1';
end if;
-- Go to next cycle.
wait until falling_edge(clk);
end loop;
-- Re-seed generator.
report "Re-seed generator";
s_enable <= '0';
s_reseed <= '1';
s_newseed <= x"fedcba98";
s_ready <= '0';
wait until falling_edge(clk);
s_reseed <= '0';
s_newseed <= (others => '0');
-- Check that generator is initializing.
assert s_busy = '1' report "Generator fails to indicate BUSY";
wait until falling_edge(clk);
assert s_busy = '1' report "Generator fails to indicate BUSY";
-- Give generator time to complete initialization.
for i in 0 to 623 loop
for i in 0 to 3*624 loop
assert s_valid = '0' report "Generator indicates VALID too early";
wait until falling_edge(clk);
s_ready <= '1';
end loop;
assert s_busy = '0' report "Generator should be ready but still indicates BUSY";
-- Produce numbers
for i in 0 to 1500 loop
for i in 0 to 999 loop
if i mod 5 = 1 or i mod 7 = 1 then
s_enable <= '0';
wait until falling_edge(clk);
else
s_enable <= '1';
wait until falling_edge(clk);
report "Got 0x" & to_hex_string(s_output);
-- Check that output is valid.
assert s_valid = '1' report "Output not VALID";
-- Write output to file.
write(lin, "0x" & to_hex_string(s_data));
writeline(outf2, lin);
-- Sometimes skip cycles.
if i mod 5 = 2 then
nskip := 1;
if i mod 3 = 0 then
nskip := nskip + 1;
end if;
if i mod 11 = 0 then
nskip := nskip + 1;
end if;
v := s_data;
s_ready <= '0';
for t in 1 to nskip loop
wait until falling_edge(clk);
assert s_valid = '1' report "Output not valid";
assert s_data = v report "Output changed while not ready";
end loop;
s_ready <= '1';
end if;
-- Go to next cycle.
wait until falling_edge(clk);
end loop;