Digital Net Base 2 Generator¶
from qmcpy import *
from numpy import *
from matplotlib import pyplot
from time import time
import os
Basic usage¶
s = DigitalNetB2(5,seed=7)
s
DigitalNetB2 (AbstractLDDiscreteDistribution) d 5 replications 1 randomize LMS_DS gen_mats_source joe_kuo.6.21201.txt order NATURAL t 63 alpha 1 n_limit 2^(32) entropy 7
s.gen_samples(4) # generate Sobol' samples
array([[0.864483 , 0.31330935, 0.09580848, 0.24636182, 0.13239161], [0.03735175, 0.82581618, 0.71117322, 0.9042245 , 0.5285374 ], [0.65627575, 0.57484149, 0.94341128, 0.67738059, 0.38460276], [0.48731325, 0.06429227, 0.37126087, 0.45706178, 0.7690279 ]])
s.gen_samples(n_min=2,n_max=4) # generate from specific range. If range is not powers of 2, use graycode
array([[0.65627575, 0.57484149, 0.94341128, 0.67738059, 0.38460276], [0.48731325, 0.06429227, 0.37126087, 0.45706178, 0.7690279 ]])
t0 = time()
s.gen_samples(2**25)
print('Time: %.2f'%(time()-t0))
Time: 4.61
Randomize with digital shift / linear matrix scramble¶
s = DigitalNetB2(2,randomize='LMS_DS') # linear matrix scramble with digital shift (default)
s.gen_samples(2)
array([[0.5032812 , 0.3002277 ], [0.37437374, 0.86989486]])
s = DigitalNetB2(2,randomize='LMS') # just linear matrix scrambling
s.gen_samples(2, warn=False) # suppress warning that the first point is still the origin
array([[0. , 0. ], [0.95339709, 0.93679526]])
s = DigitalNetB2(2,randomize='DS') # just digital shift
s.gen_samples(2)
array([[0.72193614, 0.16766092], [0.22193614, 0.66766092]])
Support for graycode and natural ordering¶
s = DigitalNetB2(2,randomize=False,graycode=False)
s.gen_samples(n_min=4,n_max=8,warn=False) # don't warn about non-randomized samples including the origin
/Users/alegresor/Desktop/QMCSoftware/qmcpy/discrete_distribution/digital_net_b2/digital_net_b2.py:263: ParameterWarning: graycode argument deprecated, set order='GRAY' or order='NATURAL' instead. Using order='NATURAL' warnings.warn("graycode argument deprecated, set order='GRAY' or order='NATURAL' instead. Using order='%s'"%order,ParameterWarning)
array([[0.125, 0.625], [0.625, 0.125], [0.375, 0.375], [0.875, 0.875]])
s = DigitalNetB2(2,randomize=False,graycode=True)
s.gen_samples(n_min=4,n_max=8,warn=False)
/Users/alegresor/Desktop/QMCSoftware/qmcpy/discrete_distribution/digital_net_b2/digital_net_b2.py:263: ParameterWarning: graycode argument deprecated, set order='GRAY' or order='NATURAL' instead. Using order='GRAY' warnings.warn("graycode argument deprecated, set order='GRAY' or order='NATURAL' instead. Using order='%s'"%order,ParameterWarning)
array([[0.375, 0.375], [0.875, 0.875], [0.625, 0.125], [0.125, 0.625]])
Custom Dimensions¶
s = DigitalNetB2(3,randomize=False)
s.gen_samples(n_min=4,n_max=8)
array([[0.125, 0.625, 0.375], [0.625, 0.125, 0.875], [0.375, 0.375, 0.625], [0.875, 0.875, 0.125]])
s = DigitalNetB2([1,2],randomize=False) # use only the second and third dimensions in the sequence
s.gen_samples(n_min=4,n_max=8)
array([[0.625, 0.375], [0.125, 0.875], [0.375, 0.625], [0.875, 0.125]])
Elementary intervals¶
def plt_ei(x,ax,x_cuts,y_cuts):
for ix in arange(1,x_cuts,dtype=float): ax.axvline(x=ix/x_cuts,color='r',alpha=.5)
for iy in arange(1,y_cuts,dtype=float): ax.axhline(y=iy/y_cuts,color='r',alpha=.5)
ax.scatter(x[:,0],x[:,1],color='b',s=25)
ax.set_xlim([0,1])
ax.set_xticks([0,1])
ax.set_ylim([0,1])
ax.set_yticks([0,1])
ax.set_aspect(1)
# unrandomized
fig,ax = pyplot.subplots(ncols=3,nrows=2,figsize=(15,10))
s = DigitalNetB2(2,randomize=False)
plt_ei(s.gen_samples(2**2,warn=False),ax[0,0],2,2)
plt_ei(s.gen_samples(2**3,warn=False),ax[1,0],8,0)
plt_ei(s.gen_samples(2**4,warn=False),ax[0,1],4,4)
plt_ei(s.gen_samples(2**4,warn=False),ax[1,1],2,8)
plt_ei(s.gen_samples(2**6,warn=False),ax[0,2],8,8)
plt_ei(s.gen_samples(2**6,warn=False),ax[1,2],16,4)
fig.suptitle("Unrandomized Sobol' points");
fig,ax = pyplot.subplots(ncols=3,nrows=2,figsize=(15,10))
s = DigitalNetB2(2,randomize='LMS_DS')
plt_ei(s.gen_samples(2**2,warn=False),ax[0,0],2,2)
plt_ei(s.gen_samples(2**3,warn=False),ax[1,0],8,0)
plt_ei(s.gen_samples(2**4,warn=False),ax[0,1],4,4)
plt_ei(s.gen_samples(2**4,warn=False),ax[1,1],2,8)
plt_ei(s.gen_samples(2**6,warn=False),ax[0,2],8,8)
plt_ei(s.gen_samples(2**6,warn=False),ax[1,2],16,4)
fig.suptitle("Sobol' points with linear matrix scramble and digital shift");
fig,ax = pyplot.subplots(ncols=3,nrows=2,figsize=(15,10))
s = DigitalNetB2(2,order="GRAY",randomize='LMS')
plt_ei(s.gen_samples(2**2,warn=False),ax[0,0],2,2)
plt_ei(s.gen_samples(2**3,warn=False),ax[1,0],8,0)
plt_ei(s.gen_samples(2**4,warn=False),ax[0,1],4,4)
plt_ei(s.gen_samples(2**4,warn=False),ax[1,1],2,8)
plt_ei(s.gen_samples(2**6,warn=False),ax[0,2],8,8)
plt_ei(s.gen_samples(2**6,warn=False),ax[1,2],16,4)
fig.suptitle("Sobol' points with linear matrix scrambling shift");
fig,ax = pyplot.subplots(ncols=3,nrows=2,figsize=(15,10))
s = DigitalNetB2(2,order="GRAY",randomize='DS')
plt_ei(s.gen_samples(2**2,warn=False),ax[0,0],2,2)
plt_ei(s.gen_samples(2**3,warn=False),ax[1,0],8,0)
plt_ei(s.gen_samples(2**4,warn=False),ax[0,1],4,4)
plt_ei(s.gen_samples(2**4,warn=False),ax[1,1],2,8)
plt_ei(s.gen_samples(2**6,warn=False),ax[0,2],8,8)
plt_ei(s.gen_samples(2**6,warn=False),ax[1,2],16,4)
fig.suptitle("Sobol' points with digital shift");
fig,ax = pyplot.subplots(figsize=(15,5))
s = DigitalNetB2([50,51],randomize='LMS_DS')
plt_ei(s.gen_samples(2**4),ax,4,4)
fig.suptitle("Sobol' points dimension 50 vs 51");
# nice properties do not necessary hold in higher dimensions
fig,ax = pyplot.subplots(figsize=(15,5))
s = DigitalNetB2(2,randomize='LMS_DS',graycode=True)
plt_ei(s.gen_samples(n_min=1,n_max=16),ax,4,4)
x16 = s.gen_samples(n_min=16,n_max=17)
ax.scatter(x16[:,0],x16[:,1],color='g',s=100)
fig.suptitle("Sobol' points 2-17");
# better to take points 1-16 instead of 2-16
/Users/alegresor/Desktop/QMCSoftware/qmcpy/discrete_distribution/digital_net_b2/digital_net_b2.py:263: ParameterWarning: graycode argument deprecated, set order='GRAY' or order='NATURAL' instead. Using order='GRAY' warnings.warn("graycode argument deprecated, set order='GRAY' or order='NATURAL' instead. Using order='%s'"%order,ParameterWarning)
Skipping points vs. randomization¶
The first point in a Sobol' sequence is $\vec{0}$. Therefore, some software packages skip this point as various transformations will map 0 to NAN. For example, the inverse CDF of a Gaussian density at $\vec{0}$ is $-\infty$. However, we argue that it is better to randomize points than to simply skip the first point for the following reasons:
- Skipping the first point does not give the uniformity advantages when taking powers of 2. For example, notice the green point in the above plot of "Sobol' points 2-17".
- Randomizing Sobol' points will return 0 with probability 0.
A more thorough explanation can be found in Art Owen's paper On dropping the first Sobol' point
So always randomize your Sobol' unless you specifically need unrandomized points. In QMCPy the Sobol' generator defaults to randomization with a linear matrix scramble.
The below code runs tests comparing unrandomized vs. randomization with linear matrix scramble with digital shift vs. randomization with just digital shift. Furthermore, we compare taking the first $2^n$ points vs. dropping the first point and taking $2^n$ points vs. dropping the first point and taking $2^n-1$ points.
The 2D keister function is used for testing purposes as it can be exactly integrated using Mathematica:
N[Integrate[E^(-x1^2 - x2^2) Cos[Sqrt[x1^2 + x2^2]], {x1, -Infinity, Infinity}, {x2, -Infinity, Infinity}]]
Plots the median (line) and fills the middle 10% of observations
def plt_k2d_sobol(ax,rtype,colors,plts=[1,2,3]):
trials = 100
ms = arange(10,18)
ax.set_xscale('log',base=2)
ax.set_yscale('log',base=10)
epsilons = {}
if 1 in plts:
epsilons['$2^m$ points with skipped $1^{st}$ point'] = zeros((trials,len(ms)),dtype=double)
if 2 in plts:
epsilons['$2^m-1$ points with skipped $1^{st}$ point'] = zeros((trials,len(ms)),dtype=double)
if 3 in plts:
epsilons['$2^n$ points no skipping'] = zeros((trials,len(ms)),dtype=double)
for i,m in enumerate(ms):
for t in range(trials):
s = DigitalNetB2(2,randomize=rtype,order="GRAY")
k = Keister(s)
solution = k.get_exact_value(d=2)
if 1 in plts:
epsilons['$2^m$ points with skipped $1^{st}$ point'][t,i] = \
abs(k.f(s.gen_samples(n_min=1,n_max=1+2**m)).mean()-solution)
if 2 in plts:
epsilons['$2^m-1$ points with skipped $1^{st}$ point'][t,i] = \
abs(k.f(s.gen_samples(n_min=1,n_max=2**m)).mean()-solution)
if 3 in plts:
epsilons['$2^n$ points no skipping'][t,i] = \
abs(k.f(s.gen_samples(n=2**m)).mean()-solution)
for i,(label,eps) in enumerate(epsilons.items()):
bot = percentile(eps, 40, axis=0)
med = percentile(eps, 50, axis=0)
top = percentile(eps, 60, axis=0)
ax.loglog(2**ms,med,label=label+' with %s randomization'%rtype,color=colors[i])
ax.fill_between(2**ms, bot, top, color=colors[i], alpha=.3)
# compare randomization fig,ax = pyplot.subplots(figsize=(10,10))
fig,ax = pyplot.subplots(figsize=(10,10))
plt_k2d_sobol(ax,None,['r','g'],[1,2])
plt_k2d_sobol(ax,'LMS_DS',['c','m','b'],[1,2,3])
plt_k2d_sobol(ax,'DS',['y','k','orangered'],[1,2,3])
ax.legend()
ax.set_xlabel('n')
ax.set_ylabel('$\\epsilon$');