Aleksei Sorokin PhD Thesis 2025: Quasi-Monte Carlo Codes
In [1]:
import qmcpy as qp
import numpy as np
Point Sets
In [2]:
%%time
lattice = qp.Lattice(
    dimension = 52,
    randomize = "shift", # for unrandomized lattice set randomize = None
    replications = 16, # R
    order = "radical inverse", # also supports "linear"
    seed = None, # pass integer seed for reproducibility
    generating_vector = "mps.exod2_base2_m20_CKN.txt")
x = lattice(2**16) # a numpy.ndarray with shape 16 x 65536 x 52
CPU times: user 59.7 ms, sys: 28.5 ms, total: 88.2 ms
Wall time: 459 ms
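A quick sanity check of the documented output (a minimal sketch; the shape and range follow the comments above):

assert x.shape == (16, 2**16, 52) # (replications, n, dimension)
assert x.min() >= 0 and x.max() < 1 # shifted lattice points stay in [0,1)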
In [3]:
%%time
dnb2 = qp.DigitalNetB2(
    dimension = 52,
    randomize = "LMS DS", # Matousek's LMS then a digital shift
    # other options ["NUS", "DS", "LMS", None]
    t = 64, # number of LMS bits i.e. number of rows in S_j
    alpha = 2, # interlacing factor for higher order digital nets
    replications = 16, # R
    order = "radical inverse", # also supports "Gray code"
    seed = None, # pass integer seed for reproducibility
    generating_matrices = "joe_kuo.6.21201.txt")
x = dnb2(2**16) # a numpy.ndarray with shape 16 x 65536 x 52
CPU times: user 377 ms, sys: 101 ms, total: 478 ms
Wall time: 479 ms
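As the randomize options above note, passing randomize=None yields the deterministic net. A small sketch, assuming the unreplicated call returns an (n, d) array as in the kernel examples below; the leading all-zeros point is a standard property of unrandomized digital nets:

dnb2_plain = qp.DigitalNetB2(dimension=52, randomize=None)
x_plain = dnb2_plain(4) # shape 4 x 52, no replications axis
assert (x_plain[0] == 0).all() # first point of an unrandomized net is the origin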
In [4]:
%%time
halton = qp.Halton(
    dimension = 52,
    randomize = "LMS DP", # Matousek's LMS then a digital permutation
    # other options ["LMS DS", "LMS", "DP", "DS", "NUS", "QRNG", None]
    t = 64, # number of LMS digits i.e. number of rows in S_j
    replications = 16, # R
    seed = None) # pass integer seed for reproducibility
x = halton(2**10) # a numpy.ndarray with shape 16 x 1024 x 52
CPU times: user 341 ms, sys: 61.9 ms, total: 403 ms
Wall time: 402 ms
Kernel Methods
Lattice + FFTBR + IFFTBR
In [5]:
d = 3 # dimension
m = 10 # will generate 2^m points
n = 2**m # number of points
lattice = qp.Lattice(d) # default to radical inverse order
kernel = qp.KernelShiftInvar(
    d, # dimension
    alpha = [1,2,3], # per dimension smoothness parameters
    lengthscales = [1, 1/2, 1/4]) # per dimension product weights
x = lattice(n_min=0,n_max=n) # shape=(n,d) lattice points
y = np.random.rand(n) # shape=(n,) random uniforms
# fast matrix multiplication and linear system solve
k1 = kernel(x,x[0]) # shape=(n,) first column of Gram matrix
lam = np.sqrt(n)*qp.fftbr(k1) # vector of eigenvalues
yt = qp.fftbr(y)
u = qp.ifftbr(yt*lam) # fast matrix multiplication
v = qp.ifftbr(yt/lam) # fast linear system solve
# efficient fast transform updates
ynew = np.random.rand(n) # shape=(n,) new random uniforms
omega = qp.omega_fftbr(m) # shape=(n,)
ytnew = qp.fftbr(ynew) # shape=(n,)
ytfull = np.concatenate([yt+omega*ytnew,yt-omega*ytnew])/np.sqrt(2) # shape=(2n,)
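Stated informally, the fast routines above rely on the Gram matrix of a shift-invariant kernel at n = 2^m lattice points in radical inverse order being diagonalized by the bit-reversed FFT: K = F^H diag(lam) F with F unitary, where the eigenvalues lam = sqrt(n)*fftbr(k1) come from the first column alone. Matrix-vector products then cost two transforms plus a pointwise multiply, and linear solves a pointwise divide, both O(n log n). The omega_fftbr factors implement the butterfly that merges the length-n transform with the transform of the n new points into the length-2n transform, so doubling n reuses all previous work. The next cell verifies these identities against dense linear algebra.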
In [6]:
# slow matrix multiplication and linear system solve
kmat = kernel(x[:,None,:],x[None,:,:]) # shape=(n,n)
u_slow = kmat@y # matrix multiplication
v_slow = np.linalg.solve(kmat,y) # solve a linear system
# verify correctness
assert np.allclose(u,u_slow)
assert np.allclose(v,v_slow)
# get next samples
xnew = lattice(n_min=n,n_max=2*n) # shape=(n,d) new lattice points
k1new = kernel(xnew,x[0]) # shape=(n,) new values in the first column
# inefficient fast transform update
k1full = np.concatenate([k1,k1new]) # shape=(2*n,) full first column
lamfull_inefficient = np.sqrt(2*n)*qp.fftbr(k1full) # shape=(2*n,) full eigenvalues
yfull = np.concatenate([y,ynew]) # shape=(2*n,) full random values
ytfull_inefficient = qp.fftbr(yfull) # shape=(2*n,) full transformed points
# efficient fast transform updates
lamnew = np.sqrt(n)*qp.fftbr(k1new) # shape=(n,) new eigenvalues
lamfull = np.hstack([lam+omega*lamnew,lam-omega*lamnew])
# verify correctness
assert np.allclose(lamfull,lamfull_inefficient)
assert np.allclose(ytfull,ytfull_inefficient)
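The payoff is asymptotic: the transform-based solve is O(n log n) while the dense solve is O(n^3). A rough timing sketch (illustrative only, not from the thesis; results are machine-dependent), reusing the variables above:

import timeit
t_fast = timeit.timeit(lambda: qp.ifftbr(qp.fftbr(y)/lam), number=100)/100 # O(n log n) solve
t_slow = timeit.timeit(lambda: np.linalg.solve(kmat,y), number=100)/100 # O(n^3) dense solve
print("fast solve: %.2e s, dense solve: %.2e s"%(t_fast,t_slow))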
Digital Net + FWHT
In [7]:
d = 3 # dimension
m = 10 # will generate 2^m points
n = 2**m # number of points
dnb2 = qp.DigitalNetB2(d) # default to radical inverse order
kernel = qp.KernelDigShiftInvar(
    d, # dimension
    t = dnb2.t, # number of bits in integer representation of points
    alpha = [1,2,3], # per dimension smoothness parameters
    lengthscales = [1, 1/2, 1/4]) # per dimension product weights
x = dnb2(n_min=0,n_max=n) # shape=(n,d) digital net
y = np.random.rand(n) # shape=(n,) random uniforms
# fast matrix multiplication and linear system solve
k1 = kernel(x,x[0]) # shape=(n,) first column of Gram matrix
lam = np.sqrt(n)*qp.fwht(k1) # vector of eigenvalues
yt = qp.fwht(y)
u = qp.fwht(yt*lam) # fast matrix multiplication
v = qp.fwht(yt/lam) # fast linear system solve
# efficient fast transform updates
ynew = np.random.rand(n) # shape=(n,) new random uniforms
omega = qp.omega_fwht(m) # shape=(n,)
ytnew = qp.fwht(ynew) # shape=(n,)
ytfull = np.concatenate([yt+omega*ytnew,yt-omega*ytnew])/np.sqrt(2) # shape=(2n,)
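Note that qp.fwht serves as both the forward and inverse transform here, whereas the lattice case paired qp.fftbr with qp.ifftbr: with the orthonormal scaling implied by lam = np.sqrt(n)*qp.fwht(k1), the fast Walsh-Hadamard transform is its own inverse. A minimal check under that assumption:

z = np.random.rand(n)
assert np.allclose(qp.fwht(qp.fwht(z)), z) # orthonormal FWHT is self-inverse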
In [8]:
# slow matrix multiplication and linear system solve
kmat = kernel(x[:,None,:],x[None,:,:]) # shape=(n,n)
u_slow = kmat@y # matrix multiplication
v_slow = np.linalg.solve(kmat,y) # solve a linear system
# verify correctness
assert np.allclose(u,u_slow)
assert np.allclose(v,v_slow)
# get next samples
xnew = dnb2(n_min=n,n_max=2*n) # shape=(n,d) new digital net points
k1new = kernel(xnew,x[0]) # shape=(n,) new values in the first column
# inefficient fast transform update
k1full = np.concatenate([k1,k1new]) # shape=(2*n,) full first column
lamfull_inefficient = np.sqrt(2*n)*qp.fwht(k1full) # shape=(2*n,) full eigenvalues
yfull = np.concatenate([y,ynew]) # shape=(2*n,) full random values
ytfull_inefficient = qp.fwht(yfull) # shape=(2*n,) full transformed points
# efficient fast transform updates
lamnew = np.sqrt(n)*qp.fwht(k1new) # shape=(n,) new eigenvalues
lamfull = np.hstack([lam+omega*lamnew,lam-omega*lamnew])
# verify correctness
assert np.allclose(lamfull,lamfull_inefficient)
assert np.allclose(ytfull,ytfull_inefficient)
Integration
In [9]:
import scipy.stats
def gen_corner_peak_2(x):
    d = x.shape[-1] # x.shape=(...,n,d), e.g., (n,d) or (R,n,d)
    c_tilde = 1/np.arange(1,d+1)**2
    c = 0.25*c_tilde/np.sum(c_tilde)
    y = (1+np.sum(c*x,axis=-1))**(-(d+1))
    return y # y.shape=(...,n), e.g., (n,) or (R,n)
R = 10 # number of randomizations
n = 2**15 # number of points
d = 50 # dimension
dnb2 = qp.DigitalNet(dimension=d, replications=R, seed=7, alpha=3)
x = dnb2(n) # x.shape=(R,n,d)
y = gen_corner_peak_2(x) # y.shape=(R,n)
muhats = np.mean(y,axis=1) # muhats.shape=(R,)
muhat_aggregate = np.mean(muhats) # muhat_aggregate is a scalar
print(muhat_aggregate)
""" 0.014936813948394042 """
alpha = 0.01 # uncertainty level
t_star = -scipy.stats.t.ppf(alpha/2,df=R-1) # quantile of Student's t
stdhat = np.std(muhats,ddof=1) # sample standard deviation (ddof=1 gives an unbiased variance estimate)
std_error = t_star*stdhat/np.sqrt(R)
print(std_error)
""" 5.247445301861484e-07 """
conf_int = [muhat_aggregate-std_error,muhat_aggregate+std_error]
0.014936813948394042
5.247445301861484e-07
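This is the standard replicated-QMC confidence interval: the R replicate means muhats are iid, so muhat_aggregate +/- t_{R-1,1-alpha/2} * stdhat/sqrt(R) is an approximate level-(1-alpha) interval for the integral, here with alpha = 0.01 and R = 10 replications. The next cell automates the same construction with an adaptive stopping criterion.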
In [10]:
def gen_corner_peak_2(x):
    d = x.shape[-1] # x.shape=(...,n,d), e.g., (n,d) or (R,n,d)
    c_tilde = 1/np.arange(1,d+1)**2
    c = 0.25*c_tilde/np.sum(c_tilde)
    y = (1+np.sum(c*x,axis=-1))**(-(d+1))
    return y # y.shape=(...,n), e.g., (n,) or (R,n)
R = 10
d = 50
dnb2 = qp.DigitalNet(dimension=d, replications=R, seed=7, alpha=3)
true_measure = qp.Uniform(dnb2, lower_bound=0, upper_bound=1)
integrand = qp.CustomFun(true_measure=true_measure, g=gen_corner_peak_2)
# equivalent to
# integrand = qp.Genz(dnb2, kind_func="CORNER PEAK", kind_coeff=2)
qmc_algo = qp.CubQMCRepStudentT(integrand, abs_tol=1e-4)
solution,data = qmc_algo.integrate() # run adaptive QMC algorithm
print(solution)
""" 0.014950908095474802 """
conf_int = [data.comb_bound_low,data.comb_bound_high]
std_error = (conf_int[1]-conf_int[0])/2
print(std_error)
""" 2.7968149935497788e-05 """
print(data)
"""
Data (Data)
solution 0.015
comb_bound_low 0.015
comb_bound_high 0.015
comb_bound_diff 5.59e-05
comb_flags 1
n_total 10240
n 10240
n_rep 2^(10)
time_integrate 0.019
CubQMCRepStudentT (AbstractStoppingCriterion)
inflate 1
alpha 0.010
abs_tol 1.00e-04
rel_tol 0
n_init 2^(8)
n_limit 2^(30)
CustomFun (AbstractIntegrand)
Uniform (AbstractTrueMeasure)
lower_bound 0
upper_bound 1
DigitalNetB2 (AbstractLDDiscreteDistribution)
d 50
replications 10
randomize LMS DS
gen_mats_source joe_kuo.6.21201.txt
order RADICAL INVERSE
t 63
alpha 3
n_limit 2^(32)
entropy 7
"""
0.014950908095474802
2.7968149935497788e-05
Data (Data)
solution 0.015
comb_bound_low 0.015
comb_bound_high 0.015
comb_bound_diff 5.59e-05
comb_flags 1
n_total 10240
n 10240
n_rep 2^(10)
time_integrate 0.004
CubQMCRepStudentT (AbstractStoppingCriterion)
inflate 1
alpha 0.010
abs_tol 1.00e-04
rel_tol 0
n_init 2^(8)
n_limit 2^(30)
CustomFun (AbstractIntegrand)
Uniform (AbstractTrueMeasure)
lower_bound 0
upper_bound 1
DigitalNetB2 (AbstractLDDiscreteDistribution)
d 50
replications 10
randomize LMS DS
gen_mats_source joe_kuo.6.21201.txt
order RADICAL INVERSE
t 63
alpha 3
n_limit 2^(32)
entropy 7
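Here n_total = 10240 is R = 10 replications times n_rep = 2^10 points per replication: CubQMCRepStudentT starts at n_init = 2^8 points per replication and doubles n_rep until the Student-t interval half-width meets abs_tol.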
Cantilever Beam
In [11]:
def cantilever_beam_function(T,compute_flags): # T is (n x 3)
    Y = np.zeros((2,len(T)),dtype=float) # (2 x n)
    l,w,t = 100,4,2
    T1,T2,T3 = T[:,0],T[:,1],T[:,2] # Python is indexed from 0
    if compute_flags[0]: # compute D. x^2 is "x**2" in Python
        Y[0] = 4*l**3/(T1*w*t)*np.sqrt(T2**2/t**4+T3**2/w**4)
    if compute_flags[1]: # compute S
        Y[1] = 600*(T2/(w*t**2)+T3/(w**2*t))
    return Y
true_measure = qp.Gaussian(
    sampler = qp.DigitalNetB2(dimension=3,seed=7),
    mean = [2.9e7,500,1000],
    covariance = np.diag([(1.45e6)**2,(100)**2,(100)**2]))
integrand = qp.CustomFun(true_measure,
    g = cantilever_beam_function,
    dimension_indv = 2)
qmc_stop_crit = qp.CubQMCNetG(integrand,
    abs_tol = 1e-3,
    rel_tol = 1e-6)
solution,data = qmc_stop_crit.integrate()
print(data)
Data (Data)
solution [2.426e+00 3.750e+04]
comb_bound_low [2.425e+00 3.750e+04]
comb_bound_high [2.427e+00 3.750e+04]
comb_bound_diff [0.002 0.041]
comb_flags [ True True]
n_total 2^(18)
n [ 1024 262144]
time_integrate 0.053
CubQMCNetG (AbstractStoppingCriterion)
abs_tol 0.001
rel_tol 1.00e-06
n_init 2^(10)
n_limit 2^(35)
CustomFun (AbstractIntegrand)
Gaussian (AbstractTrueMeasure)
mean [2.9e+07 5.0e+02 1.0e+03]
covariance [[2.102e+12 0.000e+00 0.000e+00]
[0.000e+00 1.000e+04 0.000e+00]
[0.000e+00 0.000e+00 1.000e+04]]
decomp_type PCA
DigitalNetB2 (AbstractLDDiscreteDistribution)
d 3
replications 1
randomize LMS DS
gen_mats_source joe_kuo.6.21201.txt
order RADICAL INVERSE
t 63
alpha 1
n_limit 2^(32)
entropy 7
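The per-output sample sizes n = [1024 262144] differ because the compute_flags argument lets the integrand skip outputs whose tolerances are already met: the deflection D met its tolerance at 2^10 points while the stress S required 2^18.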
Bayesian Logistic Regression
In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/haberman/haberman.data',header=None)
df.columns = ['Age','1900 Year','Axillary Nodes','Survival Status']
df.loc[df['Survival Status']==2,'Survival Status'] = 0
x,y = df[['Age','1900 Year','Axillary Nodes']],df['Survival Status']
xt,xv,yt,yv = train_test_split(x,y,test_size=.33,random_state=7)
In [13]:
print(df.head(),'\n')
print(df[['Age','1900 Year','Axillary Nodes']].describe(),'\n')
print(df['Survival Status'].astype(str).describe())
print('\ntrain samples: %d test samples: %d\n'%(len(xt),len(xv)))
print('train positives %d train negatives: %d'%(np.sum(yt==1),np.sum(yt==0)))
print(' test positives %d test negatives: %d'%(np.sum(yv==1),np.sum(yv==0)))
xt.head()
Age 1900 Year Axillary Nodes Survival Status
0 30 64 1 1
1 30 62 3 1
2 30 65 0 1
3 31 59 2 1
4 31 65 4 1
Age 1900 Year Axillary Nodes
count 306.000000 306.000000 306.000000
mean 52.457516 62.852941 4.026144
std 10.803452 3.249405 7.189654
min 30.000000 58.000000 0.000000
25% 44.000000 60.000000 0.000000
50% 52.000000 63.000000 1.000000
75% 60.750000 65.750000 4.000000
max 83.000000 69.000000 52.000000
count 306
unique 2
top 1
freq 225
Name: Survival Status, dtype: object
train samples: 205 test samples: 101
train positives 151 train negatives: 54
test positives 74 test negatives: 27
Out[13]:
|     | Age | 1900 Year | Axillary Nodes |
|-----|-----|-----------|----------------|
| 46  | 41  | 58        | 0              |
| 199 | 57  | 64        | 1              |
| 115 | 49  | 64        | 10             |
| 128 | 50  | 61        | 0              |
| 249 | 63  | 63        | 0              |
In [14]:
blr = qp.BayesianLRCoeffs(
    sampler = qp.DigitalNetB2(4,seed=1),
    feature_array = xt, # np.ndarray of shape (n,d-1)
    response_vector = yt, # np.ndarray of shape (n,)
    prior_mean = 0, # normal prior mean = (0,0,...,0)
    prior_covariance = 5) # normal prior covariance = 5I
qmc_sc = qp.CubQMCNetG(blr,
    abs_tol = .1,
    rel_tol = .5,
    error_fun = "BOTH",
    n_limit=2**18)
blr_coefs,blr_data = qmc_sc.integrate()
print(blr_data)
"""
Data (Data)
solution [-0.128 0.084 -0.091 0.09 ]
comb_bound_low [-0.198 0.069 -0.14 0.074]
comb_bound_high [-0.105 0.129 -0.075 0.138]
comb_bound_diff [0.092 0.06 0.065 0.065]
comb_flags [ True True True True]
n_total 2^(17)
n [[ 1024 1024 2048 131072]
[ 1024 1024 2048 131072]]
time_integrate 0.351
"""
Data (Data)
solution [-0.128 0.084 -0.091 0.09 ]
comb_bound_low [-0.198 0.069 -0.14 0.074]
comb_bound_high [-0.105 0.129 -0.075 0.138]
comb_bound_diff [0.092 0.06 0.065 0.065]
comb_flags [ True True True True]
n_total 2^(17)
n [[ 1024 1024 2048 131072]
[ 1024 1024 2048 131072]]
time_integrate 0.350
CubQMCNetG (AbstractStoppingCriterion)
abs_tol 0.100
rel_tol 2^(-1)
n_init 2^(10)
n_limit 2^(18)
BayesianLRCoeffs (AbstractIntegrand)
Gaussian (AbstractTrueMeasure)
mean 0
covariance 5
decomp_type PCA
DigitalNetB2 (AbstractLDDiscreteDistribution)
d 2^(2)
replications 1
randomize LMS DS
gen_mats_source joe_kuo.6.21201.txt
order RADICAL INVERSE
t 63
alpha 1
n_limit 2^(32)
entropy 1
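The two rows of the sample-size array n reflect that each posterior coefficient mean in Bayesian logistic regression is a ratio of two integrals, a likelihood-weighted numerator over a normalizing denominator, and the stopping criterion tracks numerator and denominator errors separately before combining the bounds.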
In [15]:
from sklearn.linear_model import LogisticRegression
def metrics(y,yhat):
    y,yhat = np.array(y),np.array(yhat)
    tp = np.sum((y==1)*(yhat==1))
    tn = np.sum((y==0)*(yhat==0))
    fp = np.sum((y==0)*(yhat==1))
    fn = np.sum((y==1)*(yhat==0))
    accuracy = (tp+tn)/(len(y))
    precision = tp/(tp+fp)
    recall = tp/(tp+fn)
    return [accuracy,precision,recall]
results = pd.DataFrame({name:[] for name in ['method','Age','1900 Year','Axillary Nodes','Intercept','Accuracy','Precision','Recall']})
for i,l1_ratio in enumerate([0,.5,1]):
    lr = LogisticRegression(random_state=7,penalty="elasticnet",solver='saga',l1_ratio=l1_ratio).fit(xt,yt)
    results.loc[i] = [r'Elastic-Net \lambda=%.1f'%l1_ratio]+lr.coef_.squeeze().tolist()+[lr.intercept_.item()]+metrics(yv,lr.predict(xv))
blr_predict = lambda x: 1/(1+np.exp(-np.array(x)@blr_coefs[:-1]-blr_coefs[-1]))>=.5
blr_train_accuracy = np.mean(blr_predict(xt)==yt)
blr_test_accuracy = np.mean(blr_predict(xv)==yv)
results.loc[len(results)] = ['Bayesian']+blr_coefs.squeeze().tolist()+metrics(yv,blr_predict(xv))
import warnings
warnings.simplefilter('ignore',FutureWarning)
results.set_index('method',inplace=True)
print(results.head())
#root: results.to_latex(root+'lr_table.tex',formatters={'%s'%tt:lambda v:'%.1f'%(100*v) for tt in ['accuracy','precision','recall']},float_format="%.2e")
Age 1900 Year Axillary Nodes Intercept \
method
Elastic-Net \lambda=0.0 -0.012279 0.034401 -0.115153 0.001990
Elastic-Net \lambda=0.5 -0.012041 0.034170 -0.114770 0.002025
Elastic-Net \lambda=1.0 -0.011803 0.033940 -0.114387 0.002061
Bayesian -0.128270 0.083826 -0.090906 0.089903
Accuracy Precision Recall
method
Elastic-Net \lambda=0.0 0.742574 0.766667 0.932432
Elastic-Net \lambda=0.5 0.742574 0.766667 0.932432
Elastic-Net \lambda=1.0 0.742574 0.766667 0.932432
Bayesian 0.326733 1.000000 0.081081
Ishigami Sensitivity Indices
In [16]:
a,b = 7,0.1
dnb2 = qp.DigitalNetB2(3,seed=7)
ishigami = qp.Ishigami(dnb2,a,b)
idxs = np.array([
    [True,False,False],
    [False,True,False],
    [False,False,True],
    [True,True,False],
    [True,False,True],
    [False,True,True]],dtype=bool)
ishigami_si = qp.SensitivityIndices(ishigami,idxs)
qmc_algo = qp.CubQMCNetG(ishigami_si,abs_tol=.05)
solution,data = qmc_algo.integrate()
print(data)
si_closed = solution[0].squeeze()
si_total = solution[1].squeeze()
ci_comb_low_closed = data.comb_bound_low[0].squeeze()
ci_comb_high_closed = data.comb_bound_high[0].squeeze()
ci_comb_low_total = data.comb_bound_low[1].squeeze()
ci_comb_high_total = data.comb_bound_high[1].squeeze()
print("\nApprox took %.1f sec and n = 2^(%d)"%
    (data.time_integrate,np.log2(data.n_total)))
print('\t si_closed:',si_closed)
print('\t si_total:',si_total)
print('\t ci_comb_low_closed:',ci_comb_low_closed)
print('\t ci_comb_high_closed:',ci_comb_high_closed)
print('\t ci_comb_low_total:',ci_comb_low_total)
print('\t ci_comb_high_total:',ci_comb_high_total)
true_indices = qp.Ishigami._exact_sensitivity_indices(idxs,a,b)
si_closed_true = true_indices[0]
si_total_true = true_indices[1]
Data (Data)
solution [[0.315 0.421 0.004 0.735 0.558 0.419]
[0.565 0.444 0.244 0.987 0.558 0.688]]
comb_bound_low [[0.293 0.409 0. 0.706 0.534 0.399]
[0.541 0.435 0.234 0.974 0.539 0.667]]
comb_bound_high [[0.336 0.432 0.008 0.765 0.582 0.438]
[0.59 0.452 0.253 1. 0.577 0.708]]
comb_bound_diff [[0.043 0.023 0.008 0.059 0.047 0.039]
[0.049 0.017 0.02 0.026 0.038 0.042]]
comb_flags [[ True True True True True True]
[ True True True True True True]]
n_total 2^(10)
n [[[1024 1024 1024 1024 1024 1024]
[1024 1024 1024 1024 1024 1024]
[1024 1024 1024 1024 1024 1024]]
[[1024 1024 1024 1024 1024 1024]
[1024 1024 1024 1024 1024 1024]
[1024 1024 1024 1024 1024 1024]]]
time_integrate 0.005
CubQMCNetG (AbstractStoppingCriterion)
abs_tol 0.050
rel_tol 0
n_init 2^(10)
n_limit 2^(35)
SensitivityIndices (AbstractIntegrand)
indices [[ True False False]
[False True False]
[False False True]
[ True True False]
[ True False True]
[False True True]]
Uniform (AbstractTrueMeasure)
lower_bound -3.142
upper_bound 3.142
DigitalNetB2 (AbstractLDDiscreteDistribution)
d 3
replications 1
randomize LMS DS
gen_mats_source joe_kuo.6.21201.txt
order RADICAL INVERSE
t 63
alpha 1
n_limit 2^(32)
entropy 7
Approx took 0.0 sec and n = 2^(10)
si_closed: [0.31495301 0.42054985 0.00403061 0.73545156 0.5580266 0.41868287]
si_total: [0.56524058 0.4435145 0.24364672 0.98722305 0.557734 0.68753131]
ci_comb_low_closed: [0.2934895 0.4092381 0. 0.70577578 0.53435058 0.39922204]
ci_comb_high_closed: [0.33641652 0.4318616 0.00806122 0.76512734 0.58170261 0.4381437 ]
ci_comb_low_total: [0.54066449 0.43508219 0.23389506 0.9744461 0.53864679 0.66669245]
ci_comb_high_total: [0.58981667 0.45194681 0.25339839 1. 0.57682122 0.70837017]
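Since a subset's closed index never exceeds its total index, the two solution rows should be consistent up to estimation error; a quick check, allowing slack at the requested abs_tol = .05:

assert np.all(si_closed <= si_total + .05) # closed <= total, up to estimation error
assert np.allclose(si_closed, si_closed_true, atol=.05) # estimates match exact indices within abs_tol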
Neural Network Classifier Sensitivity Indices
In [17]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
data = load_iris()
feature_names = data["feature_names"]
feature_names = [fn.replace('sepal ','S')\
    .replace('length ','L')\
    .replace('petal ','P')\
    .replace('width ','W')\
    .replace('(cm)','') for fn in feature_names]
target_names = data["target_names"]
xt,xv,yt,yv = train_test_split(data["data"],data["target"],
    test_size = 1/3,
    random_state = 7)
mlpc = MLPClassifier(random_state=7,max_iter=1024).fit(xt,yt)
yhat = mlpc.predict(xv)
print("accuracy: %.1f%%"%(100*(yv==yhat).mean()))
# accuracy: 98.0%
sampler = qp.DigitalNetB2(4,seed=7)
true_measure = qp.Uniform(sampler,
    lower_bound = xt.min(0),
    upper_bound = xt.max(0))
fun = qp.CustomFun(
    true_measure = true_measure,
    g = lambda x: mlpc.predict_proba(x).T,
    dimension_indv = 3)
si_fun = qp.SensitivityIndices(fun,indices="all")
qmc_algo = qp.CubQMCNetG(si_fun,abs_tol=.005)
nn_sis,nn_sis_data = qmc_algo.integrate()
nn_sis.shape
"""
(2, 14, 3)
"""
#print(nn_sis_data.flags_indv.shape)
#print(nn_sis_data.flags_comb.shape)
print('samples: 2^(%d)'%np.log2(nn_sis_data.n_total))
print('time: %.1e'%nn_sis_data.time_integrate)
print('indices:\n%s'%nn_sis_data.integrand.indices)
import pandas as pd
df_closed = pd.DataFrame(nn_sis[0],columns=target_names,index=[str(np.where(idx)[0]) for idx in nn_sis_data.integrand.indices])
print('\nClosed Indices')
print(df_closed)
df_total = pd.DataFrame(nn_sis[1],columns=target_names,index=[str(np.where(idx)[0]) for idx in nn_sis_data.integrand.indices])
print('\nTotal Indices')
df_closed_singletons = df_closed.loc[['[%d]'%i for i in range(4)]].T
df_closed_singletons['sum singletons'] = df_closed_singletons[['[%d]'%i for i in range(4)]].sum(1)
df_closed_singletons.columns = data['feature_names']+['sum']
df_closed_singletons = df_closed_singletons*100
df_closed_singletons
accuracy: 98.0%
samples: 2^(15)
time: 6.4e-01
indices:
[[ True False False False]
[False True False False]
[False False True False]
[False False False True]
[ True True False False]
[ True False True False]
[ True False False True]
[False True True False]
[False True False True]
[False False True True]
[ True True True False]
[ True True False True]
[ True False True True]
[False True True True]]
Closed Indices
setosa versicolor virginica
[0] 0.001323 0.068645 0.077325
[1] 0.063749 0.026565 0.004784
[2] 0.713825 0.325072 0.497800
[3] 0.052967 0.025579 0.120317
[0 1] 0.063925 0.091300 0.085151
[0 2] 0.715316 0.460314 0.637738
[0 3] 0.053469 0.092601 0.205639
[1 2] 0.841655 0.431035 0.513277
[1 3] 0.110739 0.039410 0.131264
[2 3] 0.822910 0.583282 0.703142
[0 1 2] 0.843726 0.570076 0.658272
[0 1 3] 0.112798 0.104804 0.215817
[0 2 3] 0.825330 0.815263 0.945267
[1 2 3] 0.995864 0.739588 0.728499
Total Indices
Out[17]:
|            | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | sum       |
|------------|-------------------|------------------|-------------------|------------------|-----------|
| setosa     | 0.132343          | 6.374860         | 71.382498         | 5.296674         | 83.186375 |
| versicolor | 6.864536          | 2.656530         | 32.507230         | 2.557911         | 44.586208 |
| virginica  | 7.732521          | 0.478364         | 49.779978         | 12.031725        | 70.022587 |
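For every class the four singleton closed indices sum to well under 100% (83.2%, 44.6%, 70.0%), so a substantial share of the variance in the predicted class probabilities comes from interactions among features rather than any single measurement; petal length is the dominant individual feature for all three classes.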