Aleksei Sorokin PhD Thesis 2025: Quasi-Monte Carlo Codes
In [1]:
import qmcpy as qp
import numpy as np
Point Sets
In [2]:
%%time
lattice = qp.Lattice(
    dimension = 52,
    randomize = "shift", # for unrandomized lattice set randomize = None
    replications = 16, # R
    order = "radical inverse", # also supports "linear"
    seed = None, # pass integer seed for reproducibility
    generating_vector = "mps.exod2_base2_m20_CKN.txt")
x = lattice(2**16) # a numpy.ndarray with shape 16 x 65536 x 52
CPU times: user 59.7 ms, sys: 28.5 ms, total: 88.2 ms
Wall time: 459 ms
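A quick sanity check of the documented output (a minimal sketch; the shape and range follow the comments above):

assert x.shape == (16, 2**16, 52) # (replications, n, dimension)
assert x.min() >= 0 and x.max() < 1 # shifted lattice points stay in [0,1)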
In [3]:
%%time
dnb2 = qp.DigitalNetB2(
    dimension = 52,
    randomize = "LMS DS", # Matousek's LMS then a digital shift
    # other options ["NUS", "DS", "LMS", None]
    t = 64, # number of LMS bits i.e. number of rows in S_j
    alpha = 2, # interlacing factor for higher order digital nets
    replications = 16, # R
    order = "radical inverse", # also supports "Gray code"
    seed = None, # pass integer seed for reproducibility
    generating_matrices = "joe_kuo.6.21201.txt")
x = dnb2(2**16) # a numpy.ndarray with shape 16 x 65536 x 52
CPU times: user 377 ms, sys: 101 ms, total: 478 ms
Wall time: 479 ms
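As the randomize options above note, passing randomize=None yields the deterministic net. A small sketch, assuming the unreplicated call returns an (n, d) array as in the kernel examples below; the leading all-zeros point is a standard property of unrandomized digital nets:

dnb2_plain = qp.DigitalNetB2(dimension=52, randomize=None)
x_plain = dnb2_plain(4) # shape 4 x 52, no replications axis
assert (x_plain[0] == 0).all() # first point of an unrandomized net is the origin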
In [4]:
%%time
halton = qp.Halton(
    dimension = 52,
    randomize = "LMS DP", # Matousek's LMS then a digital permutation
    # other options ["LMS DS", "LMS", "DP", "DS", "NUS", "QRNG", None]
    t = 64, # number of LMS digits i.e. number of rows in S_j
    replications = 16, # R
    seed = None) # pass integer seed for reproducibility
x = halton(2**10) # a numpy.ndarray with shape 16 x 1024 x 52
CPU times: user 341 ms, sys: 61.9 ms, total: 403 ms
Wall time: 402 ms
Kernel Methods
Lattice + FFTBR + IFFTBR
In [5]:
d = 3 # dimension
m = 10 # will generate 2^m points
n = 2**m # number of points
lattice = qp.Lattice(d) # default to radical inverse order
kernel = qp.KernelShiftInvar(
    d, # dimension
    alpha = [1,2,3], # per dimension smoothness parameters
    lengthscales = [1, 1/2, 1/4]) # per dimension product weights
x = lattice(n_min=0,n_max=n) # shape=(n,d) lattice points
y = np.random.rand(n) # shape=(n,) random uniforms
# fast matrix multiplication and linear system solve
k1 = kernel(x,x[0]) # shape=(n,) first column of Gram matrix
lam = np.sqrt(n)*qp.fftbr(k1) # vector of eigenvalues
yt = qp.fftbr(y)
u = qp.ifftbr(yt*lam) # fast matrix multiplication
v = qp.ifftbr(yt/lam) # fast linear system solve
# efficient fast transform updates
ynew = np.random.rand(n) # shape=(n,) new random uniforms
omega = qp.omega_fftbr(m) # shape=(n,)
ytnew = qp.fftbr(ynew) # shape=(n,)
ytfull = np.concatenate([yt+omega*ytnew,yt-omega*ytnew])/np.sqrt(2) # shape=(2n,)
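Stated informally, the fast routines above rely on the Gram matrix of a shift-invariant kernel at n = 2^m lattice points in radical inverse order being diagonalized by the bit-reversed FFT: K = F^H diag(lam) F with F unitary, where the eigenvalues lam = sqrt(n)*fftbr(k1) come from the first column alone. Matrix-vector products then cost two transforms plus a pointwise multiply, and linear solves a pointwise divide, both O(n log n). The omega_fftbr factors implement the butterfly that merges the length-n transform with the transform of the n new points into the length-2n transform, so doubling n reuses all previous work. The next cell verifies these identities against dense linear algebra.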
In [6]:
# slow matrix multiplication and linear system solve
kmat = kernel(x[:,None,:],x[None,:,:]) # shape=(n,n)
u_slow = kmat@y # matrix multiplication
v_slow = np.linalg.solve(kmat,y) # solve a linear system
# verify correctness
assert np.allclose(u,u_slow)
assert np.allclose(v,v_slow)
# get next samples
xnew = lattice(n_min=n,n_max=2*n) # shape=(n,d) new lattice points
k1new = kernel(xnew,x[0]) # shape=(n,) new values in the first column
# inefficient fast transform update
k1full = np.concatenate([k1,k1new]) # shape=(2*n,) full first column
lamfull_inefficient = np.sqrt(2*n)*qp.fftbr(k1full) # shape=(2*n,) full eigenvalues
yfull = np.concatenate([y,ynew]) # shape=(2*n,) full random values
ytfull_inefficient = qp.fftbr(yfull) # shape=(2*n,) full transformed points
# efficient fast transform updates
lamnew = np.sqrt(n)*qp.fftbr(k1new) # shape=(n,) new eigenvalues
lamfull = np.hstack([lam+omega*lamnew,lam-omega*lamnew])
# verify correctness
assert np.allclose(lamfull,lamfull_inefficient)
assert np.allclose(ytfull,ytfull_inefficient)
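The payoff is asymptotic: the transform-based solve is O(n log n) while the dense solve is O(n^3). A rough timing sketch (illustrative only, not from the thesis; results are machine-dependent), reusing the variables above:

import timeit
t_fast = timeit.timeit(lambda: qp.ifftbr(qp.fftbr(y)/lam), number=100)/100 # O(n log n) solve
t_slow = timeit.timeit(lambda: np.linalg.solve(kmat,y), number=100)/100 # O(n^3) dense solve
print("fast solve: %.2e s, dense solve: %.2e s"%(t_fast,t_slow))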
Digital Net + FWHT
In [7]:
d = 3 # dimension
m = 10 # will generate 2^m points
n = 2**m # number of points
dnb2 = qp.DigitalNetB2(d) # default to radical inverse order
kernel = qp.KernelDigShiftInvar(
    d, # dimension
    t = dnb2.t, # number of bits in integer representation of points
    alpha = [1,2,3], # per dimension smoothness parameters
    lengthscales = [1, 1/2, 1/4]) # per dimension product weights
x = dnb2(n_min=0,n_max=n) # shape=(n,d) digital net
y = np.random.rand(n) # shape=(n,) random uniforms
# fast matrix multiplication and linear system solve
k1 = kernel(x,x[0]) # shape=(n,) first column of Gram matrix
lam = np.sqrt(n)*qp.fwht(k1) # vector of eigenvalues
yt = qp.fwht(y)
u = qp.fwht(yt*lam) # fast matrix multiplication
v = qp.fwht(yt/lam) # fast linear system solve
# efficient fast transform updates
ynew = np.random.rand(n) # shape=(n,) new random uniforms
omega = qp.omega_fwht(m) # shape=(n,)
ytnew = qp.fwht(ynew) # shape=(n,)
ytfull = np.concatenate([yt+omega*ytnew,yt-omega*ytnew])/np.sqrt(2) # shape=(2n,)
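Note that qp.fwht serves as both the forward and inverse transform here, whereas the lattice case paired qp.fftbr with qp.ifftbr: with the orthonormal scaling implied by lam = np.sqrt(n)*qp.fwht(k1), the fast Walsh-Hadamard transform is its own inverse. A minimal check under that assumption:

z = np.random.rand(n)
assert np.allclose(qp.fwht(qp.fwht(z)), z) # orthonormal FWHT is self-inverse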
In [8]:
# slow matrix multiplication and linear system solve
kmat = kernel(x[:,None,:],x[None,:,:]) # shape=(n,n)
u_slow = kmat@y # matrix multiplication
v_slow = np.linalg.solve(kmat,y) # solve a linear system
# verify correctness
assert np.allclose(u,u_slow)
assert np.allclose(v,v_slow)
# get next samples
xnew = dnb2(n_min=n,n_max=2*n) # shape=(n,d) new digital net points
k1new = kernel(xnew,x[0]) # shape=(n,) new values in the first column
# inefficient fast transform update
k1full = np.concatenate([k1,k1new]) # shape=(2*n,) full first column
lamfull_inefficient = np.sqrt(2*n)*qp.fwht(k1full) # shape=(2*n,) full eigenvalues
yfull = np.concatenate([y,ynew]) # shape=(2*n,) full random values
ytfull_inefficient = qp.fwht(yfull) # shape=(2*n,) full transformed points
# efficient fast transform updates
lamnew = np.sqrt(n)*qp.fwht(k1new) # shape=(n,) new eigenvalues
lamfull = np.hstack([lam+omega*lamnew,lam-omega*lamnew])
# verify correctness
assert np.allclose(lamfull,lamfull_inefficient)
assert np.allclose(ytfull,ytfull_inefficient)
Integration
In [9]:
import scipy.stats
def gen_corner_peak_2(x):
    d = x.shape[-1] # x.shape=(...,n,d), e.g., (n,d) or (R,n,d)
    c_tilde = 1/np.arange(1,d+1)**2
    c = 0.25*c_tilde/np.sum(c_tilde)
    y = (1+np.sum(c*x,axis=-1))**(-(d+1))
    return y # y.shape=(...,n), e.g., (n,) or (R,n)
R = 10 # number of randomizations
n = 2**15 # number of points
d = 50 # dimension
dnb2 = qp.DigitalNet(dimension=d, replications=R, seed=7, alpha=3)
x = dnb2(n) # x.shape=(R,n,d)
y = gen_corner_peak_2(x) # y.shape=(R,n)
muhats = np.mean(y,axis=1) # muhats.shape=(R,)
muhat_aggregate = np.mean(muhats) # muhat_aggregate is a scalar
print(muhat_aggregate)
""" 0.014936813948394042 """
alpha = 0.01 # uncertainty level
t_star = -scipy.stats.t.ppf(alpha/2,df=R-1) # quantile of Student's t
stdhat = np.std(muhats,ddof=1) # sample standard deviation (ddof=1 gives an unbiased variance estimate)
std_error = t_star*stdhat/np.sqrt(R)
print(std_error)
""" 5.247445301861484e-07 """
conf_int = [muhat_aggregate-std_error,muhat_aggregate+std_error]
0.014936813948394042
5.247445301861484e-07
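This is the standard replicated-QMC confidence interval: the R replicate means muhats are iid, so muhat_aggregate +/- t_{R-1,1-alpha/2} * stdhat/sqrt(R) is an approximate level-(1-alpha) interval for the integral, here with alpha = 0.01 and R = 10 replications. The next cell automates the same construction with an adaptive stopping criterion.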
In [10]:
def gen_corner_peak_2(x):
    d = x.shape[-1] # x.shape=(...,n,d), e.g., (n,d) or (R,n,d)
    c_tilde = 1/np.arange(1,d+1)**2
    c = 0.25*c_tilde/np.sum(c_tilde)
    y = (1+np.sum(c*x,axis=-1))**(-(d+1))
    return y # y.shape=(...,n), e.g., (n,) or (R,n)
R = 10
d = 50
dnb2 = qp.DigitalNet(dimension=d, replications=R, seed=7, alpha=3)
true_measure = qp.Uniform(dnb2, lower_bound=0, upper_bound=1)
integrand = qp.CustomFun(true_measure=true_measure, g=gen_corner_peak_2)
# equivalent to
# integrand = qp.Genz(dnb2, kind_func="CORNER PEAK", kind_coeff=2)
qmc_algo = qp.CubQMCRepStudentT(integrand, abs_tol=1e-4)
solution,data = qmc_algo.integrate() # run adaptive QMC algorithm
print(solution)
""" 0.014950908095474802 """
conf_int = [data.comb_bound_low,data.comb_bound_high]
std_error = (conf_int[1]-conf_int[0])/2
print(std_error)
""" 2.7968149935497788e-05 """
print(data)
"""
Data (Data)
solution 0.015
comb_bound_low 0.015
comb_bound_high 0.015
comb_bound_diff 5.59e-05
comb_flags 1
n_total 10240
n 10240
n_rep 2^(10)
time_integrate 0.019
CubQMCRepStudentT (AbstractStoppingCriterion)
inflate 1
alpha 0.010
abs_tol 1.00e-04
rel_tol 0
n_init 2^(8)
n_limit 2^(30)
CustomFun (AbstractIntegrand)
Uniform (AbstractTrueMeasure)
lower_bound 0
upper_bound 1
DigitalNetB2 (AbstractLDDiscreteDistribution)
d 50
replications 10
randomize LMS DS
gen_mats_source joe_kuo.6.21201.txt
order RADICAL INVERSE
t 63
alpha 3
n_limit 2^(32)
entropy 7
"""
0.014950908095474802
2.7968149935497788e-05
Data (Data)
solution 0.015
comb_bound_low 0.015
comb_bound_high 0.015
comb_bound_diff 5.59e-05
comb_flags 1
n_total 10240
n 10240
n_rep 2^(10)
time_integrate 0.004
CubQMCRepStudentT (AbstractStoppingCriterion)
inflate 1
alpha 0.010
abs_tol 1.00e-04
rel_tol 0
n_init 2^(8)
n_limit 2^(30)
CustomFun (AbstractIntegrand)
Uniform (AbstractTrueMeasure)
lower_bound 0
upper_bound 1
DigitalNetB2 (AbstractLDDiscreteDistribution)
d 50
replications 10
randomize LMS DS
gen_mats_source joe_kuo.6.21201.txt
order RADICAL INVERSE
t 63
alpha 3
n_limit 2^(32)
entropy 7
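Here n_total = 10240 is R = 10 replications times n_rep = 2^10 points per replication: CubQMCRepStudentT starts at n_init = 2^8 points per replication and doubles n_rep until the Student-t interval half-width meets abs_tol.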
Cantilever Beam
In [11]:
def cantilever_beam_function(T,compute_flags): # T is (n x 3)
    Y = np.zeros((2,len(T)),dtype=float) # (2 x n)
    l,w,t = 100,4,2
    T1,T2,T3 = T[:,0],T[:,1],T[:,2] # Python is indexed from 0
    if compute_flags[0]: # compute D. x^2 is "x**2" in Python
        Y[0] = 4*l**3/(T1*w*t)*np.sqrt(T2**2/t**4+T3**2/w**4)
    if compute_flags[1]: # compute S
        Y[1] = 600*(T2/(w*t**2)+T3/(w**2*t))
    return Y
true_measure = qp.Gaussian(
    sampler = qp.DigitalNetB2(dimension=3,seed=7),
    mean = [2.9e7,500,1000],
    covariance = np.diag([(1.45e6)**2,(100)**2,(100)**2]))
integrand = qp.CustomFun(true_measure,
    g = cantilever_beam_function,
    dimension_indv = 2)
qmc_stop_crit = qp.CubQMCNetG(integrand,
    abs_tol = 1e-3,
    rel_tol = 1e-6)
solution,data = qmc_stop_crit.integrate()
print(data)
Data (Data)
solution [2.426e+00 3.750e+04]
comb_bound_low [2.425e+00 3.750e+04]
comb_bound_high [2.427e+00 3.750e+04]
comb_bound_diff [0.002 0.041]
comb_flags [ True True]
n_total 2^(18)
n [ 1024 262144]
time_integrate 0.053
CubQMCNetG (AbstractStoppingCriterion)
abs_tol 0.001
rel_tol 1.00e-06
n_init 2^(10)
n_limit 2^(35)
CustomFun (AbstractIntegrand)
Gaussian (AbstractTrueMeasure)
mean [2.9e+07 5.0e+02 1.0e+03]
covariance [[2.102e+12 0.000e+00 0.000e+00]
[0.000e+00 1.000e+04 0.000e+00]
[0.000e+00 0.000e+00 1.000e+04]]
decomp_type PCA
DigitalNetB2 (AbstractLDDiscreteDistribution)
d 3
replications 1
randomize LMS DS
gen_mats_source joe_kuo.6.21201.txt
order RADICAL INVERSE
t 63
alpha 1
n_limit 2^(32)
entropy 7
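The per-output sample sizes n = [1024 262144] differ because the compute_flags argument lets the integrand skip outputs whose tolerances are already met: the deflection D met its tolerance at 2^10 points while the stress S required 2^18.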
Bayesian Logistic Regression
In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/haberman/haberman.data',header=None)
df.columns = ['Age','1900 Year','Axillary Nodes','Survival Status']
df.loc[df['Survival Status']==2,'Survival Status'] = 0
x,y = df[['Age','1900 Year','Axillary Nodes']],df['Survival Status']
xt,xv,yt,yv = train_test_split(x,y,test_size=.33,random_state=7)
In [13]:
print(df.head(),'\n')
print(df[['Age','1900 Year','Axillary Nodes']].describe(),'\n')
print(df['Survival Status'].astype(str).describe())
print('\ntrain samples: %d test samples: %d\n'%(len(xt),len(xv)))
print('train positives %d train negatives: %d'%(np.sum(yt==1),np.sum(yt==0)))
print(' test positives %d test negatives: %d'%(np.sum(yv==1),np.sum(yv==0)))
xt.head()
Age 1900 Year Axillary Nodes Survival Status
0 30 64 1 1
1 30 62 3 1
2 30 65 0 1
3 31 59 2 1
4 31 65 4 1
Age 1900 Year Axillary Nodes
count 306.000000 306.000000 306.000000
mean 52.457516 62.852941 4.026144
std 10.803452 3.249405 7.189654
min 30.000000 58.000000 0.000000
25% 44.000000 60.000000 0.000000
50% 52.000000 63.000000 1.000000
75% 60.750000 65.750000 4.000000
max 83.000000 69.000000 52.000000
count 306
unique 2
top 1
freq 225
Name: Survival Status, dtype: object
train samples: 205 test samples: 101
train positives 151 train negatives: 54
test positives 74 test negatives: 27
Out[13]:
|     | Age | 1900 Year | Axillary Nodes |
|-----|-----|-----------|----------------|
| 46  | 41  | 58        | 0              |
| 199 | 57  | 64        | 1              |
| 115 | 49  | 64        | 10             |
| 128 | 50  | 61        | 0              |
| 249 | 63  | 63        | 0              |
In [14]:
blr = qp.BayesianLRCoeffs(
    sampler = qp.DigitalNetB2(4,seed=1),
    feature_array = xt, # np.ndarray of shape (n,d-1)
    response_vector = yt, # np.ndarray of shape (n,)
    prior_mean = 0, # normal prior mean = (0,0,...,0)
    prior_covariance = 5) # normal prior covariance = 5I
qmc_sc = qp.CubQMCNetG(blr,
    abs_tol = .1,
    rel_tol = .5,
    error_fun = "BOTH",
    n_limit=2**18)
blr_coefs,blr_data = qmc_sc.integrate()
print(blr_data)
"""
Data (Data)
solution [-0.128 0.084 -0.091 0.09 ]
comb_bound_low [-0.198 0.069 -0.14 0.074]
comb_bound_high [-0.105 0.129 -0.075 0.138]
comb_bound_diff [0.092 0.06 0.065 0.065]
comb_flags [ True True True True]
n_total 2^(17)
n [[ 1024 1024 2048 131072]
[ 1024 1024 2048 131072]]
time_integrate 0.351
"""
Data (Data)
solution [-0.128 0.084 -0.091 0.09 ]
comb_bound_low [-0.198 0.069 -0.14 0.074]
comb_bound_high [-0.105 0.129 -0.075 0.138]
comb_bound_diff [0.092 0.06 0.065 0.065]
comb_flags [ True True True True]
n_total 2^(17)
n [[ 1024 1024 2048 131072]
[ 1024 1024 2048 131072]]
time_integrate 0.350
CubQMCNetG (AbstractStoppingCriterion)
abs_tol 0.100
rel_tol 2^(-1)
n_init 2^(10)
n_limit 2^(18)
BayesianLRCoeffs (AbstractIntegrand)
Gaussian (AbstractTrueMeasure)
mean 0
covariance 5
decomp_type PCA
DigitalNetB2 (AbstractLDDiscreteDistribution)
d 2^(2)
replications 1
randomize LMS DS
gen_mats_source joe_kuo.6.21201.txt
order RADICAL INVERSE
t 63
alpha 1
n_limit 2^(32)
entropy 1
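The two rows of the sample-size array n reflect that each posterior coefficient mean in Bayesian logistic regression is a ratio of two integrals, a likelihood-weighted numerator over a normalizing denominator, and the stopping criterion tracks numerator and denominator errors separately before combining the bounds.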
In [15]:
from sklearn.linear_model import LogisticRegression
def metrics(y,yhat):
    y,yhat = np.array(y),np.array(yhat)
    tp = np.sum((y==1)*(yhat==1))
    tn = np.sum((y==0)*(yhat==0))
    fp = np.sum((y==0)*(yhat==1))
    fn = np.sum((y==1)*(yhat==0))
    accuracy = (tp+tn)/(len(y))
    precision = tp/(tp+fp)
    recall = tp/(tp+fn)
    return [accuracy,precision,recall]
results = pd.DataFrame({name:[] for name in ['method','Age','1900 Year','Axillary Nodes','Intercept','Accuracy','Precision','Recall']})
for i,l1_ratio in enumerate([0,.5,1]):
    lr = LogisticRegression(random_state=7,penalty="elasticnet",solver='saga',l1_ratio=l1_ratio).fit(xt,yt)
    results.loc[i] = [r'Elastic-Net \lambda=%.1f'%l1_ratio]+lr.coef_.squeeze().tolist()+[lr.intercept_.item()]+metrics(yv,lr.predict(xv))
blr_predict = lambda x: 1/(1+np.exp(-np.array(x)@blr_coefs[:-1]-blr_coefs[-1]))>=.5
blr_train_accuracy = np.mean(blr_predict(xt)==yt)
blr_test_accuracy = np.mean(blr_predict(xv)==yv)
results.loc[len(results)] = ['Bayesian']+blr_coefs.squeeze().tolist()+metrics(yv,blr_predict(xv))
import warnings
warnings.simplefilter('ignore',FutureWarning)
results.set_index('method',inplace=True)
print(results.head())
#root: results.to_latex(root+'lr_table.tex',formatters={'%s'%tt:lambda v:'%.1f'%(100*v) for tt in ['accuracy','precision','recall']},float_format="%.2e")
Age 1900 Year Axillary Nodes Intercept \
method
Elastic-Net \lambda=0.0 -0.012279 0.034401 -0.115153 0.001990
Elastic-Net \lambda=0.5 -0.012041 0.034170 -0.114770 0.002025
Elastic-Net \lambda=1.0 -0.011803 0.033940 -0.114387 0.002061
Bayesian -0.128270 0.083826 -0.090906 0.089903
Accuracy Precision Recall
method
Elastic-Net \lambda=0.0 0.742574 0.766667 0.932432
Elastic-Net \lambda=0.5 0.742574 0.766667 0.932432
Elastic-Net \lambda=1.0 0.742574 0.766667 0.932432
Bayesian 0.326733 1.000000 0.081081
Ishigami Sensitivity Indices
In [16]:
a,b = 7,0.1
dnb2 = qp.DigitalNetB2(3,seed=7)
ishigami = qp.Ishigami(dnb2,a,b)
idxs = np.array([
    [True,False,False],
    [False,True,False],
    [False,False,True],
    [True,True,False],
    [True,False,True],
    [False,True,True]],dtype=bool)
ishigami_si = qp.SensitivityIndices(ishigami,idxs)
qmc_algo = qp.CubQMCNetG(ishigami_si,abs_tol=.05)
solution,data = qmc_algo.integrate()
print(data)
si_closed = solution[0].squeeze()
si_total = solution[1].squeeze()
ci_comb_low_closed = data.comb_bound_low[0].squeeze()
ci_comb_high_closed = data.comb_bound_high[0].squeeze()
ci_comb_low_total = data.comb_bound_low[1].squeeze()
ci_comb_high_total = data.comb_bound_high[1].squeeze()
print("\nApprox took %.1f sec and n = 2^(%d)"%
    (data.time_integrate,np.log2(data.n_total)))
print('\t si_closed:',si_closed)
print('\t si_total:',si_total)
print('\t ci_comb_low_closed:',ci_comb_low_closed)
print('\t ci_comb_high_closed:',ci_comb_high_closed)
print('\t ci_comb_low_total:',ci_comb_low_total)
print('\t ci_comb_high_total:',ci_comb_high_total)
true_indices = qp.Ishigami._exact_sensitivity_indices(idxs,a,b)
si_closed_true = true_indices[0]
si_total_true = true_indices[1]
Data (Data)
solution [[0.315 0.421 0.004 0.735 0.558 0.419]
[0.565 0.444 0.244 0.987 0.558 0.688]]
comb_bound_low [[0.293 0.409 0. 0.706 0.534 0.399]
[0.541 0.435 0.234 0.974 0.539 0.667]]
comb_bound_high [[0.336 0.432 0.008 0.765 0.582 0.438]
[0.59 0.452 0.253 1. 0.577 0.708]]
comb_bound_diff [[0.043 0.023 0.008 0.059 0.047 0.039]
[0.049 0.017 0.02 0.026 0.038 0.042]]
comb_flags [[ True True True True True True]
[ True True True True True True]]
n_total 2^(10)
n [[[1024 1024 1024 1024 1024 1024]
[1024 1024 1024 1024 1024 1024]
[1024 1024 1024 1024 1024 1024]]
[[1024 1024 1024 1024 1024 1024]
[1024 1024 1024 1024 1024 1024]
[1024 1024 1024 1024 1024 1024]]]
time_integrate 0.005
CubQMCNetG (AbstractStoppingCriterion)
abs_tol 0.050
rel_tol 0
n_init 2^(10)
n_limit 2^(35)
SensitivityIndices (AbstractIntegrand)
indices [[ True False False]
[False True False]
[False False True]
[ True True False]
[ True False True]
[False True True]]
Uniform (AbstractTrueMeasure)
lower_bound -3.142
upper_bound 3.142
DigitalNetB2 (AbstractLDDiscreteDistribution)
d 3
replications 1
randomize LMS DS
gen_mats_source joe_kuo.6.21201.txt
order RADICAL INVERSE
t 63
alpha 1
n_limit 2^(32)
entropy 7
Approx took 0.0 sec and n = 2^(10)
si_closed: [0.31495301 0.42054985 0.00403061 0.73545156 0.5580266 0.41868287]
si_total: [0.56524058 0.4435145 0.24364672 0.98722305 0.557734 0.68753131]
ci_comb_low_closed: [0.2934895 0.4092381 0. 0.70577578 0.53435058 0.39922204]
ci_comb_high_closed: [0.33641652 0.4318616 0.00806122 0.76512734 0.58170261 0.4381437 ]
ci_comb_low_total: [0.54066449 0.43508219 0.23389506 0.9744461 0.53864679 0.66669245]
ci_comb_high_total: [0.58981667 0.45194681 0.25339839 1. 0.57682122 0.70837017]
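Since a subset's closed index never exceeds its total index, the two solution rows should be consistent up to estimation error; a quick check, allowing slack at the requested abs_tol = .05:

assert np.all(si_closed <= si_total + .05) # closed <= total, up to estimation error
assert np.allclose(si_closed, si_closed_true, atol=.05) # estimates match exact indices within abs_tol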
Neural Network Classifier Sensitivity Indices
In [17]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
data = load_iris()
feature_names = data["feature_names"]
feature_names = [fn.replace('sepal ','S')\
    .replace('length ','L')\
    .replace('petal ','P')\
    .replace('width ','W')\
    .replace('(cm)','') for fn in feature_names]
target_names = data["target_names"]
xt,xv,yt,yv = train_test_split(data["data"],data["target"],
    test_size = 1/3,
    random_state = 7)
mlpc = MLPClassifier(random_state=7,max_iter=1024).fit(xt,yt)
yhat = mlpc.predict(xv)
print("accuracy: %.1f%%"%(100*(yv==yhat).mean()))
# accuracy: 98.0%
sampler = qp.DigitalNetB2(4,seed=7)
true_measure = qp.Uniform(sampler,
    lower_bound = xt.min(0),
    upper_bound = xt.max(0))
fun = qp.CustomFun(
    true_measure = true_measure,
    g = lambda x: mlpc.predict_proba(x).T,
    dimension_indv = 3)
si_fun = qp.SensitivityIndices(fun,indices="all")
qmc_algo = qp.CubQMCNetG(si_fun,abs_tol=.005)
nn_sis,nn_sis_data = qmc_algo.integrate()
nn_sis.shape
"""
(2, 14, 3)
"""
#print(nn_sis_data.flags_indv.shape)
#print(nn_sis_data.flags_comb.shape)
print('samples: 2^(%d)'%np.log2(nn_sis_data.n_total))
print('time: %.1e'%nn_sis_data.time_integrate)
print('indices:\n%s'%nn_sis_data.integrand.indices)
import pandas as pd
df_closed = pd.DataFrame(nn_sis[0],columns=target_names,index=[str(np.where(idx)[0]) for idx in nn_sis_data.integrand.indices])
print('\nClosed Indices')
print(df_closed)
df_total = pd.DataFrame(nn_sis[1],columns=target_names,index=[str(np.where(idx)[0]) for idx in nn_sis_data.integrand.indices])
print('\nTotal Indices')
df_closed_singletons = df_closed.loc[['[%d]'%i for i in range(4)]].T
df_closed_singletons['sum singletons'] = df_closed_singletons[['[%d]'%i for i in range(4)]].sum(1)
df_closed_singletons.columns = data['feature_names']+['sum']
df_closed_singletons = df_closed_singletons*100
df_closed_singletons
accuracy: 98.0%
samples: 2^(15)
time: 6.4e-01
indices:
[[ True False False False]
[False True False False]
[False False True False]
[False False False True]
[ True True False False]
[ True False True False]
[ True False False True]
[False True True False]
[False True False True]
[False False True True]
[ True True True False]
[ True True False True]
[ True False True True]
[False True True True]]
Closed Indices
setosa versicolor virginica
[0] 0.001323 0.068645 0.077325
[1] 0.063749 0.026565 0.004784
[2] 0.713825 0.325072 0.497800
[3] 0.052967 0.025579 0.120317
[0 1] 0.063925 0.091300 0.085151
[0 2] 0.715316 0.460314 0.637738
[0 3] 0.053469 0.092601 0.205639
[1 2] 0.841655 0.431035 0.513277
[1 3] 0.110739 0.039410 0.131264
[2 3] 0.822910 0.583282 0.703142
[0 1 2] 0.843726 0.570076 0.658272
[0 1 3] 0.112798 0.104804 0.215817
[0 2 3] 0.825330 0.815263 0.945267
[1 2 3] 0.995864 0.739588 0.728499
Total Indices
Out[17]:
|            | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | sum       |
|------------|-------------------|------------------|-------------------|------------------|-----------|
| setosa     | 0.132343          | 6.374860         | 71.382498         | 5.296674         | 83.186375 |
| versicolor | 6.864536          | 2.656530         | 32.507230         | 2.557911         | 44.586208 |
| virginica  | 7.732521          | 0.478364         | 49.779978         | 12.031725        | 70.022587 |
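For every class the four singleton closed indices sum to well under 100% (83.2%, 44.6%, 70.0%), so a substantial share of the variance in the predicted class probabilities comes from interactions among features rather than any single measurement; petal length is the dominant individual feature for all three classes.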