Monte Carlo for Vector Functions of Integrals¶

Demo Accompanying Aleksei Sorokin's PyData Chicago 2023 Talk

Monte Carlo Problem¶

$$\text{True Mean} = \mu = \mathbb{E}[g(T)] = \mathbb{E}[f(X)] = \int_{[0,1]^d} f(x) \mathrm{d} x \approx \frac{1}{n} \sum_{i=0}^{n-1} f(X_i) = \hat{\mu} = \text{Sample Mean}$$

$T$, original measure on $\mathcal{T}$
$g: \mathcal{T} \to \mathbb{R}$, original integrand
$X \sim \mathcal{U}[0,1]^d$, transformed measure
$f: [0,1]^d \to \mathbb{R}$, transformed integrand

Python Setup¶

In [1]:

Copied!





import qmcpy as qp
import numpy as np
import scipy.stats
import pandas as pd
import time
from matplotlib import pyplot
colors = pyplot.rcParams['axes.prop_cycle'].by_key()['color']
%matplotlib inline
import qmcpy as qp
import numpy as np
import scipy.stats
import pandas as pd
import time
from matplotlib import pyplot
colors = pyplot.rcParams['axes.prop_cycle'].by_key()['color']
%matplotlib inline

Discrete Distribution¶

Generate sampling locations $X_0,\dots,X_{n-1} \sim \mathcal{U}[0,1]^d$

Independent Identically Distributed (IID) Points for Crude Monte Carlo (CMC)¶

In [2]:

Copied!

iid = qp.IIDStdUniform(dimension=3)
iid.gen_samples(n=4)
iid = qp.IIDStdUniform(dimension=3)
iid.gen_samples(n=4)

Out[2]:

array([[0.61584953, 0.08238197, 0.79597353],
       [0.47972561, 0.36991422, 0.4520753 ],
       [0.9705357 , 0.20129176, 0.05264096],
       [0.12821756, 0.27695067, 0.4200091 ]])

In [3]:

Copied!

iid.gen_samples(4)
iid.gen_samples(4)

Out[3]:

array([[0.75988525, 0.06535736, 0.88857573],
       [0.93750365, 0.82111122, 0.62840155],
       [0.73705715, 0.22116385, 0.86364701],
       [0.76327139, 0.67695714, 0.73070693]])

In [4]:

Copied!

iid
iid

Out[4]:

IIDStdUniform (AbstractIIDDiscreteDistribution)
    d               3
    replications    1
    entropy         255445230289856938445909338179220309576

Low Discrepancy (LD) Points for Quasi-Monte Carlo (QMC)¶

In [5]:

Copied!

ld_lattice = qp.Lattice(3)
ld_lattice.gen_samples(4)
ld_lattice = qp.Lattice(3)
ld_lattice.gen_samples(4)

Out[5]:

array([[0.7323192 , 0.47466547, 0.80517287],
       [0.2323192 , 0.97466547, 0.30517287],
       [0.9823192 , 0.22466547, 0.55517287],
       [0.4823192 , 0.72466547, 0.05517287]])

In [6]:

Copied!

ld_lattice.gen_samples(4)
ld_lattice.gen_samples(4)

Out[6]:

array([[0.7323192 , 0.47466547, 0.80517287],
       [0.2323192 , 0.97466547, 0.30517287],
       [0.9823192 , 0.22466547, 0.55517287],
       [0.4823192 , 0.72466547, 0.05517287]])

In [7]:

Copied!

ld_lattice.gen_samples(n_min=2,n_max=4)
ld_lattice.gen_samples(n_min=2,n_max=4)

Out[7]:

array([[0.9823192 , 0.22466547, 0.55517287],
       [0.4823192 , 0.72466547, 0.05517287]])

In [8]:

Copied!

ld_lattice
ld_lattice

Out[8]:

Lattice (AbstractLDDiscreteDistribution)
    d               3
    replications    1
    randomize       SHIFT
    gen_vec_source  kuo.lattice-33002-1024-1048576.9125.txt
    order           RADICAL INVERSE
    n_limit         2^(20)
    entropy         218929209270440955509681448220425620652

Visuals¶

IID vs LD Points¶

In [9]:

Copied!





n = 2**7 # Lattice and Digital Net prefer powers of 2 sample sizes
discrete_distribs = {
    'IID': qp.IIDStdUniform(2),
    'LD Lattice': qp.Lattice(2),
    'LD Digital Net': qp.DigitalNetB2(2),
    'LD Halton': qp.Halton(2)}
fig,ax = pyplot.subplots(nrows=1,ncols=len(discrete_distribs),figsize=(3*len(discrete_distribs),3))
ax = np.atleast_1d(ax)
for i,(name,discrete_distrib) in enumerate(discrete_distribs.items()):
    x = discrete_distrib.gen_samples(n)
    ax[i].scatter(x[:,0],x[:,1],s=5,color=colors[i])
    ax[i].set_title(name)
    ax[i].set_aspect(1)
    ax[i].set_xlabel(r'$X_{i0}$'); ax[i].set_ylabel(r'$X_{i1}$')
    ax[i].set_xlim([0,1]); ax[i].set_ylim([0,1])
    ax[i].set_xticks([0,1]); ax[i].set_yticks([0,1])
n = 2**7 # Lattice and Digital Net prefer powers of 2 sample sizes
discrete_distribs = {
    'IID': qp.IIDStdUniform(2),
    'LD Lattice': qp.Lattice(2),
    'LD Digital Net': qp.DigitalNetB2(2),
    'LD Halton': qp.Halton(2)}
fig,ax = pyplot.subplots(nrows=1,ncols=len(discrete_distribs),figsize=(3*len(discrete_distribs),3))
ax = np.atleast_1d(ax)
for i,(name,discrete_distrib) in enumerate(discrete_distribs.items()):
    x = discrete_distrib.gen_samples(n)
    ax[i].scatter(x[:,0],x[:,1],s=5,color=colors[i])
    ax[i].set_title(name)
    ax[i].set_aspect(1)
    ax[i].set_xlabel(r'$X_{i0}$'); ax[i].set_ylabel(r'$X_{i1}$')
    ax[i].set_xlim([0,1]); ax[i].set_ylim([0,1])
    ax[i].set_xticks([0,1]); ax[i].set_yticks([0,1])

No description has been provided for this image

LD Space Filling Extensibility¶

In [10]:

Copied!





m_min,m_max = 6,8
fig,ax = pyplot.subplots(nrows=1,ncols=len(discrete_distribs),figsize=(3*len(discrete_distribs),3))
ax = np.atleast_1d(ax)
for i,(name,discrete_distrib) in enumerate(discrete_distribs.items()):
    x = discrete_distrib.gen_samples(2**m_max)
    n_min = 0
    for m in range(m_min,m_max+1):
        n_max = 2**m
        ax[i].scatter(x[n_min:n_max,0],x[n_min:n_max,1],s=5,color=colors[m-m_min],label='n_min = %d, n_max = %d'%(n_min,n_max))
        n_min = 2**m
    ax[i].set_title(name)
    ax[i].set_aspect(1)
    ax[i].set_xlabel(r'$X_{i0}$'); ax[i].set_ylabel(r'$X_{i1}$')
    ax[i].set_xlim([0,1]); ax[i].set_ylim([0,1])
    ax[i].set_xticks([0,1]); ax[i].set_yticks([0,1])
m_min,m_max = 6,8
fig,ax = pyplot.subplots(nrows=1,ncols=len(discrete_distribs),figsize=(3*len(discrete_distribs),3))
ax = np.atleast_1d(ax)
for i,(name,discrete_distrib) in enumerate(discrete_distribs.items()):
    x = discrete_distrib.gen_samples(2**m_max)
    n_min = 0
    for m in range(m_min,m_max+1):
        n_max = 2**m
        ax[i].scatter(x[n_min:n_max,0],x[n_min:n_max,1],s=5,color=colors[m-m_min],label='n_min = %d, n_max = %d'%(n_min,n_max))
        n_min = 2**m
    ax[i].set_title(name)
    ax[i].set_aspect(1)
    ax[i].set_xlabel(r'$X_{i0}$'); ax[i].set_ylabel(r'$X_{i1}$')
    ax[i].set_xlim([0,1]); ax[i].set_ylim([0,1])
    ax[i].set_xticks([0,1]); ax[i].set_yticks([0,1])

High Dimensional Pairs Plotting¶

In [11]:

Copied!





discrete_distrib = qp.DigitalNetB2(4)
x = discrete_distrib(2**7)
d = discrete_distrib.d
assert d>=2
fig,ax = pyplot.subplots(nrows=d,ncols=d,figsize=(3*d,3*d))
for i in range(d):
    fig.delaxes(ax[i,i])
    for j in range(i):
        ax[i,j].scatter(x[:,i],x[:,j],s=5)
        fig.delaxes(ax[j,i])
        ax[i,j].set_aspect(1)
        ax[i,j].set_xlabel(r'$X_{i%d}$'%i); ax[i,j].set_ylabel(r'$X_{i%d}$'%j)
        ax[i,j].set_xlim([0,1]); ax[i,j].set_ylim([0,1])
        ax[i,j].set_xticks([0,1]); ax[i,j].set_yticks([0,1])
discrete_distrib = qp.DigitalNetB2(4)
x = discrete_distrib(2**7)
d = discrete_distrib.d
assert d>=2
fig,ax = pyplot.subplots(nrows=d,ncols=d,figsize=(3*d,3*d))
for i in range(d):
    fig.delaxes(ax[i,i])
    for j in range(i):
        ax[i,j].scatter(x[:,i],x[:,j],s=5)
        fig.delaxes(ax[j,i])
        ax[i,j].set_aspect(1)
        ax[i,j].set_xlabel(r'$X_{i%d}$'%i); ax[i,j].set_ylabel(r'$X_{i%d}$'%j)
        ax[i,j].set_xlim([0,1]); ax[i,j].set_ylim([0,1])
        ax[i,j].set_xticks([0,1]); ax[i,j].set_yticks([0,1])

True Measure¶

Define $T$, facilitate transform from original integrand $g$ to transformed integrand $f$

In [12]:

Copied!

discrete_distrib = qp.Halton(3)
true_measure = qp.Gaussian(discrete_distrib,mean=[1,2,3],covariance=[4,5,6])
true_measure.gen_samples(4)
discrete_distrib = qp.Halton(3)
true_measure = qp.Gaussian(discrete_distrib,mean=[1,2,3],covariance=[4,5,6])
true_measure.gen_samples(4)

Out[12]:

array([[ 2.18348289,  4.27924141,  1.15343988],
       [-0.37229914,  2.15929196,  4.43156846],
       [ 0.92311253, -0.56539553,  1.3713884 ],
       [ 4.69593279,  3.41246141,  4.68304706]])

In [13]:

Copied!

true_measure.gen_samples(n_min=2,n_max=4)
true_measure.gen_samples(n_min=2,n_max=4)

Out[13]:

array([[ 0.92311253, -0.56539553,  1.3713884 ],
       [ 4.69593279,  3.41246141,  4.68304706]])

In [14]:

Copied!

true_measure
true_measure

Out[14]:

Gaussian (AbstractTrueMeasure)
    mean            [1 2 3]
    covariance      [4 5 6]
    decomp_type     PCA

Visuals¶

Some True Measure Samplings¶

In [15]:

Copied!





n = 2**7
discrete_distrib = qp.DigitalNetB2(2)
true_measures = {
    'Non-Standard Uniform': qp.Uniform(discrete_distrib,lower_bound=[-3,-2],upper_bound=[3,2]),
    'Standard Gaussian': qp.Gaussian(discrete_distrib),
    'Non-Standard Gaussian': qp.Gaussian(discrete_distrib,mean=[1,2],covariance=[[5,4],[4,9]]),
    'SciPy Based\nIndependent Beta-Gamma': qp.SciPyWrapper(discrete_distrib,[scipy.stats.beta(a=1,b=5),scipy.stats.gamma(a=1)])}
fig,ax = pyplot.subplots(nrows=1,ncols=len(true_measures),figsize=(3*len(true_measures),3))
ax = np.atleast_1d(ax)
for i,(name,true_measure) in enumerate(true_measures.items()):
    t = true_measure.gen_samples(n)
    ax[i].scatter(t[:,0],t[:,1],s=5,color=colors[i])
    ax[i].set_title(name)    
n = 2**7
discrete_distrib = qp.DigitalNetB2(2)
true_measures = {
    'Non-Standard Uniform': qp.Uniform(discrete_distrib,lower_bound=[-3,-2],upper_bound=[3,2]),
    'Standard Gaussian': qp.Gaussian(discrete_distrib),
    'Non-Standard Gaussian': qp.Gaussian(discrete_distrib,mean=[1,2],covariance=[[5,4],[4,9]]),
    'SciPy Based\nIndependent Beta-Gamma': qp.SciPyWrapper(discrete_distrib,[scipy.stats.beta(a=1,b=5),scipy.stats.gamma(a=1)])}
fig,ax = pyplot.subplots(nrows=1,ncols=len(true_measures),figsize=(3*len(true_measures),3))
ax = np.atleast_1d(ax)
for i,(name,true_measure) in enumerate(true_measures.items()):
    t = true_measure.gen_samples(n)
    ax[i].scatter(t[:,0],t[:,1],s=5,color=colors[i])
    ax[i].set_title(name)    

Brownian Motion¶

In [16]:

Copied!





n = 32
discrete_distrib = qp.Lattice(365)
brownian_motions = {
    'Standard Brownian Motion': qp.BrownianMotion(discrete_distrib),
    'Drifted Brownian Motion': qp.BrownianMotion(discrete_distrib,t_final=5,initial_value=5,drift=-1,diffusion=2)}
fig,ax = pyplot.subplots(nrows=len(brownian_motions),ncols=1,figsize=(6,3*len(brownian_motions)))
ax = np.atleast_1d(ax)
for i,(name,brownian_motion) in enumerate(brownian_motions.items()):
    t = brownian_motion.gen_samples(n)
    t_w_init = np.hstack([brownian_motion.initial_value*np.ones((n,1)),t])
    tvec_w_0 = np.hstack([0,brownian_motion.time_vec])
    ax[i].plot(tvec_w_0,t_w_init.T)
    ax[i].set_xlim([tvec_w_0[0],tvec_w_0[-1]])
    ax[i].set_title(name)
n = 32
discrete_distrib = qp.Lattice(365)
brownian_motions = {
    'Standard Brownian Motion': qp.BrownianMotion(discrete_distrib),
    'Drifted Brownian Motion': qp.BrownianMotion(discrete_distrib,t_final=5,initial_value=5,drift=-1,diffusion=2)}
fig,ax = pyplot.subplots(nrows=len(brownian_motions),ncols=1,figsize=(6,3*len(brownian_motions)))
ax = np.atleast_1d(ax)
for i,(name,brownian_motion) in enumerate(brownian_motions.items()):
    t = brownian_motion.gen_samples(n)
    t_w_init = np.hstack([brownian_motion.initial_value*np.ones((n,1)),t])
    tvec_w_0 = np.hstack([0,brownian_motion.time_vec])
    ax[i].plot(tvec_w_0,t_w_init.T)
    ax[i].set_xlim([tvec_w_0[0],tvec_w_0[-1]])
    ax[i].set_title(name)

Integrand¶

Define original integrand $g$, store transformed integrand $f$

Wrap your Function into QMCPy¶

Our simple example $$g(T) = T_0+T_1+\dots+T_{d-1}, \qquad T \sim \mathcal{N}(0,I_d)$$ $$f(X) = g(\Phi^{-1}(X)), \qquad \Phi \text{ standard normal CDF}$$ $$\mathbb{E}[f(X)] = \mathbb{E}[g(T)] = 0$$

In [17]:

Copied!





def myfun(t): # define g, the ORIGINAL integrand 
    # t an (n,d) shaped np.ndarray of sample from the ORIGINAL (true) measure
    y = t.sum(1)
    return y # an (n,) shaped np.ndarray
true_measure = qp.Gaussian(qp.Halton(5)) # LD Halton discrete distrib for QMC problem
qp_myfun = qp.CustomFun(true_measure,myfun,parallel=False)
def myfun(t): # define g, the ORIGINAL integrand 
    # t an (n,d) shaped np.ndarray of sample from the ORIGINAL (true) measure
    y = t.sum(1)
    return y # an (n,) shaped np.ndarray
true_measure = qp.Gaussian(qp.Halton(5)) # LD Halton discrete distrib for QMC problem
qp_myfun = qp.CustomFun(true_measure,myfun,parallel=False)

Evalute the Automatically Transformed Integrand¶

In [18]:

Copied!

x = qp_myfun.discrete_distrib.gen_samples(4) # samples from the TRANSFORMED measure
y = qp_myfun.f(x) # evaluate the TRANSFORMED integrand at the TRANSFORMED samples
y
x = qp_myfun.discrete_distrib.gen_samples(4) # samples from the TRANSFORMED measure
y = qp_myfun.f(x) # evaluate the TRANSFORMED integrand at the TRANSFORMED samples
y

Out[18]:

array([1.80203064, 1.41177199, 2.76302476, 1.46425731])

Manual QMC Approximation¶

Note that when doing importance sampling the below doesn't work. In that case we need to take a specially weighted sum instead instead of the equally weighted sum as done below.

In [19]:

Copied!





x = qp_myfun.discrete_distrib.gen_samples(2**16) # samples from the TRANSFORMED measure
y = qp_myfun.f(x) # evaluate the TRANSFORMED integrand at the TRANSFORMED samples
mu_hat = y.mean()
mu_hat
x = qp_myfun.discrete_distrib.gen_samples(2**16) # samples from the TRANSFORMED measure
y = qp_myfun.f(x) # evaluate the TRANSFORMED integrand at the TRANSFORMED samples
mu_hat = y.mean()
mu_hat

Out[19]:

np.float64(-7.210079451455405e-07)

Predefined Integrands¶

Many more integrands detailed at https://qmcpy.readthedocs.io/en/master/algorithms.html#integrand-class

Integrands contain their true measure definition, so the user only needs to pass in a sampler. Samplers are often just discrete distributions.

In [20]:

Copied!





asian_option = qp.FinancialOption(
    sampler = qp.DigitalNetB2(52),
    option = "ASIAN",
    volatility = 1/2,
    start_price = 30,
    strike_price = 35,
    interest_rate = 0.001,
    t_final = 1,
    call_put = 'call',
    asian_mean = 'arithmetic')
x = asian_option.discrete_distrib.gen_samples(2**16)
y = asian_option.f(x)
mu_hat = y.mean()
mu_hat
asian_option = qp.FinancialOption(
    sampler = qp.DigitalNetB2(52),
    option = "ASIAN",
    volatility = 1/2,
    start_price = 30,
    strike_price = 35,
    interest_rate = 0.001,
    t_final = 1,
    call_put = 'call',
    asian_mean = 'arithmetic')
x = asian_option.discrete_distrib.gen_samples(2**16)
y = asian_option.f(x)
mu_hat = y.mean()
mu_hat

Out[20]:

np.float64(1.7888486351025943)

Visual Transformation¶

In [21]:

Copied!





n = 32
keister = qp.Keister(qp.DigitalNetB2(1))
fig,ax = pyplot.subplots(nrows=1,ncols=2,figsize=(8,4))
x = keister.discrete_distrib.gen_samples(n)
t = keister.true_measure.gen_samples(n)
f_of_x = keister.f(x).squeeze()
g_of_t = keister.g(t).squeeze()
assert (f_of_x==g_of_t).all()
x_fine = np.linspace(0,1,257)[1:-1,None]
f_of_xfine = keister.f(x_fine).squeeze()
lb = 1.2*max(abs(t.min()),abs(t.max()))
t_fine = np.linspace(-lb,lb,257)[:,None]
g_of_tfine = keister.g(t_fine).squeeze()
ax[0].set_title(r'Original')
ax[0].set_xlabel(r'$T_i$'); ax[0].set_ylabel(r'$g(T_i) = g(\Phi^{-1}(X_i))$')
ax[0].plot(t_fine.squeeze(),g_of_tfine,color=colors[0],alpha=.5)
ax[0].scatter(t.squeeze(),f_of_x,s=10,color='k')
ax[1].set_title(r'Transformed')
ax[1].set_xlabel(r'$X_i$'); ax[1].set_ylabel(r'$f(X_i)$')
ax[1].scatter(x.squeeze(),f_of_x,s=10,color='k')
ax[1].plot(x_fine.squeeze(),f_of_xfine,color=colors[1],alpha=.5);
n = 32
keister = qp.Keister(qp.DigitalNetB2(1))
fig,ax = pyplot.subplots(nrows=1,ncols=2,figsize=(8,4))
x = keister.discrete_distrib.gen_samples(n)
t = keister.true_measure.gen_samples(n)
f_of_x = keister.f(x).squeeze()
g_of_t = keister.g(t).squeeze()
assert (f_of_x==g_of_t).all()
x_fine = np.linspace(0,1,257)[1:-1,None]
f_of_xfine = keister.f(x_fine).squeeze()
lb = 1.2*max(abs(t.min()),abs(t.max()))
t_fine = np.linspace(-lb,lb,257)[:,None]
g_of_tfine = keister.g(t_fine).squeeze()
ax[0].set_title(r'Original')
ax[0].set_xlabel(r'$T_i$'); ax[0].set_ylabel(r'$g(T_i) = g(\Phi^{-1}(X_i))$')
ax[0].plot(t_fine.squeeze(),g_of_tfine,color=colors[0],alpha=.5)
ax[0].scatter(t.squeeze(),f_of_x,s=10,color='k')
ax[1].set_title(r'Transformed')
ax[1].set_xlabel(r'$X_i$'); ax[1].set_ylabel(r'$f(X_i)$')
ax[1].scatter(x.squeeze(),f_of_x,s=10,color='k')
ax[1].plot(x_fine.squeeze(),f_of_xfine,color=colors[1],alpha=.5);

Stopping Criterion¶

Adaptively increase $n$ until $\lvert \mu - \hat{\mu} \rvert < \varepsilon$ where $\varepsilon$ is a user defined tolerance.

The stopping criterion should match the discrete distribution e.g. IID CMC stopping criterion for IID points, QMC Lattice stopping criterion for LD Lattice points, QMC digital net stopping criterion for LD digital net points, etc.

IID CMC Algorithm¶

In [22]:

Copied!





problem_cmc = qp.FinancialOption(qp.IIDStdUniform(52),option="ASIAN")
cmc_stop_crit = qp.CubMCG(problem_cmc,abs_tol=0.025)
approx_cmc,data_cmc = cmc_stop_crit.integrate()
data_cmc
problem_cmc = qp.FinancialOption(qp.IIDStdUniform(52),option="ASIAN")
cmc_stop_crit = qp.CubMCG(problem_cmc,abs_tol=0.025)
approx_cmc,data_cmc = cmc_stop_crit.integrate()
data_cmc

Out[22]:

Data (Data)
    solution        1.791
    bound_low       1.766
    bound_high      1.816
    bound_diff      0.050
    n_total         492693
    time_integrate  1.217
CubMCG (AbstractStoppingCriterion)
    abs_tol         0.025
    rel_tol         0
    n_init          2^(10)
    n_limit         2^(30)
    inflate         1.200
    alpha           0.010
    kurtmax         1.478
FinancialOption (AbstractIntegrand)
    option          ASIAN
    call_put        CALL
    volatility      2^(-1)
    start_price     30
    strike_price    35
    interest_rate   0
    t_final         1
    asian_mean      ARITHMETIC
BrownianMotion (AbstractTrueMeasure)
    time_vec        [0.019 0.038 0.058 ... 0.962 0.981 1.   ]
    drift           0
    mean            [0. 0. 0. ... 0. 0. 0.]
    covariance      [[0.019 0.019 0.019 ... 0.019 0.019 0.019]
                     [0.019 0.038 0.038 ... 0.038 0.038 0.038]
                     [0.019 0.038 0.058 ... 0.058 0.058 0.058]
                     ...
                     [0.019 0.038 0.058 ... 0.962 0.962 0.962]
                     [0.019 0.038 0.058 ... 0.962 0.981 0.981]
                     [0.019 0.038 0.058 ... 0.962 0.981 1.   ]]
    decomp_type     PCA
IIDStdUniform (AbstractIIDDiscreteDistribution)
    d               52
    replications    1
    entropy         97012320886264964284917995170158594466

LD QMC Algorithm¶

In [23]:

Copied!





problem_qmc = qp.FinancialOption(qp.DigitalNetB2(52),option="ASIAN")
qmc_stop_crit = qp.CubQMCNetG(problem_qmc,abs_tol=0.025)
approx_qmc,data_qmc = qmc_stop_crit.integrate()
data_qmc
problem_qmc = qp.FinancialOption(qp.DigitalNetB2(52),option="ASIAN")
qmc_stop_crit = qp.CubQMCNetG(problem_qmc,abs_tol=0.025)
approx_qmc,data_qmc = qmc_stop_crit.integrate()
data_qmc

Out[23]:

Data (Data)
    solution        1.773
    comb_bound_low  1.749
    comb_bound_high 1.797
    comb_bound_diff 0.048
    comb_flags      1
    n_total         2^(10)
    n               2^(10)
    time_integrate  0.002
CubQMCNetG (AbstractStoppingCriterion)
    abs_tol         0.025
    rel_tol         0
    n_init          2^(10)
    n_limit         2^(35)
FinancialOption (AbstractIntegrand)
    option          ASIAN
    call_put        CALL
    volatility      2^(-1)
    start_price     30
    strike_price    35
    interest_rate   0
    t_final         1
    asian_mean      ARITHMETIC
BrownianMotion (AbstractTrueMeasure)
    time_vec        [0.019 0.038 0.058 ... 0.962 0.981 1.   ]
    drift           0
    mean            [0. 0. 0. ... 0. 0. 0.]
    covariance      [[0.019 0.019 0.019 ... 0.019 0.019 0.019]
                     [0.019 0.038 0.038 ... 0.038 0.038 0.038]
                     [0.019 0.038 0.058 ... 0.058 0.058 0.058]
                     ...
                     [0.019 0.038 0.058 ... 0.962 0.962 0.962]
                     [0.019 0.038 0.058 ... 0.962 0.981 0.981]
                     [0.019 0.038 0.058 ... 0.962 0.981 1.   ]]
    decomp_type     PCA
DigitalNetB2 (AbstractLDDiscreteDistribution)
    d               52
    replications    1
    randomize       LMS DS
    gen_mats_source joe_kuo.6.21201.txt
    order           RADICAL INVERSE
    t               63
    alpha           1
    n_limit         2^(32)
    entropy         273514699345978368463399322345138322896

In [24]:

Copied!

print('QMC took %.2f%% the time and %.2f%% the samples compared to CMC'%(
      100*data_qmc.time_integrate/data_cmc.time_integrate,100*data_qmc.n_total/data_cmc.n_total))
print('QMC took %.2f%% the time and %.2f%% the samples compared to CMC'%(
      100*data_qmc.time_integrate/data_cmc.time_integrate,100*data_qmc.n_total/data_cmc.n_total))

QMC took 0.19% the time and 0.21% the samples compared to CMC

Visual CMC vs LD¶

In [25]:

Copied!





cmc_tols = [1,.75,.5,.25,.1,.075,.05,.025]
qmc_tols = [1,.5,.1,.05,.02,.01,.005,.002,.001]
fig,ax = pyplot.subplots(nrows=1,ncols=2,figsize=(10,5))
n_cmc,time_cmc = np.zeros_like(cmc_tols),np.zeros_like(cmc_tols)
for i,cmc_tol in enumerate(cmc_tols):
    cmc_stop_crit = qp.CubMCG(qp.FinancialOption(qp.IIDStdUniform(52),option="ASIAN"),abs_tol=cmc_tol)
    approx_cmc,data_cmc = cmc_stop_crit.integrate()
    n_cmc[i],time_cmc[i] = data_cmc.n_total,data_cmc.time_integrate
ax[0].plot(cmc_tols,n_cmc,'-o',color=colors[0],label=r'CMC, $\mathcal{O}(\varepsilon^{-2})$')
ax[1].plot(cmc_tols,time_cmc,'-o',color=colors[0])
n_qmc,time_qmc = np.zeros_like(qmc_tols),np.zeros_like(qmc_tols)
for i,qmc_tol in enumerate(qmc_tols):
    qmc_stop_crit = qp.CubQMCNetG(qp.FinancialOption(qp.DigitalNetB2(52),option="ASIAN"),abs_tol=qmc_tol)
    approx_qmc,data_qmc = qmc_stop_crit.integrate()
    n_qmc[i],time_qmc[i] = data_qmc.n_total,data_qmc.time_integrate
ax[0].plot(qmc_tols[1:],n_qmc[1:],'-o',color=colors[1],label=r'QMC, $\mathcal{O}(\varepsilon^{-1})$')
ax[1].plot(qmc_tols[1:],time_qmc[1:],'-o',color=colors[1])
ax[0].set_xscale('log',base=10); ax[0].set_yscale('log',base=2)
ax[1].set_xscale('log',base=10); ax[1].set_yscale('log',base=10)
ax[0].invert_xaxis(); ax[1].invert_xaxis()
ax[0].set_xlabel(r'absolute tolerance $\varepsilon$'); ax[1].set_xlabel(r'absolute tolerance $\varepsilon$')
ax[0].set_ylabel(r'numer of samples $n$'); ax[1].set_ylabel('integration time')
ax[0].legend(loc='upper left');
cmc_tols = [1,.75,.5,.25,.1,.075,.05,.025]
qmc_tols = [1,.5,.1,.05,.02,.01,.005,.002,.001]
fig,ax = pyplot.subplots(nrows=1,ncols=2,figsize=(10,5))
n_cmc,time_cmc = np.zeros_like(cmc_tols),np.zeros_like(cmc_tols)
for i,cmc_tol in enumerate(cmc_tols):
    cmc_stop_crit = qp.CubMCG(qp.FinancialOption(qp.IIDStdUniform(52),option="ASIAN"),abs_tol=cmc_tol)
    approx_cmc,data_cmc = cmc_stop_crit.integrate()
    n_cmc[i],time_cmc[i] = data_cmc.n_total,data_cmc.time_integrate
ax[0].plot(cmc_tols,n_cmc,'-o',color=colors[0],label=r'CMC, $\mathcal{O}(\varepsilon^{-2})$')
ax[1].plot(cmc_tols,time_cmc,'-o',color=colors[0])
n_qmc,time_qmc = np.zeros_like(qmc_tols),np.zeros_like(qmc_tols)
for i,qmc_tol in enumerate(qmc_tols):
    qmc_stop_crit = qp.CubQMCNetG(qp.FinancialOption(qp.DigitalNetB2(52),option="ASIAN"),abs_tol=qmc_tol)
    approx_qmc,data_qmc = qmc_stop_crit.integrate()
    n_qmc[i],time_qmc[i] = data_qmc.n_total,data_qmc.time_integrate
ax[0].plot(qmc_tols[1:],n_qmc[1:],'-o',color=colors[1],label=r'QMC, $\mathcal{O}(\varepsilon^{-1})$')
ax[1].plot(qmc_tols[1:],time_qmc[1:],'-o',color=colors[1])
ax[0].set_xscale('log',base=10); ax[0].set_yscale('log',base=2)
ax[1].set_xscale('log',base=10); ax[1].set_yscale('log',base=10)
ax[0].invert_xaxis(); ax[1].invert_xaxis()
ax[0].set_xlabel(r'absolute tolerance $\varepsilon$'); ax[1].set_xlabel(r'absolute tolerance $\varepsilon$')
ax[0].set_ylabel(r'numer of samples $n$'); ax[1].set_ylabel('integration time')
ax[0].legend(loc='upper left');

Vectorized Stopping Criterion¶

Many more examples available at https://github.com/QMCSoftware/QMCSoftware/blob/master/demos/vectorized_qmc.ipynb

Vector of Expectations¶

As a simple example, lets compute $\mathbb{E}[\cos(T_0)\cdots\cos(T_{d-1})]$ and $\mathbb{E}[\sin(T_0)\cdots\sin(T_{d-1})]$ where $T \sim \mathcal{U}[0,\pi]^d$

In [26]:

Copied!





qmc_stop_crit = qp.CubQMCCLT(
    integrand = qp.CustomFun(
        true_measure = qp.Uniform(sampler=qp.Halton(3,replications=32),lower_bound=0,upper_bound=np.pi),
        g = lambda t: np.stack([np.cos(t).prod(-1),np.sin(t).prod(-1)],axis=0),
        dimension_indv = 2),
    abs_tol=.0001)
approx,data = qmc_stop_crit.integrate()
data
qmc_stop_crit = qp.CubQMCCLT(
    integrand = qp.CustomFun(
        true_measure = qp.Uniform(sampler=qp.Halton(3,replications=32),lower_bound=0,upper_bound=np.pi),
        g = lambda t: np.stack([np.cos(t).prod(-1),np.sin(t).prod(-1)],axis=0),
        dimension_indv = 2),
    abs_tol=.0001)
approx,data = qmc_stop_crit.integrate()
data

Out[26]:

Data (Data)
    solution        [1.006e-05 2.580e-01]
    comb_bound_low  [-5.012e-05  2.579e-01]
    comb_bound_high [7.024e-05 2.581e-01]
    comb_bound_diff [0. 0.]
    comb_flags      [ True  True]
    n_total         262144
    n               [262144 131072]
    n_rep           [8192 4096]
    time_integrate  0.348
CubQMCRepStudentT (AbstractStoppingCriterion)
    inflate         1
    alpha           0.010
    abs_tol         1.00e-04
    rel_tol         0
    n_init          2^(8)
    n_limit         2^(30)
CustomFun (AbstractIntegrand)
Uniform (AbstractTrueMeasure)
    lower_bound     0
    upper_bound     3.142
Halton (AbstractLDDiscreteDistribution)
    d               3
    replications    2^(5)
    randomize       LMS DP
    t               63
    n_limit         2^(32)
    entropy         339443966251781763705539893307306305063

Covariance¶

In a simple example, let $T \sim \mathcal{N}(1,I_d)$ and compute the covariance of $P = T_0\cdots T_{d-1}$ and $S = T_0+\dots+T_{d-1}$ so that $$\mathrm{Cov}[P,S] = \mathbb{E}[PS]-\mathbb{E}[P]\mathbb{E}[S] = \mu_0-\mu_1\mu_2$$ Theoretically we have $\mathrm{Cov}[P,S] = 2d-(1)(d) = d$

In [27]:

Copied!





class CovIntegrand(qp.integrand.Integrand):
    def __init__(self, sampler):
        self.sampler = sampler
        self.true_measure = qp.Gaussian(sampler,mean=1)
        super(CovIntegrand,self).__init__(dimension_indv=3,dimension_comb=(),parallel=False)
    def g(self, t):
        P = t.prod(1) # P
        S = t.sum(1) # S
        PS = P*S #PS
        y = np.stack([PS,P,S],axis=0)
        return y
    def bound_fun(self, low, high):
        comb_low = low[0]-max(low[1]*low[2],low[1]*high[2],high[1]*low[2],high[1]*high[2])
        comb_high = high[0]-min(low[1]*low[2],low[1]*high[2],high[1]*low[2],high[1]*high[2])
        return comb_low,comb_high
    def dependency(self, comb_flag):
        return np.tile(comb_flag,3)
approx,data = qp.CubQMCLatticeG(CovIntegrand(qp.Lattice(10)),rel_tol=.025).integrate()
data
class CovIntegrand(qp.integrand.Integrand):
    def __init__(self, sampler):
        self.sampler = sampler
        self.true_measure = qp.Gaussian(sampler,mean=1)
        super(CovIntegrand,self).__init__(dimension_indv=3,dimension_comb=(),parallel=False)
    def g(self, t):
        P = t.prod(1) # P
        S = t.sum(1) # S
        PS = P*S #PS
        y = np.stack([PS,P,S],axis=0)
        return y
    def bound_fun(self, low, high):
        comb_low = low[0]-max(low[1]*low[2],low[1]*high[2],high[1]*low[2],high[1]*high[2])
        comb_high = high[0]-min(low[1]*low[2],low[1]*high[2],high[1]*low[2],high[1]*high[2])
        return comb_low,comb_high
    def dependency(self, comb_flag):
        return np.tile(comb_flag,3)
approx,data = qp.CubQMCLatticeG(CovIntegrand(qp.Lattice(10)),rel_tol=.025).integrate()
data

Out[27]:

Data (Data)
    solution        10.074
    comb_bound_low  9.840
    comb_bound_high 10.320
    comb_bound_diff 0.480
    comb_flags      1
    n_total         2^(20)
    n               [1048576 1048576 1048576]
    time_integrate  0.686
CubQMCLatticeG (AbstractStoppingCriterion)
    abs_tol         0.010
    rel_tol         0.025
    n_init          2^(10)
    n_limit         2^(30)
CovIntegrand (AbstractIntegrand)
Gaussian (AbstractTrueMeasure)
    mean            1
    covariance      1
    decomp_type     PCA
Lattice (AbstractLDDiscreteDistribution)
    d               10
    replications    1
    randomize       SHIFT
    gen_vec_source  kuo.lattice-33002-1024-1048576.9125.txt
    order           RADICAL INVERSE
    n_limit         2^(20)
    entropy         21765430054726836764349893853647822716

Sensitiviy Indices¶

See Appendix A of Art Owen's Monte Carlo Book

In the following example, we fit a neural network to Iris flower features and try to classify the Iris species. For each set of features, the classifier provides a probability of belonging to each species, a length 3 vector. We quantify the sensitiviy of this classificaiton probability to Iris features, assuming features are uniformly distributed throughout the feature domain.

In [28]:

Copied!





from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
data = load_iris()
og_feature_names = data["feature_names"]
feature_names = [fn.replace('sepal ','S')\
    .replace('length ','L')\
    .replace('petal ','P')\
    .replace('width ','W')\
    .replace('(cm)','') for fn in og_feature_names]
target_names = data["target_names"]
xt,xv,yt,yv = train_test_split(data["data"],data["target"],
    test_size = 1/3,
    random_state = 7)
pd.DataFrame(np.hstack([data['data'],data['target'][:,None]]),columns=og_feature_names+['species']).iloc[[0,1,90,91,140,141]]
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
data = load_iris()
og_feature_names = data["feature_names"]
feature_names = [fn.replace('sepal ','S')\
    .replace('length ','L')\
    .replace('petal ','P')\
    .replace('width ','W')\
    .replace('(cm)','') for fn in og_feature_names]
target_names = data["target_names"]
xt,xv,yt,yv = train_test_split(data["data"],data["target"],
    test_size = 1/3,
    random_state = 7)
pd.DataFrame(np.hstack([data['data'],data['target'][:,None]]),columns=og_feature_names+['species']).iloc[[0,1,90,91,140,141]]

Out[28]:

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	species
0	5.1	3.5	1.4	0.2	0.0
1	4.9	3.0	1.4	0.2	0.0
90	5.5	2.6	4.4	1.2	1.0
91	6.1	3.0	4.6	1.4	1.0
140	6.7	3.1	5.6	2.4	2.0
141	6.9	3.1	5.1	2.3	2.0

In [29]:

Copied!





mlpc = MLPClassifier(random_state=7,max_iter=1024).fit(xt,yt)
yhat = mlpc.predict(xv)
print("accuracy: %.1f%%"%(100*(yv==yhat).mean()))
# accuracy: 98.0%
sampler = qp.DigitalNetB2(4,seed=7)
true_measure =  qp.Uniform(sampler,
    lower_bound = xt.min(0),
    upper_bound = xt.max(0))
fun = qp.CustomFun(
    true_measure = true_measure,
    g = lambda x: mlpc.predict_proba(x).T,
    dimension_indv = 3)
si_fun = qp.SensitivityIndices(fun,indices="all")
qmc_algo = qp.CubQMCNetG(si_fun,abs_tol=.005)
nn_sis,nn_sis_data = qmc_algo.integrate()
mlpc = MLPClassifier(random_state=7,max_iter=1024).fit(xt,yt)
yhat = mlpc.predict(xv)
print("accuracy: %.1f%%"%(100*(yv==yhat).mean()))
# accuracy: 98.0%
sampler = qp.DigitalNetB2(4,seed=7)
true_measure =  qp.Uniform(sampler,
    lower_bound = xt.min(0),
    upper_bound = xt.max(0))
fun = qp.CustomFun(
    true_measure = true_measure,
    g = lambda x: mlpc.predict_proba(x).T,
    dimension_indv = 3)
si_fun = qp.SensitivityIndices(fun,indices="all")
qmc_algo = qp.CubQMCNetG(si_fun,abs_tol=.005)
nn_sis,nn_sis_data = qmc_algo.integrate()

accuracy: 98.0%

In [30]:

Copied!





#print(nn_sis_data.flags_indv.shape)
#print(nn_sis_data.flags_comb.shape)
print('samples: 2^(%d)'%np.log2(nn_sis_data.n_total))
print('time: %.1e'%nn_sis_data.time_integrate)
print('indices:\n%s'%nn_sis_data.integrand.indices)

import pandas as pd

df_closed = pd.DataFrame(nn_sis[0],columns=target_names,index=[str(np.where(idx)[0]) for idx in nn_sis_data.integrand.indices])
print('\nClosed Indices')
print(df_closed)
df_total = pd.DataFrame(nn_sis[1],columns=target_names,index=[str(np.where(idx)[0]) for idx in nn_sis_data.integrand.indices])
print('\nTotal Indices')
df_closed_singletons = df_closed.loc[['[%d]'%i for i in range(4)]].T
df_closed_singletons['sum singletons'] = df_closed_singletons[['[%d]'%i for i in range(4)]].sum(1)
df_closed_singletons.columns = data['feature_names']+['sum']
df_closed_singletons = df_closed_singletons*100
df_closed_singletons
#print(nn_sis_data.flags_indv.shape)
#print(nn_sis_data.flags_comb.shape)
print('samples: 2^(%d)'%np.log2(nn_sis_data.n_total))
print('time: %.1e'%nn_sis_data.time_integrate)
print('indices:\n%s'%nn_sis_data.integrand.indices)

import pandas as pd

df_closed = pd.DataFrame(nn_sis[0],columns=target_names,index=[str(np.where(idx)[0]) for idx in nn_sis_data.integrand.indices])
print('\nClosed Indices')
print(df_closed)
df_total = pd.DataFrame(nn_sis[1],columns=target_names,index=[str(np.where(idx)[0]) for idx in nn_sis_data.integrand.indices])
print('\nTotal Indices')
df_closed_singletons = df_closed.loc[['[%d]'%i for i in range(4)]].T
df_closed_singletons['sum singletons'] = df_closed_singletons[['[%d]'%i for i in range(4)]].sum(1)
df_closed_singletons.columns = data['feature_names']+['sum']
df_closed_singletons = df_closed_singletons*100
df_closed_singletons

samples: 2^(15)
time: 6.6e-01
indices:
[[ True False False False]
 [False  True False False]
 [False False  True False]
 [False False False  True]
 [ True  True False False]
 [ True False  True False]
 [ True False False  True]
 [False  True  True False]
 [False  True False  True]
 [False False  True  True]
 [ True  True  True False]
 [ True  True False  True]
 [ True False  True  True]
 [False  True  True  True]]

Closed Indices
           setosa  versicolor  virginica
[0]      0.001323    0.068645   0.077325
[1]      0.063749    0.026565   0.004784
[2]      0.713825    0.325072   0.497800
[3]      0.052967    0.025579   0.120317
[0 1]    0.063925    0.091300   0.085151
[0 2]    0.715316    0.460314   0.637738
[0 3]    0.053469    0.092601   0.205639
[1 2]    0.841655    0.431035   0.513277
[1 3]    0.110739    0.039410   0.131264
[2 3]    0.822910    0.583282   0.703142
[0 1 2]  0.843726    0.570076   0.658272
[0 1 3]  0.112798    0.104804   0.215817
[0 2 3]  0.825330    0.815263   0.945267
[1 2 3]  0.995864    0.739588   0.728499

Total Indices

Out[30]:

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	sum
setosa	0.132343	6.374860	71.382498	5.296674	83.186375
versicolor	6.864536	2.656530	32.507230	2.557911	44.586208
virginica	7.732521	0.478364	49.779978	12.031725	70.022587

In [31]:

Copied!





nindices = len(nn_sis_data.integrand.indices)
fig,ax = pyplot.subplots(figsize=(9,5))
ticks = np.arange(nindices)
width = .25
for i,(alpha,species) in enumerate(zip([.25,.5,.75],data['target_names'])):
    cvals = df_closed[species].to_numpy()
    tvals = df_total[species].to_numpy()
    ticks_i = ticks+i*width
    ax.bar(ticks_i,cvals,width=width,align='edge',color='k',alpha=alpha,label=species)
    #ax.bar(ticks_i,np.flip(tvals),width=width,align='edge',bottom=1-np.flip(tvals),color=color,alpha=.1)
ax.set_xlim([0,13+3*width])
ax.set_xticks(ticks+1.5*width)

# closed_labels = [r'$\underline{s}_{\{%s\}}$'%(','.join([r'\text{%s}'%feature_names[i] for i in idx])) for idx in nn_sis_data.integrand.indices]
closed_labels = ['\n'.join([feature_names[i] for i in np.where(idx)[0]]) for idx in nn_sis_data.integrand.indices]
ax.set_xticklabels(closed_labels,rotation=0)
ax.set_ylim([0,1]); ax.set_yticks([0,1])
ax.grid(False)
for spine in ['top','right','bottom']: ax.spines[spine].set_visible(False)
ax.legend(frameon=False,loc='lower center',bbox_to_anchor=(.5,-.2),ncol=3);
fig.tight_layout()
nindices = len(nn_sis_data.integrand.indices)
fig,ax = pyplot.subplots(figsize=(9,5))
ticks = np.arange(nindices)
width = .25
for i,(alpha,species) in enumerate(zip([.25,.5,.75],data['target_names'])):
    cvals = df_closed[species].to_numpy()
    tvals = df_total[species].to_numpy()
    ticks_i = ticks+i*width
    ax.bar(ticks_i,cvals,width=width,align='edge',color='k',alpha=alpha,label=species)
    #ax.bar(ticks_i,np.flip(tvals),width=width,align='edge',bottom=1-np.flip(tvals),color=color,alpha=.1)
ax.set_xlim([0,13+3*width])
ax.set_xticks(ticks+1.5*width)

# closed_labels = [r'$\underline{s}_{\{%s\}}$'%(','.join([r'\text{%s}'%feature_names[i] for i in idx])) for idx in nn_sis_data.integrand.indices]
closed_labels = ['\n'.join([feature_names[i] for i in np.where(idx)[0]]) for idx in nn_sis_data.integrand.indices]
ax.set_xticklabels(closed_labels,rotation=0)
ax.set_ylim([0,1]); ax.set_yticks([0,1])
ax.grid(False)
for spine in ['top','right','bottom']: ax.spines[spine].set_visible(False)
ax.legend(frameon=False,loc='lower center',bbox_to_anchor=(.5,-.2),ncol=3);
fig.tight_layout()

In [ ]:

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	species
0	5.1	3.5	1.4	0.2	0.0
1	4.9	3.0	1.4	0.2	0.0
90	5.5	2.6	4.4	1.2	1.0
91	6.1	3.0	4.6	1.4	1.0
140	6.7	3.1	5.6	2.4	2.0
141	6.9	3.1	5.1	2.3	2.0

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	species
0	5.1	3.5	1.4	0.2	0.0
1	4.9	3.0	1.4	0.2	0.0
90	5.5	2.6	4.4	1.2	1.0
91	6.1	3.0	4.6	1.4	1.0
140	6.7	3.1	5.6	2.4	2.0
141	6.9	3.1	5.1	2.3	2.0

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	species
0	5.1	3.5	1.4	0.2	0.0
1	4.9	3.0	1.4	0.2	0.0
90	5.5	2.6	4.4	1.2	1.0
91	6.1	3.0	4.6	1.4	1.0
140	6.7	3.1	5.6	2.4	2.0
141	6.9	3.1	5.1	2.3	2.0