Monte Carlo for Vector Functions of Integrals¶
Demo Accompanying Aleksei Sorokin's PyData Chicago 2023 Talk
Monte Carlo Problem¶
$$\text{True Mean} = \mu = \mathbb{E}[g(T)] = \mathbb{E}[f(X)] = \int_{[0,1]^d} f(x) \mathrm{d} x \approx \frac{1}{n} \sum_{i=0}^{n-1} f(X_i) = \hat{\mu} = \text{Sample Mean}$$
- $T$, original measure on $\mathcal{T}$
- $g: \mathcal{T} \to \mathbb{R}$, original integrand
- $X \sim \mathcal{U}[0,1]^d$, transformed measure
- $f: [0,1]^d \to \mathbb{R}$, transformed integrand
Python Setup¶
import qmcpy as qp
import numpy as np
import scipy.stats
import pandas as pd
import time
from matplotlib import pyplot
colors = pyplot.rcParams['axes.prop_cycle'].by_key()['color']
%matplotlib inline
Discrete Distribution¶
Generate sampling locations $X_0,\dots,X_{n-1} \sim \mathcal{U}[0,1]^d$
Independent Identically Distributed (IID) Points for Crude Monte Carlo (CMC)¶
iid = qp.IIDStdUniform(dimension=3)
iid.gen_samples(n=4)
array([[0.61584953, 0.08238197, 0.79597353], [0.47972561, 0.36991422, 0.4520753 ], [0.9705357 , 0.20129176, 0.05264096], [0.12821756, 0.27695067, 0.4200091 ]])
iid.gen_samples(4)
array([[0.75988525, 0.06535736, 0.88857573], [0.93750365, 0.82111122, 0.62840155], [0.73705715, 0.22116385, 0.86364701], [0.76327139, 0.67695714, 0.73070693]])
iid
IIDStdUniform (AbstractIIDDiscreteDistribution) d 3 replications 1 entropy 255445230289856938445909338179220309576
Low Discrepancy (LD) Points for Quasi-Monte Carlo (QMC)¶
ld_lattice = qp.Lattice(3)
ld_lattice.gen_samples(4)
array([[0.7323192 , 0.47466547, 0.80517287], [0.2323192 , 0.97466547, 0.30517287], [0.9823192 , 0.22466547, 0.55517287], [0.4823192 , 0.72466547, 0.05517287]])
ld_lattice.gen_samples(4)
array([[0.7323192 , 0.47466547, 0.80517287], [0.2323192 , 0.97466547, 0.30517287], [0.9823192 , 0.22466547, 0.55517287], [0.4823192 , 0.72466547, 0.05517287]])
ld_lattice.gen_samples(n_min=2,n_max=4)
array([[0.9823192 , 0.22466547, 0.55517287], [0.4823192 , 0.72466547, 0.05517287]])
ld_lattice
Lattice (AbstractLDDiscreteDistribution) d 3 replications 1 randomize SHIFT gen_vec_source kuo.lattice-33002-1024-1048576.9125.txt order RADICAL INVERSE n_limit 2^(20) entropy 218929209270440955509681448220425620652
Visuals¶
IID vs LD Points¶
n = 2**7 # Lattice and Digital Net prefer powers of 2 sample sizes
discrete_distribs = {
'IID': qp.IIDStdUniform(2),
'LD Lattice': qp.Lattice(2),
'LD Digital Net': qp.DigitalNetB2(2),
'LD Halton': qp.Halton(2)}
fig,ax = pyplot.subplots(nrows=1,ncols=len(discrete_distribs),figsize=(3*len(discrete_distribs),3))
ax = np.atleast_1d(ax)
for i,(name,discrete_distrib) in enumerate(discrete_distribs.items()):
x = discrete_distrib.gen_samples(n)
ax[i].scatter(x[:,0],x[:,1],s=5,color=colors[i])
ax[i].set_title(name)
ax[i].set_aspect(1)
ax[i].set_xlabel(r'$X_{i0}$'); ax[i].set_ylabel(r'$X_{i1}$')
ax[i].set_xlim([0,1]); ax[i].set_ylim([0,1])
ax[i].set_xticks([0,1]); ax[i].set_yticks([0,1])
LD Space Filling Extensibility¶
m_min,m_max = 6,8
fig,ax = pyplot.subplots(nrows=1,ncols=len(discrete_distribs),figsize=(3*len(discrete_distribs),3))
ax = np.atleast_1d(ax)
for i,(name,discrete_distrib) in enumerate(discrete_distribs.items()):
x = discrete_distrib.gen_samples(2**m_max)
n_min = 0
for m in range(m_min,m_max+1):
n_max = 2**m
ax[i].scatter(x[n_min:n_max,0],x[n_min:n_max,1],s=5,color=colors[m-m_min],label='n_min = %d, n_max = %d'%(n_min,n_max))
n_min = 2**m
ax[i].set_title(name)
ax[i].set_aspect(1)
ax[i].set_xlabel(r'$X_{i0}$'); ax[i].set_ylabel(r'$X_{i1}$')
ax[i].set_xlim([0,1]); ax[i].set_ylim([0,1])
ax[i].set_xticks([0,1]); ax[i].set_yticks([0,1])
High Dimensional Pairs Plotting¶
discrete_distrib = qp.DigitalNetB2(4)
x = discrete_distrib(2**7)
d = discrete_distrib.d
assert d>=2
fig,ax = pyplot.subplots(nrows=d,ncols=d,figsize=(3*d,3*d))
for i in range(d):
fig.delaxes(ax[i,i])
for j in range(i):
ax[i,j].scatter(x[:,i],x[:,j],s=5)
fig.delaxes(ax[j,i])
ax[i,j].set_aspect(1)
ax[i,j].set_xlabel(r'$X_{i%d}$'%i); ax[i,j].set_ylabel(r'$X_{i%d}$'%j)
ax[i,j].set_xlim([0,1]); ax[i,j].set_ylim([0,1])
ax[i,j].set_xticks([0,1]); ax[i,j].set_yticks([0,1])
True Measure¶
Define $T$, facilitate transform from original integrand $g$ to transformed integrand $f$
discrete_distrib = qp.Halton(3)
true_measure = qp.Gaussian(discrete_distrib,mean=[1,2,3],covariance=[4,5,6])
true_measure.gen_samples(4)
array([[ 2.18348289, 4.27924141, 1.15343988], [-0.37229914, 2.15929196, 4.43156846], [ 0.92311253, -0.56539553, 1.3713884 ], [ 4.69593279, 3.41246141, 4.68304706]])
true_measure.gen_samples(n_min=2,n_max=4)
array([[ 0.92311253, -0.56539553, 1.3713884 ], [ 4.69593279, 3.41246141, 4.68304706]])
true_measure
Gaussian (AbstractTrueMeasure) mean [1 2 3] covariance [4 5 6] decomp_type PCA
n = 2**7
discrete_distrib = qp.DigitalNetB2(2)
true_measures = {
'Non-Standard Uniform': qp.Uniform(discrete_distrib,lower_bound=[-3,-2],upper_bound=[3,2]),
'Standard Gaussian': qp.Gaussian(discrete_distrib),
'Non-Standard Gaussian': qp.Gaussian(discrete_distrib,mean=[1,2],covariance=[[5,4],[4,9]]),
'SciPy Based\nIndependent Beta-Gamma': qp.SciPyWrapper(discrete_distrib,[scipy.stats.beta(a=1,b=5),scipy.stats.gamma(a=1)])}
fig,ax = pyplot.subplots(nrows=1,ncols=len(true_measures),figsize=(3*len(true_measures),3))
ax = np.atleast_1d(ax)
for i,(name,true_measure) in enumerate(true_measures.items()):
t = true_measure.gen_samples(n)
ax[i].scatter(t[:,0],t[:,1],s=5,color=colors[i])
ax[i].set_title(name)
Brownian Motion¶
n = 32
discrete_distrib = qp.Lattice(365)
brownian_motions = {
'Standard Brownian Motion': qp.BrownianMotion(discrete_distrib),
'Drifted Brownian Motion': qp.BrownianMotion(discrete_distrib,t_final=5,initial_value=5,drift=-1,diffusion=2)}
fig,ax = pyplot.subplots(nrows=len(brownian_motions),ncols=1,figsize=(6,3*len(brownian_motions)))
ax = np.atleast_1d(ax)
for i,(name,brownian_motion) in enumerate(brownian_motions.items()):
t = brownian_motion.gen_samples(n)
t_w_init = np.hstack([brownian_motion.initial_value*np.ones((n,1)),t])
tvec_w_0 = np.hstack([0,brownian_motion.time_vec])
ax[i].plot(tvec_w_0,t_w_init.T)
ax[i].set_xlim([tvec_w_0[0],tvec_w_0[-1]])
ax[i].set_title(name)
Integrand¶
Define original integrand $g$, store transformed integrand $f$
Wrap your Function into QMCPy¶
Our simple example $$g(T) = T_0+T_1+\dots+T_{d-1}, \qquad T \sim \mathcal{N}(0,I_d)$$ $$f(X) = g(\Phi^{-1}(X)), \qquad \Phi \text{ standard normal CDF}$$ $$\mathbb{E}[f(X)] = \mathbb{E}[g(T)] = 0$$
def myfun(t): # define g, the ORIGINAL integrand
# t an (n,d) shaped np.ndarray of sample from the ORIGINAL (true) measure
y = t.sum(1)
return y # an (n,) shaped np.ndarray
true_measure = qp.Gaussian(qp.Halton(5)) # LD Halton discrete distrib for QMC problem
qp_myfun = qp.CustomFun(true_measure,myfun,parallel=False)
Evalute the Automatically Transformed Integrand¶
x = qp_myfun.discrete_distrib.gen_samples(4) # samples from the TRANSFORMED measure
y = qp_myfun.f(x) # evaluate the TRANSFORMED integrand at the TRANSFORMED samples
y
array([1.80203064, 1.41177199, 2.76302476, 1.46425731])
Manual QMC Approximation¶
Note that when doing importance sampling the below doesn't work. In that case we need to take a specially weighted sum instead instead of the equally weighted sum as done below.
x = qp_myfun.discrete_distrib.gen_samples(2**16) # samples from the TRANSFORMED measure
y = qp_myfun.f(x) # evaluate the TRANSFORMED integrand at the TRANSFORMED samples
mu_hat = y.mean()
mu_hat
np.float64(-7.210079451455405e-07)
Predefined Integrands¶
Many more integrands detailed at https://qmcpy.readthedocs.io/en/master/algorithms.html#integrand-class
Integrands contain their true measure definition, so the user only needs to pass in a sampler. Samplers are often just discrete distributions.
asian_option = qp.FinancialOption(
sampler = qp.DigitalNetB2(52),
option = "ASIAN",
volatility = 1/2,
start_price = 30,
strike_price = 35,
interest_rate = 0.001,
t_final = 1,
call_put = 'call',
asian_mean = 'arithmetic')
x = asian_option.discrete_distrib.gen_samples(2**16)
y = asian_option.f(x)
mu_hat = y.mean()
mu_hat
np.float64(1.7888486351025943)
Visual Transformation¶
n = 32
keister = qp.Keister(qp.DigitalNetB2(1))
fig,ax = pyplot.subplots(nrows=1,ncols=2,figsize=(8,4))
x = keister.discrete_distrib.gen_samples(n)
t = keister.true_measure.gen_samples(n)
f_of_x = keister.f(x).squeeze()
g_of_t = keister.g(t).squeeze()
assert (f_of_x==g_of_t).all()
x_fine = np.linspace(0,1,257)[1:-1,None]
f_of_xfine = keister.f(x_fine).squeeze()
lb = 1.2*max(abs(t.min()),abs(t.max()))
t_fine = np.linspace(-lb,lb,257)[:,None]
g_of_tfine = keister.g(t_fine).squeeze()
ax[0].set_title(r'Original')
ax[0].set_xlabel(r'$T_i$'); ax[0].set_ylabel(r'$g(T_i) = g(\Phi^{-1}(X_i))$')
ax[0].plot(t_fine.squeeze(),g_of_tfine,color=colors[0],alpha=.5)
ax[0].scatter(t.squeeze(),f_of_x,s=10,color='k')
ax[1].set_title(r'Transformed')
ax[1].set_xlabel(r'$X_i$'); ax[1].set_ylabel(r'$f(X_i)$')
ax[1].scatter(x.squeeze(),f_of_x,s=10,color='k')
ax[1].plot(x_fine.squeeze(),f_of_xfine,color=colors[1],alpha=.5);
Stopping Criterion¶
Adaptively increase $n$ until $\lvert \mu - \hat{\mu} \rvert < \varepsilon$ where $\varepsilon$ is a user defined tolerance.
The stopping criterion should match the discrete distribution e.g. IID CMC stopping criterion for IID points, QMC Lattice stopping criterion for LD Lattice points, QMC digital net stopping criterion for LD digital net points, etc.
IID CMC Algorithm¶
problem_cmc = qp.FinancialOption(qp.IIDStdUniform(52),option="ASIAN")
cmc_stop_crit = qp.CubMCG(problem_cmc,abs_tol=0.025)
approx_cmc,data_cmc = cmc_stop_crit.integrate()
data_cmc
Data (Data) solution 1.791 bound_low 1.766 bound_high 1.816 bound_diff 0.050 n_total 492693 time_integrate 1.217 CubMCG (AbstractStoppingCriterion) abs_tol 0.025 rel_tol 0 n_init 2^(10) n_limit 2^(30) inflate 1.200 alpha 0.010 kurtmax 1.478 FinancialOption (AbstractIntegrand) option ASIAN call_put CALL volatility 2^(-1) start_price 30 strike_price 35 interest_rate 0 t_final 1 asian_mean ARITHMETIC BrownianMotion (AbstractTrueMeasure) time_vec [0.019 0.038 0.058 ... 0.962 0.981 1. ] drift 0 mean [0. 0. 0. ... 0. 0. 0.] covariance [[0.019 0.019 0.019 ... 0.019 0.019 0.019] [0.019 0.038 0.038 ... 0.038 0.038 0.038] [0.019 0.038 0.058 ... 0.058 0.058 0.058] ... [0.019 0.038 0.058 ... 0.962 0.962 0.962] [0.019 0.038 0.058 ... 0.962 0.981 0.981] [0.019 0.038 0.058 ... 0.962 0.981 1. ]] decomp_type PCA IIDStdUniform (AbstractIIDDiscreteDistribution) d 52 replications 1 entropy 97012320886264964284917995170158594466
LD QMC Algorithm¶
problem_qmc = qp.FinancialOption(qp.DigitalNetB2(52),option="ASIAN")
qmc_stop_crit = qp.CubQMCNetG(problem_qmc,abs_tol=0.025)
approx_qmc,data_qmc = qmc_stop_crit.integrate()
data_qmc
Data (Data) solution 1.773 comb_bound_low 1.749 comb_bound_high 1.797 comb_bound_diff 0.048 comb_flags 1 n_total 2^(10) n 2^(10) time_integrate 0.002 CubQMCNetG (AbstractStoppingCriterion) abs_tol 0.025 rel_tol 0 n_init 2^(10) n_limit 2^(35) FinancialOption (AbstractIntegrand) option ASIAN call_put CALL volatility 2^(-1) start_price 30 strike_price 35 interest_rate 0 t_final 1 asian_mean ARITHMETIC BrownianMotion (AbstractTrueMeasure) time_vec [0.019 0.038 0.058 ... 0.962 0.981 1. ] drift 0 mean [0. 0. 0. ... 0. 0. 0.] covariance [[0.019 0.019 0.019 ... 0.019 0.019 0.019] [0.019 0.038 0.038 ... 0.038 0.038 0.038] [0.019 0.038 0.058 ... 0.058 0.058 0.058] ... [0.019 0.038 0.058 ... 0.962 0.962 0.962] [0.019 0.038 0.058 ... 0.962 0.981 0.981] [0.019 0.038 0.058 ... 0.962 0.981 1. ]] decomp_type PCA DigitalNetB2 (AbstractLDDiscreteDistribution) d 52 replications 1 randomize LMS DS gen_mats_source joe_kuo.6.21201.txt order RADICAL INVERSE t 63 alpha 1 n_limit 2^(32) entropy 273514699345978368463399322345138322896
print('QMC took %.2f%% the time and %.2f%% the samples compared to CMC'%(
100*data_qmc.time_integrate/data_cmc.time_integrate,100*data_qmc.n_total/data_cmc.n_total))
QMC took 0.19% the time and 0.21% the samples compared to CMC
Visual CMC vs LD¶
cmc_tols = [1,.75,.5,.25,.1,.075,.05,.025]
qmc_tols = [1,.5,.1,.05,.02,.01,.005,.002,.001]
fig,ax = pyplot.subplots(nrows=1,ncols=2,figsize=(10,5))
n_cmc,time_cmc = np.zeros_like(cmc_tols),np.zeros_like(cmc_tols)
for i,cmc_tol in enumerate(cmc_tols):
cmc_stop_crit = qp.CubMCG(qp.FinancialOption(qp.IIDStdUniform(52),option="ASIAN"),abs_tol=cmc_tol)
approx_cmc,data_cmc = cmc_stop_crit.integrate()
n_cmc[i],time_cmc[i] = data_cmc.n_total,data_cmc.time_integrate
ax[0].plot(cmc_tols,n_cmc,'-o',color=colors[0],label=r'CMC, $\mathcal{O}(\varepsilon^{-2})$')
ax[1].plot(cmc_tols,time_cmc,'-o',color=colors[0])
n_qmc,time_qmc = np.zeros_like(qmc_tols),np.zeros_like(qmc_tols)
for i,qmc_tol in enumerate(qmc_tols):
qmc_stop_crit = qp.CubQMCNetG(qp.FinancialOption(qp.DigitalNetB2(52),option="ASIAN"),abs_tol=qmc_tol)
approx_qmc,data_qmc = qmc_stop_crit.integrate()
n_qmc[i],time_qmc[i] = data_qmc.n_total,data_qmc.time_integrate
ax[0].plot(qmc_tols[1:],n_qmc[1:],'-o',color=colors[1],label=r'QMC, $\mathcal{O}(\varepsilon^{-1})$')
ax[1].plot(qmc_tols[1:],time_qmc[1:],'-o',color=colors[1])
ax[0].set_xscale('log',base=10); ax[0].set_yscale('log',base=2)
ax[1].set_xscale('log',base=10); ax[1].set_yscale('log',base=10)
ax[0].invert_xaxis(); ax[1].invert_xaxis()
ax[0].set_xlabel(r'absolute tolerance $\varepsilon$'); ax[1].set_xlabel(r'absolute tolerance $\varepsilon$')
ax[0].set_ylabel(r'numer of samples $n$'); ax[1].set_ylabel('integration time')
ax[0].legend(loc='upper left');
Vectorized Stopping Criterion¶
Many more examples available at https://github.com/QMCSoftware/QMCSoftware/blob/master/demos/vectorized_qmc.ipynb
Vector of Expectations¶
As a simple example, lets compute $\mathbb{E}[\cos(T_0)\cdots\cos(T_{d-1})]$ and $\mathbb{E}[\sin(T_0)\cdots\sin(T_{d-1})]$ where $T \sim \mathcal{U}[0,\pi]^d$
qmc_stop_crit = qp.CubQMCCLT(
integrand = qp.CustomFun(
true_measure = qp.Uniform(sampler=qp.Halton(3,replications=32),lower_bound=0,upper_bound=np.pi),
g = lambda t: np.stack([np.cos(t).prod(-1),np.sin(t).prod(-1)],axis=0),
dimension_indv = 2),
abs_tol=.0001)
approx,data = qmc_stop_crit.integrate()
data
Data (Data) solution [1.006e-05 2.580e-01] comb_bound_low [-5.012e-05 2.579e-01] comb_bound_high [7.024e-05 2.581e-01] comb_bound_diff [0. 0.] comb_flags [ True True] n_total 262144 n [262144 131072] n_rep [8192 4096] time_integrate 0.348 CubQMCRepStudentT (AbstractStoppingCriterion) inflate 1 alpha 0.010 abs_tol 1.00e-04 rel_tol 0 n_init 2^(8) n_limit 2^(30) CustomFun (AbstractIntegrand) Uniform (AbstractTrueMeasure) lower_bound 0 upper_bound 3.142 Halton (AbstractLDDiscreteDistribution) d 3 replications 2^(5) randomize LMS DP t 63 n_limit 2^(32) entropy 339443966251781763705539893307306305063
Covariance¶
In a simple example, let $T \sim \mathcal{N}(1,I_d)$ and compute the covariance of $P = T_0\cdots T_{d-1}$ and $S = T_0+\dots+T_{d-1}$ so that $$\mathrm{Cov}[P,S] = \mathbb{E}[PS]-\mathbb{E}[P]\mathbb{E}[S] = \mu_0-\mu_1\mu_2$$ Theoretically we have $\mathrm{Cov}[P,S] = 2d-(1)(d) = d$
class CovIntegrand(qp.integrand.Integrand):
def __init__(self, sampler):
self.sampler = sampler
self.true_measure = qp.Gaussian(sampler,mean=1)
super(CovIntegrand,self).__init__(dimension_indv=3,dimension_comb=(),parallel=False)
def g(self, t):
P = t.prod(1) # P
S = t.sum(1) # S
PS = P*S #PS
y = np.stack([PS,P,S],axis=0)
return y
def bound_fun(self, low, high):
comb_low = low[0]-max(low[1]*low[2],low[1]*high[2],high[1]*low[2],high[1]*high[2])
comb_high = high[0]-min(low[1]*low[2],low[1]*high[2],high[1]*low[2],high[1]*high[2])
return comb_low,comb_high
def dependency(self, comb_flag):
return np.tile(comb_flag,3)
approx,data = qp.CubQMCLatticeG(CovIntegrand(qp.Lattice(10)),rel_tol=.025).integrate()
data
Data (Data) solution 10.074 comb_bound_low 9.840 comb_bound_high 10.320 comb_bound_diff 0.480 comb_flags 1 n_total 2^(20) n [1048576 1048576 1048576] time_integrate 0.686 CubQMCLatticeG (AbstractStoppingCriterion) abs_tol 0.010 rel_tol 0.025 n_init 2^(10) n_limit 2^(30) CovIntegrand (AbstractIntegrand) Gaussian (AbstractTrueMeasure) mean 1 covariance 1 decomp_type PCA Lattice (AbstractLDDiscreteDistribution) d 10 replications 1 randomize SHIFT gen_vec_source kuo.lattice-33002-1024-1048576.9125.txt order RADICAL INVERSE n_limit 2^(20) entropy 21765430054726836764349893853647822716
Sensitiviy Indices¶
See Appendix A of Art Owen's Monte Carlo Book
In the following example, we fit a neural network to Iris flower features and try to classify the Iris species. For each set of features, the classifier provides a probability of belonging to each species, a length 3 vector. We quantify the sensitiviy of this classificaiton probability to Iris features, assuming features are uniformly distributed throughout the feature domain.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
data = load_iris()
og_feature_names = data["feature_names"]
feature_names = [fn.replace('sepal ','S')\
.replace('length ','L')\
.replace('petal ','P')\
.replace('width ','W')\
.replace('(cm)','') for fn in og_feature_names]
target_names = data["target_names"]
xt,xv,yt,yv = train_test_split(data["data"],data["target"],
test_size = 1/3,
random_state = 7)
pd.DataFrame(np.hstack([data['data'],data['target'][:,None]]),columns=og_feature_names+['species']).iloc[[0,1,90,91,140,141]]
sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | species | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | 0.0 |
1 | 4.9 | 3.0 | 1.4 | 0.2 | 0.0 |
90 | 5.5 | 2.6 | 4.4 | 1.2 | 1.0 |
91 | 6.1 | 3.0 | 4.6 | 1.4 | 1.0 |
140 | 6.7 | 3.1 | 5.6 | 2.4 | 2.0 |
141 | 6.9 | 3.1 | 5.1 | 2.3 | 2.0 |
mlpc = MLPClassifier(random_state=7,max_iter=1024).fit(xt,yt)
yhat = mlpc.predict(xv)
print("accuracy: %.1f%%"%(100*(yv==yhat).mean()))
# accuracy: 98.0%
sampler = qp.DigitalNetB2(4,seed=7)
true_measure = qp.Uniform(sampler,
lower_bound = xt.min(0),
upper_bound = xt.max(0))
fun = qp.CustomFun(
true_measure = true_measure,
g = lambda x: mlpc.predict_proba(x).T,
dimension_indv = 3)
si_fun = qp.SensitivityIndices(fun,indices="all")
qmc_algo = qp.CubQMCNetG(si_fun,abs_tol=.005)
nn_sis,nn_sis_data = qmc_algo.integrate()
accuracy: 98.0%
#print(nn_sis_data.flags_indv.shape)
#print(nn_sis_data.flags_comb.shape)
print('samples: 2^(%d)'%np.log2(nn_sis_data.n_total))
print('time: %.1e'%nn_sis_data.time_integrate)
print('indices:\n%s'%nn_sis_data.integrand.indices)
import pandas as pd
df_closed = pd.DataFrame(nn_sis[0],columns=target_names,index=[str(np.where(idx)[0]) for idx in nn_sis_data.integrand.indices])
print('\nClosed Indices')
print(df_closed)
df_total = pd.DataFrame(nn_sis[1],columns=target_names,index=[str(np.where(idx)[0]) for idx in nn_sis_data.integrand.indices])
print('\nTotal Indices')
df_closed_singletons = df_closed.loc[['[%d]'%i for i in range(4)]].T
df_closed_singletons['sum singletons'] = df_closed_singletons[['[%d]'%i for i in range(4)]].sum(1)
df_closed_singletons.columns = data['feature_names']+['sum']
df_closed_singletons = df_closed_singletons*100
df_closed_singletons
samples: 2^(15) time: 6.6e-01 indices: [[ True False False False] [False True False False] [False False True False] [False False False True] [ True True False False] [ True False True False] [ True False False True] [False True True False] [False True False True] [False False True True] [ True True True False] [ True True False True] [ True False True True] [False True True True]] Closed Indices setosa versicolor virginica [0] 0.001323 0.068645 0.077325 [1] 0.063749 0.026565 0.004784 [2] 0.713825 0.325072 0.497800 [3] 0.052967 0.025579 0.120317 [0 1] 0.063925 0.091300 0.085151 [0 2] 0.715316 0.460314 0.637738 [0 3] 0.053469 0.092601 0.205639 [1 2] 0.841655 0.431035 0.513277 [1 3] 0.110739 0.039410 0.131264 [2 3] 0.822910 0.583282 0.703142 [0 1 2] 0.843726 0.570076 0.658272 [0 1 3] 0.112798 0.104804 0.215817 [0 2 3] 0.825330 0.815263 0.945267 [1 2 3] 0.995864 0.739588 0.728499 Total Indices
sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | sum | |
---|---|---|---|---|---|
setosa | 0.132343 | 6.374860 | 71.382498 | 5.296674 | 83.186375 |
versicolor | 6.864536 | 2.656530 | 32.507230 | 2.557911 | 44.586208 |
virginica | 7.732521 | 0.478364 | 49.779978 | 12.031725 | 70.022587 |
nindices = len(nn_sis_data.integrand.indices)
fig,ax = pyplot.subplots(figsize=(9,5))
ticks = np.arange(nindices)
width = .25
for i,(alpha,species) in enumerate(zip([.25,.5,.75],data['target_names'])):
cvals = df_closed[species].to_numpy()
tvals = df_total[species].to_numpy()
ticks_i = ticks+i*width
ax.bar(ticks_i,cvals,width=width,align='edge',color='k',alpha=alpha,label=species)
#ax.bar(ticks_i,np.flip(tvals),width=width,align='edge',bottom=1-np.flip(tvals),color=color,alpha=.1)
ax.set_xlim([0,13+3*width])
ax.set_xticks(ticks+1.5*width)
# closed_labels = [r'$\underline{s}_{\{%s\}}$'%(','.join([r'\text{%s}'%feature_names[i] for i in idx])) for idx in nn_sis_data.integrand.indices]
closed_labels = ['\n'.join([feature_names[i] for i in np.where(idx)[0]]) for idx in nn_sis_data.integrand.indices]
ax.set_xticklabels(closed_labels,rotation=0)
ax.set_ylim([0,1]); ax.set_yticks([0,1])
ax.grid(False)
for spine in ['top','right','bottom']: ax.spines[spine].set_visible(False)
ax.legend(frameon=False,loc='lower center',bbox_to_anchor=(.5,-.2),ncol=3);
fig.tight_layout()