The simulated dataset was generated. Benjamin Chu writes: I think the main problem might be how I did the simulation. I believe it was something like
b ~ N(0, 1) # for k position of b only, others are 0 yi ~ N(xi^T * b, 1)
where yi is length 2 vector of phenotype, xi is length 10000 genotype vector (entries 0, 1 or 2), and b is length 10000 effect size vector but only k position of b is drawn from standard Normal. This might be a poor simulation model for GEMMA. Later I changed the simulation model and the divergence problem went away.
The following command finishes:
time ../../bin/gemma -bfile multivariate_2traits -k output/multivariate.cXX.txt -maf 0.0000001 -lmm 4 -n 1 2 -o gemma.polygenic.result real 133m29.533s user 133m31.746s sys 0m5.064s
But is slow. A print statement shows close to a second is spent on every SNP:
0.83 9994,10000 0.82 9995,10000 0.81 9996,10000 0.80 9997,10000 0.80 9998,10000 0.80 9999,10000
Running the debugger on and hitting Ctrl-C repeatedly stops in the following places
#0 0x00000000004807f9 in MphCalcLogL (eval=eval@entry=0x5233c0, xHiy=xHiy@entry=0x59e5d0,
D_l=D_l@entry=0x5ac840, UltVehiY=UltVehiY@entry=0x53e500, Qi=Qi@entry=0x59e2e0) at src/mvlmm.cpp:579
#1 0x000000000048104b in MphEM (func_name=func_name@entry=82 'R', max_iter=1000, max_prec=0.001,
eval=eval@entry=0x5233c0, X=X@entry=0x540930, Y=Y@entry=0x5408d0, U_hat=U_hat@entry=0x53e380,
E_hat=0x53e3e0, OmegaU=0x53e440, OmegaE=0x53e4a0, UltVehiY=0x53e500, UltVehiBX=0x53e560,
UltVehiU=0x53e5c0, UltVehiE=0x53e620, V_g=0x540990, V_e=0x540a20, B=0x540ab0) at src/mvlmm.cpp:662
#2 0x000000000048f924 in MVLMM::AnalyzePlink (this=this@entry=0x7fffffffcf20, U=U@entry=0x523380,
eval=eval@entry=0x5233c0, UtW=UtW@entry=0x5233f0, UtY=UtY@entry=0x523430) at src/mvlmm.cpp:3803
#3 0x0000000000435c2e in GEMMA::BatchRun (this=this@entry=0x7fffffffe4a0, cPar=...)
at src/gemma.cpp:2830
#0 0x00007ffff7df8690 in gsl_matrix_sub ()
from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libgsl.so.25
#1 0x000000000047fc91 in UpdateU (OmegaE=OmegaE@entry=0x53e4a0, UltVehiY=UltVehiY@entry=0x53e500,
UltVehiBX=UltVehiBX@entry=0x53e560, UltVehiU=UltVehiU@entry=0x53e5c0) at src/mvlmm.cpp:387
#2 0x0000000000480e76 in MphEM (func_name=func_name@entry=76 'L', max_iter=1000, max_prec=0.001,
eval=eval@entry=0x5233c0, X=X@entry=0x540930, Y=Y@entry=0x5408d0, U_hat=U_hat@entry=0x53e380,
E_hat=0x53e3e0, OmegaU=0x53e440, OmegaE=0x53e4a0, UltVehiY=0x53e500, UltVehiBX=0x53e560,
UltVehiU=0x53e5c0, UltVehiE=0x53e620, V_g=0x540990, V_e=0x540a20, B=0x540ab0) at src/mvlmm.cpp:686
#3 0x000000000048faed in MVLMM::AnalyzePlink (this=this@entry=0x7fffffffcf20, U=U@entry=0x523380,
eval=eval@entry=0x5233c0, UtW=UtW@entry=0x5233f0, UtY=UtY@entry=0x523430) at src/mvlmm.cpp:3778
#4 0x0000000000435c2e in GEMMA::BatchRun (this=this@entry=0x7fffffffe4a0, cPar=...)
at src/gemma.cpp:2830
#0 0x00007ffff6300718 in dgemm_nt ()
from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libopenblas.so.0
#1 0x00007ffff622e6f3 in cblas_dgemm ()
from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libopenblas.so.0
#2 0x00007ffff7d6af44 in gsl_blas_dgemm ()
from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libgsl.so.25
#3 0x00000000004806a4 in CalcSigma (func_name=func_name@entry=82 'R', eval=eval@entry=0x5233c0,
D_l=D_l@entry=0x59e5d0, X=X@entry=0x540930, OmegaU=OmegaU@entry=0x53e440,
OmegaE=OmegaE@entry=0x53e4a0, UltVeh=0x5abd30, Qi=0x5abe50, Sigma_uu=0x540fb0, Sigma_ee=0x5ac7e0)
at src/mvlmm.cpp:542
#4 0x0000000000480f62 in MphEM (func_name=func_name@entry=82 'R', max_iter=1000, max_prec=0.001,
eval=eval@entry=0x5233c0, X=X@entry=0x540930, Y=Y@entry=0x5408d0, U_hat=U_hat@entry=0x53e380,
E_hat=0x53e3e0, OmegaU=0x53e440, OmegaE=0x53e4a0, UltVehiY=0x53e500, UltVehiBX=0x53e560,
UltVehiU=0x53e5c0, UltVehiE=0x53e620, V_g=0x540990, V_e=0x540a20, B=0x540ab0) at src/mvlmm.cpp:704
#5 0x000000000048f924 in MVLMM::AnalyzePlink (this=this@entry=0x7fffffffcf20, U=U@entry=0x523380,
eval=eval@entry=0x5233c0, UtW=UtW@entry=0x5233f0, UtY=UtY@entry=0x523430) at src/mvlmm.cpp:3803
#6 0x0000000000435c2e in GEMMA::BatchRun (this=this@entry=0x7fffffffe4a0, cPar=...)
at src/gemma.cpp:2830
#0 0x00007ffff5e16f00 in __pthread_mutex_unlock_usercnt ()
from /gnu/store/fa6wj5bxkj5ll1d7292a70knmyl7a0cr-glibc-2.31/lib/libpthread.so.0
#1 0x00007ffff622e6fb in cblas_dgemm ()
from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libopenblas.so.0
#2 0x00007ffff7d6af44 in gsl_blas_dgemm ()
from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libgsl.so.25
#3 0x000000000048067b in CalcSigma (func_name=func_name@entry=82 'R', eval=eval@entry=0x5233c0,
D_l=D_l@entry=0x5ac970, X=X@entry=0x540930, OmegaU=OmegaU@entry=0x53e440,
OmegaE=OmegaE@entry=0x53e4a0, UltVeh=0x5abdb0, Qi=0x59eb10, Sigma_uu=0x5ac3e0, Sigma_ee=0x5ab290)
at src/mvlmm.cpp:541
#0 0x00007ffff5e16f19 in __pthread_mutex_unlock_usercnt ()
from /gnu/store/fa6wj5bxkj5ll1d7292a70knmyl7a0cr-glibc-2.31/lib/libpthread.so.0
#1 0x00007ffff643d667 in blas_memory_alloc ()
from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libopenblas.so.0
#2 0x00007ffff622e648 in cblas_dgemm ()
from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libopenblas.so.0
#3 0x00007ffff7d6af44 in gsl_blas_dgemm ()
from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libgsl.so.25
#4 0x0000000000480652 in CalcSigma (func_name=func_name@entry=82 'R', eval=eval@entry=0x5233c0,
D_l=D_l@entry=0x5ab1e0, X=X@entry=0x540930, OmegaU=OmegaU@entry=0x53e440,
OmegaE=OmegaE@entry=0x53e4a0, UltVeh=0x5a0990, Qi=0x59e570, Sigma_uu=0x59e2e0, Sigma_ee=0x5abe30)
at src/mvlmm.cpp:539
#0 0x000000000047f8e7 in gsl_matrix_get (j=389, i=0, m=0x53e500)
at /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/include/gsl/gsl_matrix_double.h:286
#1 CalcXHiY (eval=eval@entry=0x5233c0, D_l=D_l@entry=0x59f980, X=X@entry=0x540930,
UltVehiY=UltVehiY@entry=0x53e500, xHiy=xHiy@entry=0x5aaf40) at src/mvlmm.cpp:349
#2 0x0000000000481033 in MphEM (func_name=func_name@entry=82 'R', max_iter=1000, max_prec=0.001,
eval=eval@entry=0x5233c0, X=X@entry=0x540930, Y=Y@entry=0x5408d0, U_hat=U_hat@entry=0x53e380,
E_hat=0x53e3e0, OmegaU=0x53e440, OmegaE=0x53e4a0, UltVehiY=0x53e500, UltVehiBX=0x53e560,
UltVehiU=0x53e5c0, UltVehiE=0x53e620, V_g=0x540990, V_e=0x540a20, B=0x540ab0) at src/mvlmm.cpp:658
#3 0x000000000048f9da in MVLMM::AnalyzePlink (this=this@entry=0x7fffffffcf20, U=U@entry=0x523380,
eval=eval@entry=0x5233c0, UtW=UtW@entry=0x5233f0, UtY=UtY@entry=0x523430) at src/mvlmm.cpp:3805
#4 0x0000000000435c2e in GEMMA::BatchRun (this=this@entry=0x7fffffffe4a0, cPar=...)
at src/gemma.cpp:2830
#0 0x00007ffff62ffd0b in dgemm_nn ()
from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libopenblas.so.0
#1 0x00007ffff622e6f3 in cblas_dgemm ()
from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libopenblas.so.0
#2 0x00007ffff7d6af44 in gsl_blas_dgemm ()
from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libgsl.so.25
#3 0x000000000048067b in CalcSigma (func_name=func_name@entry=82 'R', eval=eval@entry=0x5233c0,
D_l=D_l@entry=0x5a13f0, X=X@entry=0x540930, OmegaU=OmegaU@entry=0x53e440,
OmegaE=OmegaE@entry=0x53e4a0, UltVeh=0x5a4410, Qi=0x5a8320, Sigma_uu=0x59fec0, Sigma_ee=0x5acca0)
at src/mvlmm.cpp:541
#4 0x0000000000480f62 in MphEM (func_name=func_name@entry=82 'R', max_iter=1000, max_prec=0.001,
eval=eval@entry=0x5233c0, X=X@entry=0x540930, Y=Y@entry=0x5408d0, U_hat=U_hat@entry=0x53e380,
E_hat=0x53e3e0, OmegaU=0x53e440, OmegaE=0x53e4a0, UltVehiY=0x53e500, UltVehiBX=0x53e560,
UltVehiU=0x53e5c0, UltVehiE=0x53e620, V_g=0x540990, V_e=0x540a20, B=0x540ab0) at src/mvlmm.cpp:704
#5 0x000000000048fa1d in MVLMM::AnalyzePlink (this=this@entry=0x7fffffffcf20, U=U@entry=0x523380,
eval=eval@entry=0x5233c0, UtW=UtW@entry=0x5233f0, UtY=UtY@entry=0x523430) at src/mvlmm.cpp:3806
Shows time is spent mostly in one place.
A profiler should help pinpoint where time is spent. The issue submitter admits that this is a contrived edge-case so I am not going to work on it until I start optimizing mvlmm. Just a quick check with gperftools (formerly the Google profiler) which is packaged in GNU Guix:
Profiling above gemma dataset
120 7.3% 7.3% 155 9.4% CalcQi
96 5.8% 13.1% 96 5.8% dgemm_kernel_ZEN
88 5.3% 18.4% 88 5.3% __sched_yield
81 4.9% 23.3% 109 6.6% blas_memory_free
77 4.7% 28.0% 77 4.7% __pthread_mutex_unlock_usercnt
71 4.3% 32.3% 94 5.7% CalcXHiY
69 4.2% 36.5% 87 5.3% dgemm_nn
66 4.0% 40.5% 66 4.0% __pthread_mutex_lock
63 3.8% 44.3% 63 3.8% __ieee754_log_fma
59 3.6% 47.9% 179 10.9% blas_memory_alloc
58 3.5% 51.4% 58 3.5% gsl_vector_get
57 3.5% 54.9% 88 5.3% dgemm_nt
56 3.4% 58.3% 80 4.9% CalcOmega
56 3.4% 61.7% 421 25.5% cblas_dgemm
54 3.3% 64.9% 54 3.3% dgemm_beta_ZEN
51 3.1% 68.0% 537 32.6% CalcSigma
51 3.1% 71.1% 76 4.6% dsyr_thread_L
43 2.6% 73.7% 43 2.6% gsl_matrix_get
41 2.5% 76.2% 41 2.5% gsl_matrix_set
37 2.2% 78.5% 130 7.9% MphCalcLogL
CalcSigma and CalcQi are worth looking into. Also cblas_dgemm may need some attention.
For now I am dropping this until I pick up the mvlmm speedups.