The simulated dataset was generated. Benjamin Chu writes: I think the main problem might be how I did the simulation. I believe it was something like
b ~ N(0, 1) # for k position of b only, others are 0 yi ~ N(xi^T * b, 1)
where yi is length 2 vector of phenotype, xi is length 10000 genotype vector (entries 0, 1 or 2), and b is length 10000 effect size vector but only k position of b is drawn from standard Normal. This might be a poor simulation model for GEMMA. Later I changed the simulation model and the divergence problem went away.
The following command finishes:
time ../../bin/gemma -bfile multivariate_2traits -k output/multivariate.cXX.txt -maf 0.0000001 -lmm 4 -n 1 2 -o gemma.polygenic.result real 133m29.533s user 133m31.746s sys 0m5.064s
But is slow. A print statement shows close to a second is spent on every SNP:
0.83 9994,10000 0.82 9995,10000 0.81 9996,10000 0.80 9997,10000 0.80 9998,10000 0.80 9999,10000
Running the debugger on and hitting Ctrl-C repeatedly stops in the following places
#0 0x00000000004807f9 in MphCalcLogL (eval=eval@entry=0x5233c0, xHiy=xHiy@entry=0x59e5d0, D_l=D_l@entry=0x5ac840, UltVehiY=UltVehiY@entry=0x53e500, Qi=Qi@entry=0x59e2e0) at src/mvlmm.cpp:579 #1 0x000000000048104b in MphEM (func_name=func_name@entry=82 'R', max_iter=1000, max_prec=0.001, eval=eval@entry=0x5233c0, X=X@entry=0x540930, Y=Y@entry=0x5408d0, U_hat=U_hat@entry=0x53e380, E_hat=0x53e3e0, OmegaU=0x53e440, OmegaE=0x53e4a0, UltVehiY=0x53e500, UltVehiBX=0x53e560, UltVehiU=0x53e5c0, UltVehiE=0x53e620, V_g=0x540990, V_e=0x540a20, B=0x540ab0) at src/mvlmm.cpp:662 #2 0x000000000048f924 in MVLMM::AnalyzePlink (this=this@entry=0x7fffffffcf20, U=U@entry=0x523380, eval=eval@entry=0x5233c0, UtW=UtW@entry=0x5233f0, UtY=UtY@entry=0x523430) at src/mvlmm.cpp:3803 #3 0x0000000000435c2e in GEMMA::BatchRun (this=this@entry=0x7fffffffe4a0, cPar=...) at src/gemma.cpp:2830 #0 0x00007ffff7df8690 in gsl_matrix_sub () from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libgsl.so.25 #1 0x000000000047fc91 in UpdateU (OmegaE=OmegaE@entry=0x53e4a0, UltVehiY=UltVehiY@entry=0x53e500, UltVehiBX=UltVehiBX@entry=0x53e560, UltVehiU=UltVehiU@entry=0x53e5c0) at src/mvlmm.cpp:387 #2 0x0000000000480e76 in MphEM (func_name=func_name@entry=76 'L', max_iter=1000, max_prec=0.001, eval=eval@entry=0x5233c0, X=X@entry=0x540930, Y=Y@entry=0x5408d0, U_hat=U_hat@entry=0x53e380, E_hat=0x53e3e0, OmegaU=0x53e440, OmegaE=0x53e4a0, UltVehiY=0x53e500, UltVehiBX=0x53e560, UltVehiU=0x53e5c0, UltVehiE=0x53e620, V_g=0x540990, V_e=0x540a20, B=0x540ab0) at src/mvlmm.cpp:686 #3 0x000000000048faed in MVLMM::AnalyzePlink (this=this@entry=0x7fffffffcf20, U=U@entry=0x523380, eval=eval@entry=0x5233c0, UtW=UtW@entry=0x5233f0, UtY=UtY@entry=0x523430) at src/mvlmm.cpp:3778 #4 0x0000000000435c2e in GEMMA::BatchRun (this=this@entry=0x7fffffffe4a0, cPar=...) at src/gemma.cpp:2830 #0 0x00007ffff6300718 in dgemm_nt () from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libopenblas.so.0 #1 0x00007ffff622e6f3 in cblas_dgemm () from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libopenblas.so.0 #2 0x00007ffff7d6af44 in gsl_blas_dgemm () from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libgsl.so.25 #3 0x00000000004806a4 in CalcSigma (func_name=func_name@entry=82 'R', eval=eval@entry=0x5233c0, D_l=D_l@entry=0x59e5d0, X=X@entry=0x540930, OmegaU=OmegaU@entry=0x53e440, OmegaE=OmegaE@entry=0x53e4a0, UltVeh=0x5abd30, Qi=0x5abe50, Sigma_uu=0x540fb0, Sigma_ee=0x5ac7e0) at src/mvlmm.cpp:542 #4 0x0000000000480f62 in MphEM (func_name=func_name@entry=82 'R', max_iter=1000, max_prec=0.001, eval=eval@entry=0x5233c0, X=X@entry=0x540930, Y=Y@entry=0x5408d0, U_hat=U_hat@entry=0x53e380, E_hat=0x53e3e0, OmegaU=0x53e440, OmegaE=0x53e4a0, UltVehiY=0x53e500, UltVehiBX=0x53e560, UltVehiU=0x53e5c0, UltVehiE=0x53e620, V_g=0x540990, V_e=0x540a20, B=0x540ab0) at src/mvlmm.cpp:704 #5 0x000000000048f924 in MVLMM::AnalyzePlink (this=this@entry=0x7fffffffcf20, U=U@entry=0x523380, eval=eval@entry=0x5233c0, UtW=UtW@entry=0x5233f0, UtY=UtY@entry=0x523430) at src/mvlmm.cpp:3803 #6 0x0000000000435c2e in GEMMA::BatchRun (this=this@entry=0x7fffffffe4a0, cPar=...) at src/gemma.cpp:2830 #0 0x00007ffff5e16f00 in __pthread_mutex_unlock_usercnt () from /gnu/store/fa6wj5bxkj5ll1d7292a70knmyl7a0cr-glibc-2.31/lib/libpthread.so.0 #1 0x00007ffff622e6fb in cblas_dgemm () from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libopenblas.so.0 #2 0x00007ffff7d6af44 in gsl_blas_dgemm () from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libgsl.so.25 #3 0x000000000048067b in CalcSigma (func_name=func_name@entry=82 'R', eval=eval@entry=0x5233c0, D_l=D_l@entry=0x5ac970, X=X@entry=0x540930, OmegaU=OmegaU@entry=0x53e440, OmegaE=OmegaE@entry=0x53e4a0, UltVeh=0x5abdb0, Qi=0x59eb10, Sigma_uu=0x5ac3e0, Sigma_ee=0x5ab290) at src/mvlmm.cpp:541 #0 0x00007ffff5e16f19 in __pthread_mutex_unlock_usercnt () from /gnu/store/fa6wj5bxkj5ll1d7292a70knmyl7a0cr-glibc-2.31/lib/libpthread.so.0 #1 0x00007ffff643d667 in blas_memory_alloc () from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libopenblas.so.0 #2 0x00007ffff622e648 in cblas_dgemm () from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libopenblas.so.0 #3 0x00007ffff7d6af44 in gsl_blas_dgemm () from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libgsl.so.25 #4 0x0000000000480652 in CalcSigma (func_name=func_name@entry=82 'R', eval=eval@entry=0x5233c0, D_l=D_l@entry=0x5ab1e0, X=X@entry=0x540930, OmegaU=OmegaU@entry=0x53e440, OmegaE=OmegaE@entry=0x53e4a0, UltVeh=0x5a0990, Qi=0x59e570, Sigma_uu=0x59e2e0, Sigma_ee=0x5abe30) at src/mvlmm.cpp:539 #0 0x000000000047f8e7 in gsl_matrix_get (j=389, i=0, m=0x53e500) at /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/include/gsl/gsl_matrix_double.h:286 #1 CalcXHiY (eval=eval@entry=0x5233c0, D_l=D_l@entry=0x59f980, X=X@entry=0x540930, UltVehiY=UltVehiY@entry=0x53e500, xHiy=xHiy@entry=0x5aaf40) at src/mvlmm.cpp:349 #2 0x0000000000481033 in MphEM (func_name=func_name@entry=82 'R', max_iter=1000, max_prec=0.001, eval=eval@entry=0x5233c0, X=X@entry=0x540930, Y=Y@entry=0x5408d0, U_hat=U_hat@entry=0x53e380, E_hat=0x53e3e0, OmegaU=0x53e440, OmegaE=0x53e4a0, UltVehiY=0x53e500, UltVehiBX=0x53e560, UltVehiU=0x53e5c0, UltVehiE=0x53e620, V_g=0x540990, V_e=0x540a20, B=0x540ab0) at src/mvlmm.cpp:658 #3 0x000000000048f9da in MVLMM::AnalyzePlink (this=this@entry=0x7fffffffcf20, U=U@entry=0x523380, eval=eval@entry=0x5233c0, UtW=UtW@entry=0x5233f0, UtY=UtY@entry=0x523430) at src/mvlmm.cpp:3805 #4 0x0000000000435c2e in GEMMA::BatchRun (this=this@entry=0x7fffffffe4a0, cPar=...) at src/gemma.cpp:2830 #0 0x00007ffff62ffd0b in dgemm_nn () from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libopenblas.so.0 #1 0x00007ffff622e6f3 in cblas_dgemm () from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libopenblas.so.0 #2 0x00007ffff7d6af44 in gsl_blas_dgemm () from /gnu/store/pmmbmrbizz668plrrvfyvqh5ymvjy5p4-profile/lib/libgsl.so.25 #3 0x000000000048067b in CalcSigma (func_name=func_name@entry=82 'R', eval=eval@entry=0x5233c0, D_l=D_l@entry=0x5a13f0, X=X@entry=0x540930, OmegaU=OmegaU@entry=0x53e440, OmegaE=OmegaE@entry=0x53e4a0, UltVeh=0x5a4410, Qi=0x5a8320, Sigma_uu=0x59fec0, Sigma_ee=0x5acca0) at src/mvlmm.cpp:541 #4 0x0000000000480f62 in MphEM (func_name=func_name@entry=82 'R', max_iter=1000, max_prec=0.001, eval=eval@entry=0x5233c0, X=X@entry=0x540930, Y=Y@entry=0x5408d0, U_hat=U_hat@entry=0x53e380, E_hat=0x53e3e0, OmegaU=0x53e440, OmegaE=0x53e4a0, UltVehiY=0x53e500, UltVehiBX=0x53e560, UltVehiU=0x53e5c0, UltVehiE=0x53e620, V_g=0x540990, V_e=0x540a20, B=0x540ab0) at src/mvlmm.cpp:704 #5 0x000000000048fa1d in MVLMM::AnalyzePlink (this=this@entry=0x7fffffffcf20, U=U@entry=0x523380, eval=eval@entry=0x5233c0, UtW=UtW@entry=0x5233f0, UtY=UtY@entry=0x523430) at src/mvlmm.cpp:3806
Shows time is spent mostly in one place.
A profiler should help pinpoint where time is spent. The issue submitter admits that this is a contrived edge-case so I am not going to work on it until I start optimizing mvlmm. Just a quick check with gperftools (formerly the Google profiler) which is packaged in GNU Guix:
Profiling above gemma dataset
120 7.3% 7.3% 155 9.4% CalcQi 96 5.8% 13.1% 96 5.8% dgemm_kernel_ZEN 88 5.3% 18.4% 88 5.3% __sched_yield 81 4.9% 23.3% 109 6.6% blas_memory_free 77 4.7% 28.0% 77 4.7% __pthread_mutex_unlock_usercnt 71 4.3% 32.3% 94 5.7% CalcXHiY 69 4.2% 36.5% 87 5.3% dgemm_nn 66 4.0% 40.5% 66 4.0% __pthread_mutex_lock 63 3.8% 44.3% 63 3.8% __ieee754_log_fma 59 3.6% 47.9% 179 10.9% blas_memory_alloc 58 3.5% 51.4% 58 3.5% gsl_vector_get 57 3.5% 54.9% 88 5.3% dgemm_nt 56 3.4% 58.3% 80 4.9% CalcOmega 56 3.4% 61.7% 421 25.5% cblas_dgemm 54 3.3% 64.9% 54 3.3% dgemm_beta_ZEN 51 3.1% 68.0% 537 32.6% CalcSigma 51 3.1% 71.1% 76 4.6% dsyr_thread_L 43 2.6% 73.7% 43 2.6% gsl_matrix_get 41 2.5% 76.2% 41 2.5% gsl_matrix_set 37 2.2% 78.5% 130 7.9% MphCalcLogL
CalcSigma and CalcQi are worth looking into. Also cblas_dgemm may need some attention.
For now I am dropping this until I pick up the mvlmm speedups.