data <- read.table("ALL.chr1.merged_beagle_mach.20101123.snps_indels_svs.genotypes_annot.txt",skip=2) a1 <- which(data[[10]]<0.05) length(a1) [1] 2596777 a2 <- which(data[[10]]>(1/2184)) a0 <- intersect(a1,a2) length(a0) [1] 2583112 summary(data[[10]][a1]) summary(data[[10]][a0]) > summary(data[[10]][a1]) Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0000000 0.0004579 0.0018320 0.0056190 0.0059520 0.0499100 > > summary(data[[10]][a0]) Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0004579 0.0004579 0.0018320 0.0056480 0.0059520 0.0499100 a3 <- which(data[[10]]>(2/2184)) a01 <- intersect(a1,a3) length(a01) [1] 1551431 > summary(data[[10]][a01]) Min. 1st Qu. Median Mean 3rd Qu. Max. 0.001374 0.002289 0.004579 0.008997 0.011450 0.049910 ---------------- mean(data[[10]]) [1] 0.06958777 length(data[[10]]) [1] 3201157 aT <- table(data[[10]]) > aT[1:110] 0 0.00045788 0.00091575 0.00137363 0.0018315 0.00228938 0.00274725 13665 683851 347830 216587 151177 113907 89474 0.00320513 0.003663 0.00412088 0.00457875 0.00503663 0.00549451 0.00595238 73436 62087 54158 46754 41816 37203 33126 0.00641026 0.00686813 0.00732601 0.00778388 0.00824176 0.00869963 0.00915751 30743 28254 25900 23868 21913 21174 19393 0.00961538 0.01007326 0.01053114 0.01098901 0.01144689 0.01190476 0.01236264 18823 17254 16078 15488 14623 13928 12951 0.01282051 0.01327839 0.01373626 0.01419414 0.01465201 0.01510989 0.01556777 12127 11808 11131 10708 10603 9481 9295 0.01602564 0.01648352 0.01694139 0.01739927 0.01785714 0.01831502 0.01877289 8883 8585 8050 7711 7302 7307 7201 0.01923077 0.01968865 0.02014652 0.0206044 0.02106227 0.02152015 0.02197802 6762 6495 6136 5999 5833 5692 5463 0.0224359 0.02289377 0.02335165 0.02380952 0.0242674 0.02472528 0.02518315 5753 5274 4947 4883 4538 4592 4251 0.02564103 0.0260989 0.02655678 0.02701465 0.02747253 0.0279304 0.02838828 4547 4437 4121 4135 4079 4144 3924 0.02884615 0.02930403 0.0297619 0.03021978 0.03067766 0.03113553 0.03159341 3609 3650 3406 3305 3420 3046 2973 0.03205128 
0.03250916 0.03296703 0.03342491 0.03388279 0.03434066 0.03479854 2872 2991 2847 2900 2798 3063 2823 0.03525641 0.03571429 0.03617216 0.03663004 0.03708791 0.03754579 0.03800366 2893 2755 2687 2487 2443 2547 2551 0.03846154 0.03891942 0.03937729 0.03983517 0.04029304 0.04075092 0.04120879 2533 2491 2476 2381 2371 2307 2506 0.04166667 0.04212454 0.04258242 0.04304029 0.04349817 0.04395605 0.04441392 2188 2060 2107 1813 2022 1982 1818 0.0448718 0.04532967 0.04578755 0.04624542 0.0467033 0.04716117 0.04761905 1882 1861 1917 2021 1813 1823 1827 0.04807692 0.0485348 0.04899267 0.04945055 0.04990843 1807 1808 1579 1946 1714 > > aT[(2185-110+1):2185] 0.9500916 0.95054942 0.95100731 0.95146519 0.95192307 0.95238096 0.95283884 126 149 118 228 139 179 177 0.95329672 0.9537546 0.95421243 0.95467031 0.95512819 0.95558608 0.95604396 111 138 131 192 120 123 142 0.95650184 0.95695972 0.95741761 0.95787543 0.95833331 0.9587912 0.95924908 132 160 126 128 137 109 132 0.95970696 0.96016484 0.96062273 0.96108061 0.96153843 0.96199632 0.9624542 116 130 146 137 139 140 131 0.96291208 0.96336997 0.96382785 0.96428573 0.96474361 0.96520144 0.96565932 122 151 163 161 159 150 177 0.9661172 0.96657509 0.96703297 0.96749085 0.96794873 0.96840662 0.96886444 235 144 156 151 153 174 141 0.96932232 0.96978021 0.97023809 0.97069597 0.97115386 0.97161174 0.97206962 206 175 166 200 139 113 158 0.97252744 0.97298533 0.97344321 0.97390109 0.97435898 0.97481686 0.97527474 174 145 164 179 171 120 201 0.97573262 0.97619045 0.97664833 0.97710621 0.9775641 0.97802198 0.97847986 131 185 189 213 180 190 197 0.97893775 0.97939563 0.97985345 0.98031133 0.98076922 0.9812271 0.98168498 194 203 192 204 216 184 220 0.98214287 0.98260075 0.98305863 0.98351645 0.98397434 0.98443222 0.9848901 188 217 244 231 158 282 265 0.98534799 0.98580587 0.98626375 0.98672163 0.98717946 0.98763734 0.98809522 245 221 227 190 194 183 258 0.98855311 0.98901099 0.98946887 0.98992676 0.99038464 0.99084246 0.99130034 247 227 247 227 
223 264 223 0.99175823 0.99221611 0.99267399 0.99313188 0.99358976 0.99404764 0.99450547 300 310 277 251 289 306 239 0.99496335 0.99542123 0.99587911 0.996337 0.99679488 0.99725276 0.99771065 326 256 301 362 311 364 442 0.99816847 0.99862635 0.99908423 0.99954212 1 417 394 484 320 1459 > sum(aT[1:110])+ sum(aT[(2185-110+1):2185]) Rare SNPs (0<=MAF<=0.05 OR 0.95<=MAF<=1.0): 2620128 Without zero-frequency and private SNPs: sum(aT[3:110])+ sum(aT[(2185-110+1):(2185-2)]) 1920833 freqs <- names(aT) freqs[110] [1] "0.04990843" dim(aT) <- c(1,2185) colnamesAT <- 0:2184 colnames(aT) <- colnamesAT write.table(aT,file="MAFstatistics.txt") s <- 0 for (i in 3:109) { s <- s + aT[i]*(i-1) } s [1] 30994086 for (i in ((2185-109+1):(2185-2))) { s <- s + aT[i]*(2185-i) } s [1] 31978538 s <- 0 for (i in 2:2185) { s <- s + aT[i]*(i-1) } s [1] 486510822 30994086/(2184*3201157) [1] 0.004433219 31978538/(2184*3201157) [1] 0.00457403 486510822/(2184*3201157) [1] 0.06958777 ########################### SNP distances in bp: dist <- diff(data[[2]]) summary(dist) Min. 1st Qu. Median Mean 3rd Qu. Max. 0 18 45 78 94 21060000 1/45 [1] 0.02222222 1/78 [1] 0.01282051 ########################### re <- c() for (j in 0:20) { Lout <- readSamplesSpfabia(X="ALL.chr1.merged_beagle_mach.20101123.snps_indels_svs.genotypes_mat",samples=((j*100+1):(j*100+100)),lowerB=1.5,upperB=0.05*lii) for (i in 1:100) { av <- which(Lout[,i]>0.1) avd <- diff(av) re <- c(re,avd) } } Lout <- readSamplesSpfabia(X="ALL.chr1.merged_beagle_mach.20101123.snps_indels_svs.genotypes_mat",samples=2101:2184,lowerB=1.5,upperB=0.05*lii) for (i in 1:84) { av <- which(Lout[,i]>0.1) avd <- diff(av) re <- c(re,avd) } summary(re) summary(re) Min. 1st Qu. Median Mean 3rd Qu. Max. 
1.00 3.00 9.00 22.29 23.00 4205.00
###########################
RESULTS:
MAF (mean over all SNPs): 0.06958777
MAF without zero-frequency SNPs, private SNPs, and SNPs with MAF > 0.05: 0.004433219
MAF without zero-frequency SNPs, private SNPs, and SNPs with MAF > 0.05, and with change of major allele: 0.00457403
3201157 SNPs
13665 (0.43%) SNPs with frequency 0
486510822 minor alleles
30994086 minor alleles without zero-frequency SNPs, private SNPs, and SNPs with MAF > 0.05
683851 (21.4%) private SNPs