data <- read.table("ALL.chr1.merged_beagle_mach.20101123.snps_indels_svs.genotypes_annot.txt",skip=2) mean(data[[10]]) [1] 0.06958777 length(data[[10]]) [1] 3201157 aT <- table(data[[10]]) > aT[1:110] 0 0.00045788 0.00091575 0.00137363 0.0018315 0.00228938 0.00274725 13665 683851 347830 216587 151177 113907 89474 0.00320513 0.003663 0.00412088 0.00457875 0.00503663 0.00549451 0.00595238 73436 62087 54158 46754 41816 37203 33126 0.00641026 0.00686813 0.00732601 0.00778388 0.00824176 0.00869963 0.00915751 30743 28254 25900 23868 21913 21174 19393 0.00961538 0.01007326 0.01053114 0.01098901 0.01144689 0.01190476 0.01236264 18823 17254 16078 15488 14623 13928 12951 0.01282051 0.01327839 0.01373626 0.01419414 0.01465201 0.01510989 0.01556777 12127 11808 11131 10708 10603 9481 9295 0.01602564 0.01648352 0.01694139 0.01739927 0.01785714 0.01831502 0.01877289 8883 8585 8050 7711 7302 7307 7201 0.01923077 0.01968865 0.02014652 0.0206044 0.02106227 0.02152015 0.02197802 6762 6495 6136 5999 5833 5692 5463 0.0224359 0.02289377 0.02335165 0.02380952 0.0242674 0.02472528 0.02518315 5753 5274 4947 4883 4538 4592 4251 0.02564103 0.0260989 0.02655678 0.02701465 0.02747253 0.0279304 0.02838828 4547 4437 4121 4135 4079 4144 3924 0.02884615 0.02930403 0.0297619 0.03021978 0.03067766 0.03113553 0.03159341 3609 3650 3406 3305 3420 3046 2973 0.03205128 0.03250916 0.03296703 0.03342491 0.03388279 0.03434066 0.03479854 2872 2991 2847 2900 2798 3063 2823 0.03525641 0.03571429 0.03617216 0.03663004 0.03708791 0.03754579 0.03800366 2893 2755 2687 2487 2443 2547 2551 0.03846154 0.03891942 0.03937729 0.03983517 0.04029304 0.04075092 0.04120879 2533 2491 2476 2381 2371 2307 2506 0.04166667 0.04212454 0.04258242 0.04304029 0.04349817 0.04395605 0.04441392 2188 2060 2107 1813 2022 1982 1818 0.0448718 0.04532967 0.04578755 0.04624542 0.0467033 0.04716117 0.04761905 1882 1861 1917 2021 1813 1823 1827 0.04807692 0.0485348 0.04899267 0.04945055 0.04990843 1807 1808 1579 1946 1714 > > 
aT[(2185-110+1):2185] 0.9500916 0.95054942 0.95100731 0.95146519 0.95192307 0.95238096 0.95283884 126 149 118 228 139 179 177 0.95329672 0.9537546 0.95421243 0.95467031 0.95512819 0.95558608 0.95604396 111 138 131 192 120 123 142 0.95650184 0.95695972 0.95741761 0.95787543 0.95833331 0.9587912 0.95924908 132 160 126 128 137 109 132 0.95970696 0.96016484 0.96062273 0.96108061 0.96153843 0.96199632 0.9624542 116 130 146 137 139 140 131 0.96291208 0.96336997 0.96382785 0.96428573 0.96474361 0.96520144 0.96565932 122 151 163 161 159 150 177 0.9661172 0.96657509 0.96703297 0.96749085 0.96794873 0.96840662 0.96886444 235 144 156 151 153 174 141 0.96932232 0.96978021 0.97023809 0.97069597 0.97115386 0.97161174 0.97206962 206 175 166 200 139 113 158 0.97252744 0.97298533 0.97344321 0.97390109 0.97435898 0.97481686 0.97527474 174 145 164 179 171 120 201 0.97573262 0.97619045 0.97664833 0.97710621 0.9775641 0.97802198 0.97847986 131 185 189 213 180 190 197 0.97893775 0.97939563 0.97985345 0.98031133 0.98076922 0.9812271 0.98168498 194 203 192 204 216 184 220 0.98214287 0.98260075 0.98305863 0.98351645 0.98397434 0.98443222 0.9848901 188 217 244 231 158 282 265 0.98534799 0.98580587 0.98626375 0.98672163 0.98717946 0.98763734 0.98809522 245 221 227 190 194 183 258 0.98855311 0.98901099 0.98946887 0.98992676 0.99038464 0.99084246 0.99130034 247 227 247 227 223 264 223 0.99175823 0.99221611 0.99267399 0.99313188 0.99358976 0.99404764 0.99450547 300 310 277 251 289 306 239 0.99496335 0.99542123 0.99587911 0.996337 0.99679488 0.99725276 0.99771065 326 256 301 362 311 364 442 0.99816847 0.99862635 0.99908423 0.99954212 1 417 394 484 320 1459 > sum(aT[1:110])+ sum(aT[(2185-110+1):2185]) Rare SNPs (0<=MAF<=0.05 OR 0.95<=MAF<=1.0): 2620128 Without zero and privat: sum(aT[3:110])+ sum(aT[(2185-110+1):(2185-2)]) 1920833 freqs <- names(aT) freqs[110] [1] "0.04990843" dim(aT) <- c(1,2185) colnamesAT <- 0:2184 colnames(aT) <- colnamesAT write.table(aT,file="MAFstatistics.txt") s <- 0 for 
(i in 3:109) { s <- s + aT[i]*(i-1) } s [1] 30994086 for (i in ((2185-109+1):(2185-2))) { s <- s + aT[i]*(2185-i) } s [1] 31978538 s <- 0 for (i in 2:2185) { s <- s + aT[i]*(i-1) } s [1] 486510822 30994086/(2184*3201157) [1] 0.004433219 31978538/(2184*3201157) [1] 0.00457403 486510822/(2184*3201157) [1] 0.06958777 ########################### SNP distances in bp: dist <- diff(data[[2]]) summary(dist) Min. 1st Qu. Median Mean 3rd Qu. Max. 0 18 45 78 94 21060000 1/45 [1] 0.02222222 1/78 [1] 0.01282051 ########################### re <- c() for (j in 0:20) { Lout <- readSamplesSpfabia(X="ALL.chr1.merged_beagle_mach.20101123.snps_indels_svs.genotypes_mat",samples=((j*100+1):(j*100+100)),lowerB=1.5,upperB=0.05*lii) for (i in 1:100) { av <- which(Lout[,i]>0.1) avd <- diff(av) re <- c(re,avd) } } Lout <- readSamplesSpfabia(X="ALL.chr1.merged_beagle_mach.20101123.snps_indels_svs.genotypes_mat",samples=2101:2184,lowerB=1.5,upperB=0.05*lii) for (i in 1:84) { av <- which(Lout[,i]>0.1) avd <- diff(av) re <- c(re,avd) } summary(re) summary(re) Min. 1st Qu. Median Mean 3rd Qu. Max. 1.00 3.00 9.00 22.29 23.00 4205.00 ########################### RESULTS: MAF: 0.06958777 MAF without 0 and privat SNPs and SNPs >0.05 MAF: 0.004433219 MAF without 0 and privat SNPs and SNPs >0.05 MAF and with change of major: 0.00457403 3201157 SNPS 13665 (0.43%) SNPs with frequency 0 486510822 minor alleles 30994086 minor alleles without 0 and private SNPs and >0.05 MAF 683851 (21.4%) private SNPs ############################################################################ ############################################################################ ############################################################################ > print(allCountT) [1] 160588 > print(allIBDsD) [1] 160588 > > sunoIBDD <- summary(noIBDD) > print(sunoIBDD) Min. 1st Qu. Median Mean 3rd Qu. Max. 112.0 218.0 248.0 250.9 281.0 416.0 > > > sunoSnps <- summary(noSnpsD) > print(sunoSnps) Min. 1st Qu. Median Mean 3rd Qu. Max. 
9.00 9.00 11.00 15.48 16.00 266.00 > > sunoSamp <- summary(noSampD) > print(sunoSamp) Min. 1st Qu. Median Mean 3rd Qu. Max. 2.00 3.00 6.00 13.53 16.00 185.00 > > suibdLength <- summary(ibdLengthD) > print(suibdLength) Min. 1st Qu. Median Mean 3rd Qu. Max. 11 448 15250 14260 24640 658100 > > ibdPosS <- sort(ibdPosD) > ibdDistD <- diff(ibdPosS) > suibdDist <- summary(ibdDistD) > print(suibdDist) Min. 1st Qu. Median Mean 3rd Qu. Max. 0 236 653 1552 1584 21160000 Per SNP!!!!:: > > exonicCountS <- colSums(exonicCountD) > names(exonicCountS) <- exonicClass > print(exonicCountS) frameshift deletion frameshift insertion 30 185 frameshift substitution nonframeshift deletion 2 35 nonframeshift insertion nonframeshift substitution 90 0 nonsynonymous SNV stopgain SNV 12412 180 stoploss SNV synonymous SNV 12 11084 unknown 140 > > variantCountS <- colSums(variantCountD) > names(variantCountS) <- variantClass > print(variantCountS) downstream exonic exonic;splicing 19427 19852 249 intergenic intronic ncRNA_exonic 1302420 911594 4190 ncRNA_intronic ncRNA_splicing ncRNA_UTR3 52144 17 348 ncRNA_UTR5 ncRNA_UTR5;ncRNA_UTR3 splicing 45 0 165 upstream upstream;downstream UTR3 18344 693 22949 UTR5 UTR5;UTR3 4443 1 > > tfbsD <- unlist(tfbsD) > print(summary(tfbsD)) Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0000 0.0000 0.0000 0.2604 0.0000 10.0000 > ss <- sum(tfbsD) > print(ss) [1] 41813 > print(length(tfbsD)) [1] 160588 > > for (i in 1:length(exonicClass)) { + + print(exonicClass[i]) + print("####################") + print(summary(exonicCountD[,i])) + + + } [1] "frameshift deletion" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0000000 0.0000000 0.0000000 0.0001868 0.0000000 1.0000000 [1] "frameshift insertion" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.000000 0.000000 0.000000 0.001152 0.000000 3.000000 [1] "frameshift substitution" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 
0.00e+00 0.00e+00 0.00e+00 1.24e-05 0.00e+00 1.00e+00 [1] "nonframeshift deletion" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0000000 0.0000000 0.0000000 0.0002179 0.0000000 1.0000000 [1] "nonframeshift insertion" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0000000 0.0000000 0.0000000 0.0005604 0.0000000 2.0000000 [1] "nonframeshift substitution" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0 0 0 0 0 0 [1] "nonsynonymous SNV" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00000 0.00000 0.00000 0.07729 0.00000 16.00000 [1] "stopgain SNV" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.000000 0.000000 0.000000 0.001121 0.000000 2.000000 [1] "stoploss SNV" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00e+00 0.00e+00 0.00e+00 7.47e-05 0.00e+00 1.00e+00 [1] "synonymous SNV" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00000 0.00000 0.00000 0.06902 0.00000 8.00000 [1] "unknown" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0000000 0.0000000 0.0000000 0.0008718 0.0000000 3.0000000 > > for (i in 1:length(variantClass)) { + + print(variantClass[i]) + print("####################") + print(summary(variantCountD[,i])) + + + } [1] "downstream" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.000 0.000 0.000 0.121 0.000 23.000 [1] "exonic" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0000 0.0000 0.0000 0.1236 0.0000 19.0000 [1] "exonic;splicing" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.000000 0.000000 0.000000 0.001551 0.000000 1.000000 [1] "intergenic" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00 0.00 7.00 8.11 11.00 252.00 [1] "intronic" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.000 0.000 1.000 5.677 9.000 226.000 [1] "ncRNA_exonic" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. 
Max. 0.00000 0.00000 0.00000 0.02609 0.00000 15.00000 [1] "ncRNA_intronic" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0000 0.0000 0.0000 0.3247 0.0000 128.0000 [1] "ncRNA_splicing" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0000000 0.0000000 0.0000000 0.0001059 0.0000000 1.0000000 [1] "ncRNA_UTR3" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.000000 0.000000 0.000000 0.002167 0.000000 5.000000 [1] "ncRNA_UTR5" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00000 0.00000 0.00000 0.00028 0.00000 4.00000 [1] "ncRNA_UTR5;ncRNA_UTR3" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0 0 0 0 0 0 [1] "splicing" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.000000 0.000000 0.000000 0.001027 0.000000 1.000000 [1] "upstream" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0000 0.0000 0.0000 0.1142 0.0000 10.0000 [1] "upstream;downstream" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.000000 0.000000 0.000000 0.004315 0.000000 5.000000 [1] "UTR3" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0000 0.0000 0.0000 0.1429 0.0000 51.0000 [1] "UTR5" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00000 0.00000 0.00000 0.02767 0.00000 7.00000 [1] "UTR5;UTR3" [1] "####################" Min. 1st Qu. Median Mean 3rd Qu. Max. 
0.0e+00 0.0e+00 0.0e+00 6.2e-06 0.0e+00 1.0e+00 > > > Per haplotype: ############## > print(dim(exonicCountD)) [1] 160588 11 > > exD <- apply(exonicCountD, 2, function(x) length(which(x>0))) > print(exD) frameshift deletion frameshift insertion 30 171 frameshift substitution nonframeshift deletion 2 35 nonframeshift insertion nonframeshift substitution 87 0 nonsynonymous SNV stopgain SNV 9870 179 stoploss SNV synonymous SNV 12 9230 unknown 120 > > print(dim(variantCountD)) [1] 160588 17 > vaS <- apply(variantCountD, 2, function(x) length(which(x>0))) > print(vaS) downstream exonic exonic;splicing 12845 13796 249 intergenic intronic ncRNA_exonic 111998 86164 2591 ncRNA_intronic ncRNA_splicing ncRNA_UTR3 6579 17 202 ncRNA_UTR5 ncRNA_UTR5;ncRNA_UTR3 splicing 41 0 165 upstream upstream;downstream UTR3 12645 552 12527 UTR5 UTR5;UTR3 3837 1 Reference: * Wang K, Li M, Hakonarson H. ANNOVAR: Functional annotation of genetic variants from next-generation sequencing data Nucleic Acids Research, 38:e164, 2010 Value Default precedence Explanation exonic 1 variant overlaps a coding exon splicing 1 variant is within 2-bp of a splicing junction (use -splicing_threshold to change this) ncRNA 2 variant overlaps a transcript without coding annotation in the gene definition (see Notes below for more explanation) UTR5 3 variant overlaps a 5' untranslated region UTR3 3 variant overlaps a 3' untranslated region intronic 4 variant overlaps an intron upstream 5 variant overlaps 1-kb region upstream of transcription start site downstream 5 variant overlaps 1-kb region downtream of transcription end site (use -neargene to change this) intergenic 6 variant is in intergenic region The value of the first column takes the following precedence (as of December 2010 and later version of ANNOVAR): exonic = splicing > ncRNA> > UTR5/UTR3 > intron > upstream/downstream > intergenic. 
The precedence defined above is used to decide what function to print out when a variant fit multiple functional categories. Note that: 1. the "exonic" here refers only to coding exonic portion , but not UTR portion, as there are two keywords (UTR5, UTR3) that are specifically reserved for UTR annotations. 2. "splicing" in ANNOVAR is defined as variant that is within 2-bp away from an exon/intron boundary by default, but the threshold can be changed by the --splicing_threshold argument. If "exonic,splicing" is shown, it means that this is a variant within exon but close to exon/intron boundary. 3. If a variant is located in both 5' UTR and 3' UTR region (possibly for two different genes), then the "UTR5,UTR3" will be printed as the output. 4. The term "upstream" and "downstream" is defined as 1-kb away from transcription start site or transcription end site, respectively, taking in account of the strand of the mRNA; the --neargene threshold can be used to adjust this threshold. 5. If a variant is located in both downstream and upstream region (possibly for 2 different genes), then the "upstream,downstream" will be printed as the output. Annotation Precedence Explanation frameshift insertion 1 an insertion of one or more nucleotides that cause frameshift changes in protein coding sequence frameshift deletion 2 a deletion of one or more nucleotides that cause frameshift changes in protein coding sequence frameshift block substitution 3 a block substitution of one or more nucleotides that cause frameshift changes in protein coding sequence stopgain 4 a nonsynonymous SNV, frameshift insertion/deletion, nonframeshift insertion/deletion or block substitution that lead to the immediate creation of stop codon at the variant site. For frameshift mutations, the creation of stop codon downstream of the variant will not be counted as "stopgain"! 
stoploss 5 a nonsynonymous SNV, frameshift insertion/deletion, nonframeshift insertion/deletion or block substitution that lead to the immediate elimination of stop codon at the variant site nonframeshift insertion 6 an insertion of 3 or multiples of 3 nucleotides that do not cause frameshift changes in protein coding sequence nonframeshift deletion 7 a deletion of 3 or multiples of 3 nucleotides that do not cause frameshift changes in protein coding sequence nonframeshift block substitution 8 a block substitution of one or more nucleotides that do not cause frameshift changes in protein coding sequence nonsynonymous SNV 9 a single nucleotide change that causes an amino acid change synonymous SNV 10 a single nucleotide change that does not cause an amino acid change unknown 11 unknown function (due to various errors in the gene structure definition in the database file) When specifying amino acid changes, the specification always relates to a position for a transcript (not a "gene"). For example, the R702W mutation refers to an amino acid change at position 702 in exon 4 in a transcript called NM_022162 (which corresponds to the NOD2 gene). Since there is only one transcript annotated with the NOD2 gene, there is no ambiguity here. However, due to alternative splicing, if there are two or more transcripts that are all annotated for a gene, then the position of the amino acid change will differ, and it is important to always list the transcripts, in addition to gene names. 
================================================================================= # > table(indi[,3]) # # AFR AMR ASN EUR # 492 362 572 758 # # AMR = PUR CLM MXL > source("/seppdata/sepp/linkage/release/split/ana_S1.R") $breaks [1] 1.5 2.5 3.5 5.5 10.5 20.5 185.5 $counts [1] 39500 18998 20982 26282 23374 31452 $intensities [1] 0.245971056 0.118302737 0.065328667 0.032732209 0.014555259 0.001187001 $density [1] 0.245971056 0.118302737 0.065328667 0.032732209 0.014555259 0.001187001 $mids [1] 2.0 3.0 4.5 8.0 15.5 103.0 $xname [1] "noSamp" $equidist [1] FALSE attr(,"class") [1] "histogram" $breaks [1] 8.5 9.5 10.5 12.5 15.5 20.5 30.5 50.5 266.5 $counts [1] 49470 21800 26626 21775 15766 12399 8622 4130 $intensities [1] 0.3080553964 0.1357511147 0.0829015867 0.0451984789 0.0196353401 [6] 0.0077210003 0.0026845094 0.0001190648 $density [1] 0.3080553964 0.1357511147 0.0829015867 0.0451984789 0.0196353401 [6] 0.0077210003 0.0026845094 0.0001190648 $mids [1] 9.0 10.0 11.5 14.0 18.0 25.5 40.5 158.5 $xname [1] "noSnps" $equidist [1] FALSE attr(,"class") [1] "histogram" $breaks [1] 8.5 50.5 500.5 2000.5 5000.5 10000.5 20000.5 [8] 50000.5 100000.5 200000.5 500000.5 658118.5 1000000.5 $counts [1] 25 52851 12664 722 3785 27146 62728 254 260 131 22 0 $intensities [1] 3.706616e-06 7.313539e-04 5.257346e-05 1.498659e-06 4.713926e-06 [6] 1.690413e-05 1.302048e-05 3.163375e-08 1.619050e-08 2.719174e-09 [11] 8.664196e-10 0.000000e+00 $density [1] 3.706616e-06 7.313539e-04 5.257346e-05 1.498659e-06 4.713926e-06 [6] 1.690413e-05 1.302048e-05 3.163375e-08 1.619050e-08 2.719174e-09 [11] 8.664196e-10 0.000000e+00 $mids [1] 29.5 275.5 1250.5 3500.5 7500.5 15000.5 35000.5 75000.5 [9] 150000.5 350000.5 579059.5 829059.5 $xname [1] "ibdLength" $equidist [1] FALSE attr(,"class") [1] "histogram" AFR AMR AFR/AMR ASN AFR/ASN 93197 981 42631 2522 615 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 384 1196 1191 1720 1901 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 8322 556 307 933 4132 [1] 
"1" AFR AMR AFR/AMR ASN AFR/ASN 34503 781 2357 663 28 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 17 0 689 170 252 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 0 40 0 0 0 [1] "2" AFR AMR AFR/AMR ASN AFR/ASN 15770 140 2096 309 18 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 20 5 210 165 183 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 41 22 7 12 0 [1] "3" AFR AMR AFR/AMR ASN AFR/ASN 15901 41 3698 371 63 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 29 13 164 196 254 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 162 36 19 29 6 [1] "5" AFR AMR AFR/AMR ASN AFR/ASN 15792 12 8033 482 115 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 55 49 88 428 388 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 565 96 37 75 67 [1] "10" AFR AMR AFR/AMR ASN AFR/ASN 8535 5 11007 337 204 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 75 174 39 493 429 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 1440 141 97 157 241 [1] "20" AFR AMR AFR/AMR ASN AFR/ASN 2696 2 15440 360 187 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 188 955 1 268 395 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 6114 221 147 660 3818 [1] "8" AFR AMR AFR/AMR ASN AFR/ASN 36439 548 8300 848 96 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 88 71 665 459 603 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 830 98 48 108 269 [1] "9" AFR AMR AFR/AMR ASN AFR/ASN 15218 203 4255 387 49 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 52 61 229 195 323 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 505 52 22 63 186 [1] "10" AFR AMR AFR/AMR ASN AFR/ASN 17032 148 6401 461 98 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 64 100 162 257 331 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 972 66 29 135 370 [1] "12" AFR AMR AFR/AMR ASN AFR/ASN 11789 52 6723 321 76 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 66 115 62 245 274 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 1223 85 39 162 543 [1] "15" AFR AMR AFR/AMR ASN AFR/ASN 6737 17 5949 246 89 AMR/ASN AFR/AMR/ASN 
EUR AFR/EUR AMR/EUR 54 138 41 191 154 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 1275 65 36 168 606 [1] "20" AFR AMR AFR/AMR ASN AFR/ASN 3898 11 5339 163 81 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 37 182 15 184 120 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 1326 96 58 175 714 [1] "30" AFR AMR AFR/AMR ASN AFR/ASN 1707 2 3926 79 78 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 19 244 11 143 80 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 1302 66 54 97 814 [1] "50" AFR AMR AFR/AMR ASN AFR/ASN 377 0 1738 17 48 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 4 285 6 46 16 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 889 28 21 25 630 [1] "8" AFR AMR AFR/AMR ASN AFR/ASN 12 0 4 0 0 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 0 1 0 0 0 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 3 0 0 1 4 [1] "50" AFR AMR AFR/AMR ASN AFR/ASN 23565 96 19338 570 278 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 115 593 117 597 436 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 4274 190 116 402 2164 [1] "500" AFR AMR AFR/AMR ASN AFR/ASN 3189 7 5232 158 85 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 36 369 21 187 137 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 1777 111 70 168 1117 [1] "2000" AFR AMR AFR/AMR ASN AFR/ASN 418 5 179 16 4 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 0 5 6 12 7 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 47 2 2 6 13 [1] "5000" AFR AMR AFR/AMR ASN AFR/ASN 2389 26 861 84 29 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 17 6 33 59 59 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 117 18 10 22 55 [1] "10000" AFR AMR AFR/AMR ASN AFR/ASN 19086 231 5073 540 79 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 53 63 313 293 384 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 617 67 37 87 223 [1] "20000" AFR AMR AFR/AMR ASN AFR/ASN 44056 602 11887 1128 135 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 156 159 676 556 865 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 
1483 156 71 246 552 [1] "50000" AFR AMR AFR/AMR ASN AFR/ASN 187 3 27 8 0 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 5 0 8 4 6 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 3 1 1 0 1 [1] "1e+05" AFR AMR AFR/AMR ASN AFR/ASN 185 8 22 7 2 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 2 0 13 6 3 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 1 10 0 1 0 [1] "2e+05" AFR AMR AFR/AMR ASN AFR/ASN 92 3 7 10 3 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 0 0 4 4 4 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 0 1 0 0 3 [1] "5e+05" AFR AMR AFR/AMR ASN AFR/ASN 18 0 1 1 0 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 0 0 0 2 0 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 0 0 0 0 0 [1] "1e+06" AFR AMR AFR/AMR ASN AFR/ASN 0 0 0 0 0 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 0 0 0 0 0 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 0 0 0 0 0 breaksIF <- c(0,0.001,0.002,0.005,0.01,0.02,0.03,0.04,0.05,0.95,0.96,0.97,0.98,0.995,0.998,0.999,1.0) iF <- hist(ibdFreq,breaks=breaksIF,plot=FALSE,freq=TRUE,right=TRUE) print(iF) $breaks [1] 0.000 0.001 0.002 0.005 0.010 0.020 0.030 0.040 0.050 0.950 0.960 0.970 [13] 0.980 0.995 0.998 0.999 1.000 $counts [1] 1520 7315 27191 37728 45358 22453 11411 4608 218 602 656 612 [13] 819 86 11 0 $intensities [1] 9.465215334 45.551348793 56.440497837 46.987321593 28.244949809 [6] 13.981742098 7.105761327 2.869454754 0.001508346 0.374872344 [11] 0.408498767 0.381099459 0.340000498 0.178510640 0.068498269 [16] 0.000000000 $density [1] 9.465215334 45.551348793 56.440497837 46.987321593 28.244949809 [6] 13.981742098 7.105761327 2.869454754 0.001508346 0.374872344 [11] 0.408498767 0.381099459 0.340000498 0.178510640 0.068498269 [16] 0.000000000 $mids [1] 0.0005 0.0015 0.0035 0.0075 0.0150 0.0250 0.0350 0.0450 0.5000 0.9550 [11] 0.9650 0.9750 0.9875 0.9965 0.9985 0.9995 $xname [1] "ibdFreq" $equidist [1] FALSE attr(,"class") [1] "histogram" ########################## IBDs with EU: > sum(allC[8:15]) [1] 19062 SNPs in 
IBDs: > length(snpsA) [1] 751592 Compared to SNPs with MAF 2/2184 = 0.0009157509 2/2184 <= MAF <= 0.05 (0.95 <= MAF <= 2182/2184) 1920833 All SNPs: 3201157 > 1920833/3201157 [1] 0.6000434 60.00 % of the SNPs are rare and not private. private SNPs: 684171 21.37 % are private no SNPs: 15124 0.47% are not SNPs 3201157-1920833-684171-15124= 581029 581029/3201157 18.15 % are common SNPs 23.48 % of the SNPs are in rare IBDs > 751592/1920833 [1] 0.3912844 39 % of the rare SNPs are in rare IBDs AFR AMR AFR/AMR ASN AFR/ASN 93197 981 42631 2522 615 AMR/ASN AFR/AMR/ASN EUR AFR/EUR AMR/EUR 384 1196 1191 1720 1901 AFR/AMR/EUR ASN/EUR AFR/ASN/EUR AMR/ASN/EUR AFR/AMR/ASN/EUR 8322 556 307 933 4132 African IBD: > 93197+42631+ 615+1196+1720+8322+307+4132 [1] 152120