var n = flag.Int("n", 1000, "length of result")
-const WIDTH = 60
+const WIDTH = 60 // Fold lines after WIDTH bytes
func min(a, b int) int {
if a < b {
var lastrandom uint32 = 42
+// Random number between 0.0 and 1.0
func myrandom() float {
const (
IM = 139968;
}
func AccumulateProbabilities(genelist []AminoAcid) {
- cp := 0.0;
- for i := 0; i < len(genelist); i++ {
- cp += genelist[i].p;
- genelist[i].p = cp;
+ for i := 1; i < len(genelist); i++ {
+ genelist[i].p += genelist[i-1].p;
}
}
-/* This function prints the characters of the string s. When it */
-/* reaches the end of the string, it goes back to the beginning */
-/* It stops when the total number of characters printed is count. */
-/* Between each WIDTH consecutive characters it prints a newline */
-/* This function assumes that WIDTH <= strlen (s) + 1 */
+// RepeatFasta prints the characters of the byte slice s. When it
+// reaches the end of the slice, it goes back to the beginning.
+// It stops after generating count characters.
+// After each WIDTH characters it prints a newline.
+// It assumes that WIDTH <= len(s) + 1.
func RepeatFasta(s []byte, count int) {
pos := 0;
s2 := make([]byte, len(s) + WIDTH);
bytes.Copy(s2, s);
bytes.Copy(s2[len(s):len(s2)], s);
- for {
+ for count > 0 {
line := min(WIDTH, count);
out.Write(s2[pos:pos+line]);
out.WriteByte('\n');
pos -= len(s);
}
count -= line;
- if count <= 0 {
- break
- }
}
}
-/* This function takes a pointer to the first element of an array */
-/* Each element of the array is a struct with a character and */
-/* a float number p between 0 and 1. */
-/* The function generates a random float number r and */
-/* finds the first array element such that p >= r. */
-/* This is a weighted random selection. */
-/* The function then prints the character of the array element. */
-/* This is done count times. */
-/* Between each WIDTH consecutive characters, the function prints a newline */
+// Each element of genelist is a struct with a character and
+// a floating point number p between 0 and 1.
+// RandomFasta generates a random float r and
+// finds the first element such that p >= r.
+// This is a weighted random selection.
+// RandomFasta then prints the character of the array element.
+// This sequence is repeated count times.
+// Between each WIDTH consecutive characters, the function prints a newline.
func RandomFasta(genelist []AminoAcid, count int) {
buf := make([]byte, WIDTH + 1);
- for {
+ for count > 0 {
line := min(WIDTH, count);
- pos := 0;
- for {
+ for pos := 0; pos < line; pos++ {
r := myrandom();
var i int;
for i = 0; genelist[i].p < r; i++ {
}
buf[pos] = genelist[i].c;
- pos++;
- if pos >= line {
- break
- }
}
buf[line] = '\n';
out.Write(buf[0:line + 1]);
count -= line;
- if count <= 0 {
- break
- }
}
}
AccumulateProbabilities(iub);
AccumulateProbabilities(homosapiens);
- alu := strings.Bytes(""
+ alu := strings.Bytes(
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"
"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"
"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"
out.WriteString(">ONE Homo sapiens alu\n");
RepeatFasta(alu, 2 * *n);
- out.Flush();
out.WriteString(">TWO IUB ambiguity codes\n");
RandomFasta(iub, 3 * *n);
- out.Flush();
out.WriteString(">THREE Homo sapiens frequency\n");
RandomFasta(homosapiens, 5 * *n);
- out.Flush();
}
--- /dev/null
+>ONE Homo sapiens alu
+GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA
+TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT
+AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG
+GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG
+CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT
+GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA
+GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA
+TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG
+AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA
+GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT
+AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC
+AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG
+GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC
+CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG
+AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT
+TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA
+TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT
+GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG
+TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT
+CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG
+CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG
+TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA
+CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG
+AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG
+GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC
+TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA
+TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA
+GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT
+GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC
+ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT
+TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC
+CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG
+CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG
+GGCGACAGAGCGAGACTCCG
+>TWO IUB ambiguity codes
+cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg
+tactDtDagcctatttSVHtHttKtgtHMaSattgWaHKHttttagacatWatgtRgaaa
+NtactMcSMtYtcMgRtacttctWBacgaaatatagScDtttgaagacacatagtVgYgt
+cattHWtMMWcStgttaggKtSgaYaaccWStcgBttgcgaMttBYatcWtgacaYcaga
+gtaBDtRacttttcWatMttDBcatWtatcttactaBgaYtcttgttttttttYaaScYa
+HgtgttNtSatcMtcVaaaStccRcctDaataataStcYtRDSaMtDttgttSagtRRca
+tttHatSttMtWgtcgtatSSagactYaaattcaMtWatttaSgYttaRgKaRtccactt
+tattRggaMcDaWaWagttttgacatgttctacaaaRaatataataaMttcgDacgaSSt
+acaStYRctVaNMtMgtaggcKatcttttattaaaaagVWaHKYagtttttatttaacct
+tacgtVtcVaattVMBcttaMtttaStgacttagattWWacVtgWYagWVRctDattBYt
+gtttaagaagattattgacVatMaacattVctgtBSgaVtgWWggaKHaatKWcBScSWa
+accRVacacaaactaccScattRatatKVtactatatttHttaagtttSKtRtacaaagt
+RDttcaaaaWgcacatWaDgtDKacgaacaattacaRNWaatHtttStgttattaaMtgt
+tgDcgtMgcatBtgcttcgcgaDWgagctgcgaggggVtaaScNatttacttaatgacag
+cccccacatYScaMgtaggtYaNgttctgaMaacNaMRaacaaacaKctacatagYWctg
+ttWaaataaaataRattagHacacaagcgKatacBttRttaagtatttccgatctHSaat
+actcNttMaagtattMtgRtgaMgcataatHcMtaBSaRattagttgatHtMttaaKagg
+YtaaBataSaVatactWtataVWgKgttaaaacagtgcgRatatacatVtHRtVYataSa
+KtWaStVcNKHKttactatccctcatgWHatWaRcttactaggatctataDtDHBttata
+aaaHgtacVtagaYttYaKcctattcttcttaataNDaaggaaaDYgcggctaaWSctBa
+aNtgctggMBaKctaMVKagBaactaWaDaMaccYVtNtaHtVWtKgRtcaaNtYaNacg
+gtttNattgVtttctgtBaWgtaattcaagtcaVWtactNggattctttaYtaaagccgc
+tcttagHVggaYtgtNcDaVagctctctKgacgtatagYcctRYHDtgBattDaaDgccK
+tcHaaStttMcctagtattgcRgWBaVatHaaaataYtgtttagMDMRtaataaggatMt
+ttctWgtNtgtgaaaaMaatatRtttMtDgHHtgtcattttcWattRSHcVagaagtacg
+ggtaKVattKYagactNaatgtttgKMMgYNtcccgSKttctaStatatNVataYHgtNa
+BKRgNacaactgatttcctttaNcgatttctctataScaHtataRagtcRVttacDSDtt
+aRtSatacHgtSKacYagttMHtWataggatgactNtatSaNctataVtttRNKtgRacc
+tttYtatgttactttttcctttaaacatacaHactMacacggtWataMtBVacRaSaatc
+cgtaBVttccagccBcttaRKtgtgcctttttRtgtcagcRttKtaaacKtaaatctcac
+aattgcaNtSBaaccgggttattaaBcKatDagttactcttcattVtttHaaggctKKga
+tacatcBggScagtVcacattttgaHaDSgHatRMaHWggtatatRgccDttcgtatcga
+aacaHtaagttaRatgaVacttagattVKtaaYttaaatcaNatccRttRRaMScNaaaD
+gttVHWgtcHaaHgacVaWtgttScactaagSgttatcttagggDtaccagWattWtRtg
+ttHWHacgattBtgVcaYatcggttgagKcWtKKcaVtgaYgWctgYggVctgtHgaNcV
+taBtWaaYatcDRaaRtSctgaHaYRttagatMatgcatttNattaDttaattgttctaa
+ccctcccctagaWBtttHtBccttagaVaatMcBHagaVcWcagBVttcBtaYMccagat
+gaaaaHctctaacgttagNWRtcggattNatcRaNHttcagtKttttgWatWttcSaNgg
+gaWtactKKMaacatKatacNattgctWtatctaVgagctatgtRaHtYcWcttagccaa
+tYttWttaWSSttaHcaaaaagVacVgtaVaRMgattaVcDactttcHHggHRtgNcctt
+tYatcatKgctcctctatVcaaaaKaaaagtatatctgMtWtaaaacaStttMtcgactt
+taSatcgDataaactaaacaagtaaVctaggaSccaatMVtaaSKNVattttgHccatca
+cBVctgcaVatVttRtactgtVcaattHgtaaattaaattttYtatattaaRSgYtgBag
+aHSBDgtagcacRHtYcBgtcacttacactaYcgctWtattgSHtSatcataaatataHt
+cgtYaaMNgBaatttaRgaMaatatttBtttaaaHHKaatctgatWatYaacttMctctt
+ttVctagctDaaagtaVaKaKRtaacBgtatccaaccactHHaagaagaaggaNaaatBW
+attccgStaMSaMatBttgcatgRSacgttVVtaaDMtcSgVatWcaSatcttttVatag
+ttactttacgatcaccNtaDVgSRcgVcgtgaacgaNtaNatatagtHtMgtHcMtagaa
+attBgtataRaaaacaYKgtRccYtatgaagtaataKgtaaMttgaaRVatgcagaKStc
+tHNaaatctBBtcttaYaBWHgtVtgacagcaRcataWctcaBcYacYgatDgtDHccta
+>THREE Homo sapiens frequency
+aacacttcaccaggtatcgtgaaggctcaagattacccagagaacctttgcaatataaga
+atatgtatgcagcattaccctaagtaattatattctttttctgactcaaagtgacaagcc
+ctagtgtatattaaatcggtatatttgggaaattcctcaaactatcctaatcaggtagcc
+atgaaagtgatcaaaaaagttcgtacttataccatacatgaattctggccaagtaaaaaa
+tagattgcgcaaaattcgtaccttaagtctctcgccaagatattaggatcctattactca
+tatcgtgtttttctttattgccgccatccccggagtatctcacccatccttctcttaaag
+gcctaatattacctatgcaaataaacatatattgttgaaaattgagaacctgatcgtgat
+tcttatgtgtaccatatgtatagtaatcacgcgactatatagtgctttagtatcgcccgt
+gggtgagtgaatattctgggctagcgtgagatagtttcttgtcctaatatttttcagatc
+gaatagcttctatttttgtgtttattgacatatgtcgaaactccttactcagtgaaagtc
+atgaccagatccacgaacaatcttcggaatcagtctcgttttacggcggaatcttgagtc
+taacttatatcccgtcgcttactttctaacaccccttatgtatttttaaaattacgttta
+ttcgaacgtacttggcggaagcgttattttttgaagtaagttacattgggcagactcttg
+acattttcgatacgactttctttcatccatcacaggactcgttcgtattgatatcagaag
+ctcgtgatgattagttgtcttctttaccaatactttgaggcctattctgcgaaatttttg
+ttgccctgcgaacttcacataccaaggaacacctcgcaacatgccttcatatccatcgtt
+cattgtaattcttacacaatgaatcctaagtaattacatccctgcgtaaaagatggtagg
+ggcactgaggatatattaccaagcatttagttatgagtaatcagcaatgtttcttgtatt
+aagttctctaaaatagttacatcgtaatgttatctcgggttccgcgaataaacgagatag
+attcattatatatggccctaagcaaaaacctcctcgtattctgttggtaattagaatcac
+acaatacgggttgagatattaattatttgtagtacgaagagatataaaaagatgaacaat
+tactcaagtcaagatgtatacgggatttataataaaaatcgggtagagatctgctttgca
+attcagacgtgccactaaatcgtaatatgtcgcgttacatcagaaagggtaactattatt
+aattaataaagggcttaatcactacatattagatcttatccgatagtcttatctattcgt
+tgtatttttaagcggttctaattcagtcattatatcagtgctccgagttctttattattg
+ttttaaggatgacaaaatgcctcttgttataacgctgggagaagcagactaagagtcgga
+gcagttggtagaatgaggctgcaaaagacggtctcgacgaatggacagactttactaaac
+caatgaaagacagaagtagagcaaagtctgaagtggtatcagcttaattatgacaaccct
+taatacttccctttcgccgaatactggcgtggaaaggttttaaaagtcgaagtagttaga
+ggcatctctcgctcataaataggtagactactcgcaatccaatgtgactatgtaatactg
+ggaacatcagtccgcgatgcagcgtgtttatcaaccgtccccactcgcctggggagacat
+gagaccacccccgtggggattattagtccgcagtaatcgactcttgacaatccttttcga
+ttatgtcatagcaatttacgacagttcagcgaagtgactactcggcgaaatggtattact
+aaagcattcgaacccacatgaatgtgattcttggcaatttctaatccactaaagcttttc
+cgttgaatctggttgtagatatttatataagttcactaattaagatcacggtagtatatt
+gatagtgatgtctttgcaagaggttggccgaggaatttacggattctctattgatacaat
+ttgtctggcttataactcttaaggctgaaccaggcgtttttagacgacttgatcagctgt
+tagaatggtttggactccctctttcatgtcagtaacatttcagccgttattgttacgata
+tgcttgaacaatattgatctaccacacacccatagtatattttataggtcatgctgttac
+ctacgagcatggtattccacttcccattcaatgagtattcaacatcactagcctcagaga
+tgatgacccacctctaataacgtcacgttgcggccatgtgaaacctgaacttgagtagac
+gatatcaagcgctttaaattgcatataacatttgagggtaaagctaagcggatgctttat
+ataatcaatactcaataataagatttgattgcattttagagttatgacacgacatagttc
+actaacgagttactattcccagatctagactgaagtactgatcgagacgatccttacgtc
+gatgatcgttagttatcgacttaggtcgggtctctagcggtattggtacttaaccggaca
+ctatactaataacccatgatcaaagcataacagaatacagacgataatttcgccaacata
+tatgtacagaccccaagcatgagaagctcattgaaagctatcattgaagtcccgctcaca
+atgtgtcttttccagacggtttaactggttcccgggagtcctggagtttcgacttacata
+aatggaaacaatgtattttgctaatttatctatagcgtcatttggaccaatacagaatat
+tatgttgcctagtaatccactataacccgcaagtgctgatagaaaatttttagacgattt
+ataaatgccccaagtatccctcccgtgaatcctccgttatactaattagtattcgttcat
+acgtataccgcgcatatatgaacatttggcgataaggcgcgtgaattgttacgtgacaga
+gatagcagtttcttgtgatatggttaacagacgtacatgaagggaaactttatatctata
+gtgatgcttccgtagaaataccgccactggtctgccaatgatgaagtatgtagctttagg
+tttgtactatgaggctttcgtttgtttgcagagtataacagttgcgagtgaaaaaccgac
+gaatttatactaatacgctttcactattggctacaaaatagggaagagtttcaatcatga
+gagggagtatatggatgctttgtagctaaaggtagaacgtatgtatatgctgccgttcat
+tcttgaaagatacataagcgataagttacgacaattataagcaacatccctaccttcgta
+acgatttcactgttactgcgcttgaaatacactatggggctattggcggagagaagcaga
+tcgcgccgagcatatacgagacctataatgttgatgatagagaaggcgtctgaattgata
+catcgaagtacactttctttcgtagtatctctcgtcctctttctatctccggacacaaga
+attaagttatatatatagagtcttaccaatcatgttgaatcctgattctcagagttcttt
+ggcgggccttgtgatgactgagaaacaatgcaatattgctccaaatttcctaagcaaatt
+ctcggttatgttatgttatcagcaaagcgttacgttatgttatttaaatctggaatgacg
+gagcgaagttcttatgtcggtgtgggaataattcttttgaagacagcactccttaaataa
+tatcgctccgtgtttgtatttatcgaatgggtctgtaaccttgcacaagcaaatcggtgg
+tgtatatatcggataacaattaatacgatgttcatagtgacagtatactgatcgagtcct
+ctaaagtcaattacctcacttaacaatctcattgatgttgtgtcattcccggtatcgccc
+gtagtatgtgctctgattgaccgagtgtgaaccaaggaacatctactaatgcctttgtta
+ggtaagatctctctgaattccttcgtgccaacttaaaacattatcaaaatttcttctact
+tggattaactacttttacgagcatggcaaattcccctgtggaagacggttcattattatc
+ggaaaccttatagaaattgcgtgttgactgaaattagatttttattgtaagagttgcatc
+tttgcgattcctctggtctagcttccaatgaacagtcctcccttctattcgacatcgggt
+ccttcgtacatgtctttgcgatgtaataattaggttcggagtgtggccttaatgggtgca
+actaggaatacaacgcaaatttgctgacatgatagcaaatcggtatgccggcaccaaaac
+gtgctccttgcttagcttgtgaatgagactcagtagttaaataaatccatatctgcaatc
+gattccacaggtattgtccactatctttgaactactctaagagatacaagcttagctgag
+accgaggtgtatatgactacgctgatatctgtaaggtaccaatgcaggcaaagtatgcga
+gaagctaataccggctgtttccagctttataagattaaaatttggctgtcctggcggcct
+cagaattgttctatcgtaatcagttggttcattaattagctaagtacgaggtacaactta
+tctgtcccagaacagctccacaagtttttttacagccgaaacccctgtgtgaatcttaat
+atccaagcgcgttatctgattagagtttacaactcagtattttatcagtacgttttgttt
+ccaacattacccggtatgacaaaatgacgccacgtgtcgaataatggtctgaccaatgta
+ggaagtgaaaagataaatat