#!/usr/bin/env python
# coding=utf-8
import argparse


def get_parser():
    '''Build the command-line parser for this script.

    Both options are declared with ``nargs=1``, so after parsing each value
    is a one-element list (e.g. ``args.input[0]``, ``args.binsize[0]``).

    Returns:
        argparse.ArgumentParser: parser with ``-i/--input`` (str) and
        ``-b/--binsize`` (int).
    '''
    parser = argparse.ArgumentParser(
        description='Scattering genome sequence to bins with assigned length')
    parser.add_argument('-i', '--input', type=str, nargs=1,
                        help='input the reference genome file with fasta format')
    parser.add_argument('-b', '--binsize', type=int, nargs=1,
                        help='bin size')
    return parser


def binGenome(fasta, binsize):
    '''Split every record of a FASTA file into consecutive fixed-size bins.

    Each bin is printed to stdout as a FASTA record whose header is
    ``>{name}_{start}-{end}`` (0-based start, exclusive end), followed by
    the bin's sequence on one line. The last bin of a record is shortened
    so that it ends at the end of the sequence.

    Args:
        fasta: path of the FASTA file to read.
        binsize: bin length; must be a positive integer.

    Raises:
        ValueError: if ``binsize`` is smaller than 1, or if sequence data
            appears before the first ``>`` header line.
    '''
    # Validate first so a bad bin size fails before the file is read.
    if binsize < 1:
        raise ValueError(
            "binsize must be a positive integer, got %r" % (binsize,))

    sequence = {}
    ac = None
    chunks = []
    with open(fasta) as fa:
        for line in fa:
            if line.startswith(">"):
                # Flush the record collected so far before starting a new one.
                if chunks:
                    sequence[ac] = "".join(chunks)
                # Keep everything after the leading '>' so a header that
                # itself contains '>' is not truncated.
                ac = line.strip()[1:]
                chunks = []
                continue
            residues = line.strip()
            if not residues:
                # Blank lines carry no sequence data.
                continue
            if ac is None:
                raise ValueError(
                    "sequence data found before the first '>' header in %s"
                    % (fasta,))
            # Collect pieces and join once per record; repeated string
            # concatenation is quadratic for long sequences.
            chunks.append(residues)
    if chunks:
        sequence[ac] = "".join(chunks)

    for name, seq in sequence.items():
        seqlen = len(seq)
        for start in range(0, seqlen, binsize):
            # Clamp the final bin to the end of the sequence.
            end = min(start + binsize, seqlen)
            print(">%s_%d-%d" % (name, start, end))
            print(seq[start:end])