如题:将mol2分子库文件拆分为单个mol2文件
用法:python split_multimol2.py multi-mol2.mol2 out_dir
注释:命令各部分依次为 python 解释器、脚本文件、mol2 分子库文件、输出目录;可选参数 -c N 表示每个输出文件包含 N 个分子。
split_multimol2.py:
# Python2 or Python3
# AspirinCode 2018
# Script that splits a multi-mol2 file into individual mol2 files.
# python split_multimol2.py multi-mol2.mol2 out_dir

import sys
import os

# Record tag that opens every molecule entry in a (multi-)mol2 file.
MOLECULE_TAG = "@<TRIPOS>MOLECULE"


def _finish_molecule(lines):
    """Turn the collected lines of one molecule into [molecule_id, contents]."""
    # The molecule name is taken from the line directly after the tag line.
    molecule_id = lines[1].strip() if len(lines) > 1 else ''
    # Only the last line is stripped, so the record does not end in a newline;
    # the writers add exactly one newline back.
    lines[-1] = lines[-1].rstrip()
    return [molecule_id, "".join(lines)]


def _ensure_dir(path):
    """Create directory `path` (and missing parents) unless it already exists."""
    # An empty path means "current directory": nothing to create.
    if path and not os.path.isdir(path):
        os.makedirs(path)


def split_multimol2(multimol2):
    """
    Splits a multi-mol2 file.

    Parameters
    ----------
    multimol2 : str
      Path to the multi-mol2 file.

    Returns
    ----------
    A generator object for lists for every extracted mol2-file. Lists contain
    the molecule ID and the mol2 file contents.
    e.g., ['ID1234', '@<TRIPOS>MOLECULE...']

    Notes
    ----------
    Lines that appear before the first '@<TRIPOS>MOLECULE' tag (comments,
    blank lines) are skipped. An empty file yields nothing.

    """
    with open(multimol2, 'r') as mol2file:
        current = None  # lines of the molecule being collected, if any
        # Plain line iteration ends at EOF on its own, so no tell()/st_size
        # comparison is needed and no input can stall the loop.
        for line in mol2file:
            if line.startswith(MOLECULE_TAG):
                if current is not None:
                    yield _finish_molecule(current)
                current = [line]
            elif current is not None:
                current.append(line)
        if current is not None:
            yield _finish_molecule(current)


def write_multimol2(multimol2, out_dir):
    """
    Splits a multi-mol2 file into individual mol2 files.

    Parameters
    -----------
    multimol2 : str
      Path to the multi-mol2 file.

    out_dir : str
      Output directory; created if it does not exist. New files will be named
      <molecule_name_1>.mol2, ... <molecule_name_n>.mol2

    Returns
    -----------
    written : int
      Number of molecules written. Molecules that share the same name are
      written to the same path, so a later one overwrites an earlier one.

    """
    _ensure_dir(out_dir)

    written = 0
    for molecule_id, contents in split_multimol2(multimol2):
        out_mol2 = os.path.join(out_dir, molecule_id) + '.mol2'
        with open(out_mol2, 'w') as out_file:
            out_file.write(contents)
            out_file.write('\n')
        written += 1
    return written


def write_multimol2_chunks(multimol2, chunk_size, out_dir):
    """
    Splits a multi-mol2 file into smaller multi-mol2 files.

    Parameters
    -----------
    multimol2 : str
      Path to the multi-mol2 file.

    chunk_size : int
      Number of mol2 structures per output file (must be >= 1).

    out_dir : str
      Output directory; created if it does not exist. New files will be named
      <multimol2>_1.mol2, ... <multimol2>_n.mol2

    Returns
    -----------
    chunks : int
      Number of files written (0 if the input contains no molecule).

    Raises
    -----------
    ValueError
      If chunk_size is smaller than 1.

    """
    if chunk_size < 1:
        raise ValueError('chunk_size must be a positive integer, got %r'
                         % (chunk_size,))

    _ensure_dir(out_dir)
    out_file_stem = os.path.basename(multimol2).split('.mol2')[0]

    chunks = 0       # number of output files opened so far
    in_current = 0   # molecules already written to the open file
    out_file = None
    try:
        for _, contents in split_multimol2(multimol2):
            # Roll over *before* writing only once the open file is full, so
            # every file (including the first) holds exactly chunk_size
            # molecules, except possibly the last one.
            if out_file is None or in_current == chunk_size:
                if out_file is not None:
                    out_file.close()
                chunks += 1
                in_current = 0
                out_file = open(os.path.join(out_dir, out_file_stem)
                                + '_%d.mol2' % chunks, 'w')
            out_file.write(contents + '\n')
            in_current += 1
    finally:
        # Close the last file even if reading or writing failed midway.
        if out_file is not None:
            out_file.close()

    return chunks


def main(argv=None):
    """Parse the command line (argv, or sys.argv[1:] if None) and split."""
    import argparse

    parser = argparse.ArgumentParser(
            description='Splits a multi-mol2 file into individual mol2 files',
            formatter_class=argparse.RawTextHelpFormatter
            )

    parser.add_argument('MOL2_FILE')
    parser.add_argument('OUT_DIR')
    parser.add_argument('-c', '--chunksize',
                        help='Number of MOL2 structures per file (1 by default)',
                        type=int)
    parser.add_argument('-v', '--version', action='version',
                        version='split_multimol2 v. 1.1')

    args = parser.parse_args(argv)

    if args.chunksize is not None and args.chunksize < 0:
        parser.error('--chunksize must be a positive integer')

    # No (or zero) chunk size: one file per molecule, named after the molecule.
    if args.chunksize:
        write_multimol2_chunks(multimol2=args.MOL2_FILE,
                               chunk_size=args.chunksize,
                               out_dir=args.OUT_DIR)
    else:
        write_multimol2(multimol2=args.MOL2_FILE, out_dir=args.OUT_DIR)


if __name__ == '__main__':
    main()
