Python脚本：将mol2分子库文件拆分为单个mol2文件-阿里云开发者社区

Python脚本：将mol2分子库文件拆分为单个mol2文件

2021-12-07 400

版权

本文内容由阿里云实名注册用户自发贡献，版权归原作者所有，阿里云开发者社区不拥有其著作权，亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容，填写侵权投诉表单进行举报，一经查实，本社区将立刻删除涉嫌侵权内容。

简介： Python脚本：将mol2分子库文件拆分为单个mol2文件

如题：将mol2分子库文件拆分为单个mol2文件

用法：

注释：python 脚本文件 mol2分子库文件 输出目录（例如：python split_multimol2.py multi-mol2.mol2 out_dir）

split_multimol2.py：

#Python2 or Python3
#AspirinCode 2018
#Script that splits a multi-mol2 file into individual mol2 files.
#python split_multimol2.py multi-mol2.mol2 out_dir
import sys
import os
def _mol2_record(lines):
    """Build the ``[molecule_id, contents]`` pair for one molecule's lines."""
    # The molecule name is the line right after the @<TRIPOS>MOLECULE header;
    # a header with nothing after it gets an empty ID.
    molecule_id = lines[1].strip() if len(lines) > 1 else ''
    # Only the final line is stripped, so line breaks inside the record are
    # kept while the trailing newline/blank line is removed.
    lines[-1] = lines[-1].rstrip()
    return [molecule_id, "".join(lines)]

def split_multimol2(multimol2):
    """
    Splits a multi-mol2 file.
    Parameters
    ----------
    multimol2 : str
      Path to the multi-mol2 file.
    Returns
    ----------
    A generator object for lists for every extracted mol2-file. Lists contain
      the molecule ID and the mol2 file contents.
      e.g., ['ID1234', '@<TRIPOS>MOLECULE...']
    Notes
    ----------
    Lines that appear before the first "@<TRIPOS>MOLECULE" record (comments,
    blank lines) are skipped. An empty file yields nothing.
    """
    header = "@<TRIPOS>MOLECULE"
    with open(multimol2, 'r') as mol2file:
        mol2cont = []
        # A single pass over the lines; no tell()/fstat end-of-file probing,
        # which could spin forever when the file did not start with a header.
        for line in mol2file:
            if line.startswith(header):
                if mol2cont:
                    # A new header closes the molecule collected so far.
                    yield _mol2_record(mol2cont)
                mol2cont = [line]
            elif mol2cont:
                mol2cont.append(line)
            # else: still in the preamble before the first molecule -> skip
        if mol2cont:
            # Flush the last molecule in the file.
            yield _mol2_record(mol2cont)
def write_multimol2(multimol2, out_dir):
    """
    Splits a multi-mol2 file into individual single-molecule mol2 files.
    Parameters
    -----------
    multimol2 : str
      Path to the multi-mol2 file.
    out_dir : str:
      Output directory (created if it does not exist). New files will be
      named <molecule_name_1>.mol2, ... <molecule_name_n>.mol2
    Returns
    -----------
    chunks : int
      Number of molecules written. Molecules sharing the same name are
      written to the same path, so a later one overwrites an earlier one.
    """
    # Create the directory when it is missing (the old check tested the
    # truthiness of the path string and therefore never created anything).
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    written = 0
    # Use the function's own parameters rather than the CLI's global
    # namespace, so the function also works when imported as a module.
    for molecule_id, contents in split_multimol2(multimol2):
        out_mol2 = os.path.join(out_dir, molecule_id) + '.mol2'
        with open(out_mol2, 'w') as out_file:
            out_file.write(contents)
            # split_multimol2 strips the trailing newline; restore it.
            out_file.write('\n')
        written += 1
    return written
def write_multimol2_chunks(multimol2, chunk_size, out_dir):
    """
    Splits a multi-mol2 file into smaller multi-mol2 files.
    Parameters
    -----------
    multimol2 : str
      Path to the multi-mol2 file.
    chunk_size : int
      Number of mol2 structures per chunk file (must be >= 1).
    out_dir : str:
      Output directory (created if it does not exist). New files will be
      named <multimol2>_1.mol2, ... <multimol2>_n.mol2
    Returns
    -----------
    chunks : int
      Number of files written (0 if the input contains no molecules).
    Raises
    -----------
    ValueError
      If chunk_size is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError('chunk_size must be >= 1, got %r' % (chunk_size,))
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    out_file_stem = os.path.basename(multimol2).split('.mol2')[0]
    chunks = 0
    out_file = None
    try:
        for index, mol2 in enumerate(split_multimol2(multimol2)):
            # Start a new chunk file *before* writing the first molecule of
            # each chunk, so every file (including the first) holds exactly
            # chunk_size molecules, except possibly the last one.
            if index % chunk_size == 0:
                if out_file is not None:
                    out_file.close()
                chunks += 1
                out_file = open(
                    os.path.join(out_dir, out_file_stem) + '_%d.mol2' % chunks,
                    'w')
            out_file.write(mol2[1] + '\n')
    finally:
        # Close the current chunk even if reading or writing failed.
        if out_file is not None:
            out_file.close()
    return chunks
if __name__ == '__main__':
    # Command-line entry point: parse the arguments, then either split the
    # library into chunk files (-c given) or into one file per molecule.
    import argparse

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description='Splits a multi-mol2 file into individual mol2 files',
    )
    # Positional arguments: input library first, output directory second.
    parser.add_argument('MOL2_FILE')
    parser.add_argument('OUT_DIR')
    parser.add_argument(
        '-c', '--chunksize',
        type=int,
        help='Number of MOL2 structures per file (1 by default)',
    )
    parser.add_argument(
        '-v', '--version',
        action='version',
        version='split_multimol2 v. 1.1',
    )
    args = parser.parse_args()

    if not args.chunksize:
        # No chunk size given (or 0): one output file per molecule.
        write_multimol2(multimol2=args.MOL2_FILE, out_dir=args.OUT_DIR)
    else:
        write_multimol2_chunks(
            multimol2=args.MOL2_FILE,
            chunk_size=args.chunksize,
            out_dir=args.OUT_DIR,
        )

Python脚本：将mol2分子库文件拆分为单个mol2文件

热门文章

最新文章

相关课程

相关电子书

推荐镜像

探索云世界

热门

云计算

大数据

云原生

人工智能

数据库

开发与运维

活动广场

任务中心

训练营

直播

乘风者计划

下载

镜像站

技术资料

Python脚本：将mol2分子库文件拆分为单个mol2文件

热门文章

最新文章

相关课程

相关电子书

推荐镜像