Source code for skchem.utils.io
#! /usr/bin/env python
#
# Copyright (C) 2016 Rich Lewis <rl403@cam.ac.uk>
# License: 3-clause BSD
"""
# skchem.utils.io
IO helper functions for skchem.
"""
import yaml
import json
def line_count(filename):
    """ Quickly count the number of lines in a file.

    Adapted from http://stackoverflow.com/questions/845058/how-to-get-line-count-cheaply-in-python

    The file is read in binary mode in fixed-size chunks, and the newline
    bytes in each chunk are tallied, so the whole file is never held in
    memory at once.

    Args:
        filename (str):
            The name of the file to count for.

    Returns:
        int: the number of newline (``b'\\n'``) bytes in the file.  A final
        line that is not terminated by a newline is therefore not counted.
    """
    buf_size = 1024 * 1024
    lines = 0
    # The context manager guarantees the handle is closed, even if a read
    # fails part-way through the file.
    with open(filename, 'rb') as f:
        read_f = f.read  # bind once; called for every chunk
        buf = read_f(buf_size)
        while buf:
            lines += buf.count(b'\n')
            buf = read_f(buf_size)
    return lines
def sdf_count(filename):
    """ Efficiently count molecules in an sdf file.

    A molecule is counted for every line of the file that begins with the
    record delimiter '$$$$'.  The file is read in binary mode, one line at
    a time.

    Args:
        filename (str): The filename of the sdf file.

    Returns:
        int: the number of molecules in the file.
    """
    delimiter = b'$$$$'
    n_mols = 0
    with open(filename, 'rb') as handle:
        for line in handle:
            # Only a delimiter at the very start of a line ends a record.
            if line.startswith(delimiter):
                n_mols += 1
    return n_mols
def json_dump(obj, target=None):
    """ Serialize an object to json.

    Args:
        obj: The object to serialize; passed straight to the json module.
        target (None, str or file-like):
            Where to put the output.  With None, the json text is returned.
            A str is treated as a filename, which is opened for writing.
            Anything else is handed to ``json.dump`` as the output stream.

    Returns:
        str or None: the json text if target is None, otherwise None.
    """
    # No destination: hand the serialized text back to the caller.
    if target is None:
        return json.dumps(obj)

    # Anything that is not a filename is used directly as the output stream.
    if not isinstance(target, str):
        json.dump(obj, target)
        return None

    with open(target, 'w') as handle:
        json.dump(obj, handle)
    return None
def yaml_dump(obj, target=None):
    """ Serialize an object to yaml in block (non-flow) style.

    Args:
        obj: The object to serialize; passed straight to ``yaml.dump``.
        target (None, str or file-like):
            Where to put the output.  A str is treated as a filename, which
            is opened for writing.  Any other value, including None, is
            passed to ``yaml.dump`` as its stream argument.

    Returns:
        The result of ``yaml.dump`` when target is not a str (expected to be
        the yaml text when target is None), otherwise None.
    """
    # Non-filename targets (a stream, or None) go straight to yaml.dump.
    if not isinstance(target, str):
        return yaml.dump(obj, target, default_flow_style=False)

    with open(target, 'w') as handle:
        yaml.dump(obj, handle, default_flow_style=False)