Gregor Lenz
20.7.2022
avoid
prefer
https://github.com/github/gitignore
Many available: conda, pipenv, venv, virtualenv, docker, ...
For Conda:
1. Create new environment for project
2. Install as many packages as possible via Conda
3. Then switch to pip
Export your environment.yml regularly
(if it changes)
├── data
├── docs
├── results
├── scripts
├── src
├── tests
├── .gitignore
├── environment.yml
├── README.md
└── setup.py
please avoid
├── data
├── docs
├── results
├── scripts
├── src
├── tests
├── .gitignore
├── environment.yml
├── README.md
└── setup.py
Datasets, do not include in git
Your documentation
Plots, tables, reports
Notebooks, experiments
Your implementation
Yes you need them
Files that git should ignore
For reproducible results
Your project description
For local development
├── data
├── docs
├── results
├── scripts
│   └── my_experiment.py
├── src
│   └── awesome_layer.py
├── tests
├── .gitignore
├── environment.yml
└── README.md
from ..src.awesome_layer import AwesomeLayer
layer = AwesomeLayer()
$ python scripts/my_experiment.py
Traceback:
tests/test_layer.py:2: in <module>
from ..src.awesome_layer import AwesomeLayer
E ImportError: attempted relative import with no known parent package
import sys
sys.path.append('/home/me/Documents/codebook/src')
├── data
├── docs
├── results
├── scripts
│   └── my_experiment.py
├── src
│   └── awesome_layer.py
├── tests
├── .gitignore
├── environment.yml
└── README.md
from ..src.awesome_layer import AwesomeLayer
layer = AwesomeLayer()
$ python scripts/my_experiment.py
Traceback:
tests/test_layer.py:2: in <module>
from ..src.awesome_layer import AwesomeLayer
E ImportError: attempted relative import with no known parent package
import sys
sys.path.append('/home/me/Documents/codebook/src')
├── data
├── docs
├── results
├── scripts
│   └── my_experiment.py
├── src
│   ├── __init__.py
│   └── awesome_layer.py
├── tests
├── .gitignore
├── environment.yml
├── README.md
└── setup.py
from src import AwesomeLayer
from setuptools import find_packages, setup
setup(
name="src",
packages=find_packages(),
)
from .awesome_layer import AwesomeLayer
$ pip install .
Install copy of current package
$ pip install -e .
Install link to current package
“Does it spark joy?”
- Marie Kondo
“Any color you like.”
from seven_dwwarfs import Grumpy, Happy, Sleepy, Bashful, Sneezy, Dopey, Doc
x = { 'a':37,'b':42,
'c':927}
x = 123456789.123456789E123456789
if very_long_variable_name is not None and \
very_long_variable_name.field > 0 or \
very_long_variable_name.is_debug:
z = 'hello '+'world'
else:
world = 'world'
a = 'hello {}'.format(world)
f = rf'hello {world}'
if (this
and that): y = 'hello ''world'#FIXME: https://github.com/psf/black/issues/26
class Foo ( object ):
def f (self ):
return 37*-2
def g(self, x,y=42):
return y
def f ( a: List[ int ]) :
return 37-a[42-u : y**3]
def very_important_function(template: str,*variables,file: os.PathLike,debug:bool=False,):
"""Applies `variables` to the `template` and writes to `file`."""
with open(file, "w") as f:
...
from seven_dwwarfs import Grumpy, Happy, Sleepy, Bashful, Sneezy, Dopey, Doc
x = {"a": 37, "b": 42, "c": 927}
x = 123456789.123456789e123456789
if (
very_long_variable_name is not None
and very_long_variable_name.field > 0
or very_long_variable_name.is_debug
):
z = "hello " + "world"
else:
world = "world"
a = "hello {}".format(world)
f = rf"hello {world}"
if this and that:
y = "hello " "world" # FIXME: https://github.com/psf/black/issues/26
class Foo(object):
def f(self):
return 37 * -2
def g(self, x, y=42):
return y
def f(a: List[int]):
return 37 - a[42 - u : y**3]
def very_important_function(
template: str,
*variables,
file: os.PathLike,
debug: bool = False,
):
"""Applies `variables` to the `template` and writes to `file`."""
with open(file, "w") as f:
...
Before
After
pre-commit hooks
$ pip install black
$ black ./my_source_folder
$ pip install pre-commit
...
$ pre-commit sample-config
...
$ pre-commit install
...
$ git add --all; git commit -m "my incremental work done"
black..............................................Passed
https://testdriven.io/blog/clean-code-python/
# Avoid ambiguous variable names
c = 5
d = 12
# Prefer longer variable names
city_counter = 5
elapsed_time_in_days = 12
# Avoid arbitrary shortening of words
self.clc_mem_ptl(spks: torch.Tensor)
# Spell it out
self.calculate_membrane_potential(input_spikes: torch.Tensor)
Ambiguous variable / function names
# Avoid indexing of variables that is opaque
def training_step(self, batch: torch.Tensor):
y_hat = self.network(batch[0])
loss = criterion(y_hat, batch[1])
# Make things explicit
def training_step(self, batch: torch.Tensor):
input_data, targets = batch
y_hat = self.network(input_data)
loss = criterion(y_hat, targets)
Magic numbers
# one function does two things
def fetch_and_display_personnel():
data = # ...
for person in data:
print(person)
# Split it
def fetch_personnel():
return # ...
def display_personnel(data):
for person in data:
print(person)
personnel_data = fetch_personnel()
display_personnel(personnel_data)
Strongly coupled code
# isolated mega function
def render_blog_post(title, author, created_timestamp, updated_timestamp, content):
# ...
render_blog_post("Clean code", "Nik Tomazic", 1622148362, 1622148362, "...")
# See if you can abstract away some things into a separate class
class BlogPost:
def __init__(self, title, author, created_timestamp, updated_timestamp, content):
self.title = title
self.author = author
self.created_timestamp = created_timestamp
self.updated_timestamp = updated_timestamp
self.content = content
blog_post1 = BlogPost("Clean code", "Nik Tomazic", 1622148362, 1622148362, "...")
def render_blog_post(blog_post):
# ...
render_blog_post(blog_post1)
Many arguments in function
# isolated mega function
def render_blog_post(title, author, created_timestamp, updated_timestamp, content):
# ...
render_blog_post("Clean code", "Nik Tomazic", 1622148362, 1622148362, "...")
# See if you can abstract away some things into a separate class
class BlogPost:
def __init__(self, title, author, created_timestamp, updated_timestamp, content):
self.title = title
self.author = author
self.created_timestamp = created_timestamp
self.updated_timestamp = updated_timestamp
self.content = content
blog_post1 = BlogPost("Clean code", "Nik Tomazic", 1622148362, 1622148362, "...")
def render_blog_post(blog_post):
# ...
render_blog_post(blog_post1)
Many arguments in function
# unnecessary complexity
names = ["Fang", "Debra", "Pascal"]
full_names = []
for i in range(len(names)):
name = names[i] + " Wang"
full_names.append(name)
# instead use built-in iterators and list comprehensions
names_list = ["Fang", "Debra", "Pascal"]
full_names_list = [name + " Wang" for name in names_list]
Un-Pythonic code
The first few items in the Fibonacci sequence are: 0, 1, 1, 2, 3, 5, 8, 13, 21, ...
We implement the following functions:
def fibonacci(x):
if x <= 2:
return 1
else:
return fibonacci(x - 1) + fibonacci(x - 2)
├── data
├── docs
├── results
├── scripts
├── src
│   ├── __init__.py
│   └── fibonacci.py
├── tests
├── .gitignore
├── environment.yml
├── README.md
└── setup.py
>>> from src import fibonacci
>>> fibonacci(1)
1
>>> fibonacci(3)
2
>>> fibonacci(0)
1
Manual testing
def fibonacci(x):
if x <= 2:
return 1
else:
return fibonacci(x - 1) + fibonacci(x - 2)
├── data
├── docs
├── results
├── scripts
├── src
│   ├── __init__.py
│   └── fibonacci.py
├── tests
├── .gitignore
├── environment.yml
├── README.md
└── setup.py
>>> from src import fibonacci
>>> fibonacci(1)
1
>>> fibonacci(3)
2
>>> fibonacci(0)
1
Manual testing
from src.fibonacci import fibonacci
def test_fibonacci_0():
assert fibonacci(0) == 0
def test_fibonacci_1():
assert fibonacci(1) == 1
def test_fibonacci_2():
assert fibonacci(2) == 1
def test_fibonacci_6():
assert fibonacci(6) == 8
def test_fibonacci_40():
assert fibonacci(40) == 102334155
├── data
├── docs
├── results
├── scripts
├── src
│   ├── __init__.py
│   └── fibonacci.py
├── tests
│   └── test_fibonacci.py
├── .gitignore
├── environment.yml
├── README.md
└── setup.py
test_
Automated unit tests
$ pytest tests/test_fibonacci.py
...
def test_fibonacci_0():
> assert fibonacci(0) == 0
E assert 1 == 0
E + where 1 = fibonacci(0)
tests/test_fibonacci.py:6: AssertionError
========= short test summary info ============
FAILED tests/test_fibonacci.py::test_fibonacci_0 - assert 1 == 0
====== 1 failed, 4 passed in 0.09s ===========
We use pytest to run all tests at once
Tests provide peace of mind that code still does what it's supposed to do
def fibonacci(x):
if x <= 2:
return 1
else:
return fibonacci(x - 1) + fibonacci(x - 2)
Tests provide peace of mind that code still does what it's supposed to do
def fibonacci(x):
if x == 0:
return 0
if x == 1:
return 1
else:
return fibonacci(x - 1) + fibonacci(x - 2)
Tests provide peace of mind that code still does what it's supposed to do
def fibonacci(x):
if x == 0:
return 0
if x == 1:
return 1
else:
return fibonacci(x - 1) + fibonacci(x - 2)
$ pytest tests
======= test session starts ========
...
tests/test_fibonacci.py ..... [100%]
======= 5 passed in 0.01s ==========
from src.fibonacci import fibonacci
def test_fibonacci_0():
assert fibonacci(0) == 0
def test_fibonacci_1():
assert fibonacci(1) == 1
def test_fibonacci_2():
assert fibonacci(2) == 1
def test_fibonacci_6():
assert fibonacci(6) == 8
def test_fibonacci_8():
assert fibonacci(8) == 21
Refactoring the tests themselves
from src.fibonacci import fibonacci
import pytest
@pytest.mark.parametrize(
"n, correct_output",
[(0, 0), (1, 1), (2, 1), (6, 8), (8, 21)]
)
def test_fibonacci_output_is_correct(n, correct_output):
assert fibonacci(n) == correct_output
Refactoring the tests themselves
$ pytest tests
======= test session starts ========
...
tests/test_fibonacci.py ..... [100%]
======= 5 passed in 0.01s ==========
$ pytest tests
======= test session starts ========
...
tests/test_fibonacci.py ..... [100%]
======= 5 passed in 0.01s ==========
>>> from src import fibonacci
>>> fibonacci(1)
1
>>> fibonacci(3)
2
>>> fibonacci(0)
1
Manual testing
Automated test suite
# synaptic currents are initialised to zero
i_syn = torch.zeros((batch_size, n_neurons))
# we add the input
i_syn = i_syn + input_data[time_step]
# decaying the synaptic currents
i_syn = i_syn * alpha_syn
"Don’t comment bad code – rewrite it." – Brian W. Kernighan and P. J. Plauger, quoted by Robert Martin in Clean Code
# iterate over all the lines
for l in L:
# split the line along hyphens
l.split("-")
for line in lines:
line.split("-")
class CropTime:
def __init__(self, min=0, max=None):
self.min = min
self.max = max
Type hints help the user to pass the right parameters
class CropTime:
def __init__(self, min: int = 0, max: Optional[int] = None):
self.min = min
self.max = max
https://blog.logrocket.com/understanding-type-annotation-python/
Docstrings explain what your class / method does
class CropTime:
"""Drops events with timestamps below min and above max.
Parameters:
min (int): The minimum timestamp below which all events are dropped.
Zero by default.
max (int): The maximum timestamp above which all events are dropped.
Example:
>>> transform = tonic.transforms.CropTime(min=1000, max=20000)
"""
def __init__(self, min: int = 0, max: Optional[int] = None):
self.min = min
self.max = max
Docstrings explain what your class / method does
def calc(self, x, tau):
"""
This method normalises synaptic input currents
by the neuron's respective membrane time constant.
Parameters:
x (torch.Tensor): the synaptic input currents
tau (torch.Tensor): the tau_mem for each neuron.
Returns:
torch.Tensor: normalised input currents
"""
return x * (1-tau)
def calc(self, x, tau):
# normalise synaptic currents x by tau_mem
return x * (1-tau)
def normalise_i_syn_by_tau(self, i_syn: torch.Tensor, tau_mem: torch.Tensor):
return i_syn * (1-tau_mem)
def normalise_i_syn_by_tau(self, i_syn: torch.Tensor, tau_mem: torch.Tensor):
"""
Normalising synaptic input current by the neuron's membrane time constant
helps when training time constants, as the amount of current injected
over time is the same.
"""
return i_syn * (1-tau_mem)