Practical Clean Code

Israel Saeta Pérez  (@dukebody)

projects delay/failure

sneaky bugs

developer suffering & quit

contagious mess

What is clean code?

  • reads like prose
  • elegant and efficient
  • makes it hard for bugs to hide
  • can be enhanced by a developer other than its original author
  • provides one obvious way to make things

let's start easy

Use a style guide checker

following convention makes it easier to read

  • pep8
  • flake8 - integrate with git hooks and CI
  • editor auto-format

Use meaningful variable and function names

# bad
ret = []
for l in legs:
    s = l.get_first_segment()
return ret

# better
leg = Leg()
segment = leg.get_first_segment()

first_segments = []
for leg in legs:
    segment = leg.get_first_segment()
return first_segments

We have autocomplete!

# unnecessary verbosity
class Address:
    def __init__(self, address_street, address_country):
        self.address_street = address_street
        self.address_country = address_country

# better
class Address:
    def __init__(self, street, city):
        self.street = street = city

Use context and namespacing

# how do I know without looking at the class definition?
res = BookingResult(,

# better
booking_result = BookingResult(

Use kwargs in functions with many arguments

# OK
if action in ['create', 'update', 'delete']:
    check_permission(user, 'write')

# better
is_write_action = action in ['create', 'update', 'delete']
if is_write_action:
    check_permission(user, 'write')

# OK?
if self.from_subject.subject_company != self.to_subject.subject_company:
    raise InvalidRelation()

# better
is_same_company = from_subject.subject_company != to_subject.subject_company
if not is_same_company:
    raise InvalidRelation()

# even better?
if not is_same_company(from_subject, to_subject):
    raise InvalidRelation()

clarify conditionals

functions should only do

one obvious thing

# bad
def get_digest_airlines(offer):
    digest = ""
    items = offer.items
    offer_airlines = set()
    for item in items:
        item_digest, item_airlines = get_item_digest(item)
        digest += item_digest
        offer_airlines = offer_airlines.union(item_airlines)
    return digest, offer_airlines

# good
def get_digest(self, offer):
    digest = ""
    items = offer.items
    for item in items:
        item_digest, item_airlines = self.get_item_digest(item)
        digest += item_digest
    return digest

def get_airlines(offer):
    items = offer.items
    airlines = set()
    for item in items:
        item_airlines =.get_item_airlines(item)
        airlines = airlines.union(item_airlines)
    return airlines

avoid returning non-homogeneous tuples

break long functions

into smaller ones

make it read like prose, cascade

# bad
def get_duplicated_trips(trip):
    # calculate signature for trip
    signature = ''
    for leg in trip.legs:
        for segment in leg.segments:
            signature += get_signature(segment)

    # get trips candidate for being duplicate
    company =
    candidate_trips = Trip.objects.filter(company=company)

    # calculate signature for each trip
    candidate_signatures = []
    for candidate_trip in candidate_trips:
        candidate_signature = ''
        for leg in candidate_trip.legs:
            for segment in leg.segments:
                candidate_signature += get_signature(segment)

    # get the trips for which the signature matches
    duplicated_trips = []
    for candidate_trip, candidate_signature in zip(candidate_trips, candidate_signatures):
        if candidate_signature == signature

Get duplicated trips

# better
def get_duplicated_trips(trip):
    signature = get_trip_signature(trip)
    candidate_trips = get_candidate_trips(trip)
    return [trip for trip in candidate_trips if get_trip_signature(trip) == signature]

def get_trip_signature(trip):
    signature = ''
    for leg in trip.legs:
        for segment in leg.segments:
            signature += get_signature(segment)
    return signature

def get_candidate_trips(trip):
    company =
    return Trip.objects.filter(company=company)

Get duplicated trips

# bad
def get_total_invoice_amounts(year, month):
    # get inovices for given month
    start = datetime(year, month, 1)
    next_month = month + 1 if month != 12 else 1
    end = datetime(year, next_month, 1)
    invoices = Invoice.objects.filter(created_at__gte=start, created_at__lte=end)

    # sum amounts for invoices
    total = 0
    for invoice in not invoices:
        for item in invoice.items:
            total += item.amount
    return total

Compute total invoice amounts for month

# better
def get_total_invoice_amounts(year, month):
    invoices = get_invoices_for_month(year, month)
    return sum(get_amount(invoice) for invoice in invoices)

def get_invoices_for_month(year, month):
    start = datetime(year, month, 1)
    next_month = month + 1 if month != 12 else 1
    end = datetime(year, next_month, 1)
    return Invoice.objects.filter(created_at__gte=start, created_at__lte=end)

def get_amount(invoice):
    return sum(item.amount for item in invoice)

Compute total invoice amounts for month

Keep cognitive/cyclomatic complexity small

conditionals, for's, nested code...

# bad - cyclomatic complexity of 6
def post_comment(self):
    if self.type == 'success':
        comment = 'Build succeeded'
    elif self.type == 'warning':
        comment = 'Build had issues'
    elif self.type == 'failed':
        comment = 'Build failed'
        comment = 'Unknown status'

    if self.type == 'success':, type='success')
    else:, type='error')
# better - both functions have complexity of 1
def get_comment(self):
    comments = {
        'success': 'Build succeeded',
        'warning': 'Build had issues',
        'failed': 'Build failed'
    return comments.get(self.type, 'Unknown status')

def post_comment(self):
    comment = self.get_comment(self), type=self.type)

principle of least knowledge

(information hiding, encapsulation)

hide the internal madness of your objects

  • Rule of "only one dot"
  • Create business methods for classes, avoid accessing attributes directly
  • Create more classes to hold data and logic to transfer defensiveness from the caller to the callee
# bad
def get_total_invoice_amounts(invoices):
    # sum amounts for invoices
    total = 0
    for invoice in invoices:
        # need to know that invoices have items!
        for item in invoice.items:
            total += item.amount
    return total

# better
def get_total_invoice_amounts(invoices):
    total = 0
    for invoice in invoices:
        total += invoice.amount
    return total

class Invoice:
    def amount(self):
        sum(item.amount for item in self.items)

Sum invoices amounts (rule one dot)

# bad
# caller needs to know internals
for leg in dog.legs:

# better

class Dog:
    def move(self, forward):
        for leg in self.legs:

ask dog move forward (rule one dot)

# bad
def pay(product, wallet):
    # also assumes internal structure -= product.price

# better
def pay(product, wallet):

class Wallet:
    def take(self, price):
        if price >
            raise ValueError('Not enough money')
        # ...

avoid impossible states

pay(amount, currency):

price = {
    'amount': 13,
    'currency': 'EUR'


class Price:
    def __init__(self, amount, currency):
        if self.amount < 0:
            raise ValueError('Amount cannot be smaller than zero')
        self.amount = amount

        is self.currency not in ALLOWED_CURRENCIES:
            raise ValueError('Unknown currency %s' % currency)
        self.currency = currency

mandatory structure, avoid impossible states

# Allows a flight with no legs, or leg without segments
flight = {
    'legs': [
            'key': 'abc',
            'segments': [...]

def get_direction(leg):
    if len(leg['segments']):  # defensive caller
        # what???

class Leg:
    def __init__(self, segments):
        assert len(segments) > 0  # defensive callee
        self.segments = segments

    def get_direction(self):
        # we are sure the leg has segment

Premature optimization

is the root of all evil



because YAGNI, KISS!

if the design is simple, it will be easy to add the features later if needed

continuous refactoring

boy scout rule

avoid superfluous function arguments

# get n posts, sorted by creation date
def get_posts(count, order_reverse=True):
    if order_reverse:
        order_by = '-created'
        order_by = 'created'
    return Post.objects.order_by(order_by)[:count]

# perhaps we don't need to control the ordering direction yet?
def get_posts(count):
    return Post.objects.order_by('-created')[:count]

avoid unneccessary abstraction

class TemplateEngine:
    def render(self, file_path, context):
        # ...

class Jinja2TemplateEngine(TemplateEngine):
    def render(...):
        template = jinja2_env.get_template('my_template.html')
        return template.render(**context)

def my_view(...):
    template_engine = config.get_template_engine()
    body = template_engine.render('my_template.html', context)
    return Response(body)

# what if we are only using Jinja2 for now?
def my_view(...):
    template = jinja2_env.get_template('my_template.html')
    body = template.render(**context)
    return Response(body)

focus on making your code easy to follow

optimize based of performance analysis when necessary

# Early loop break
has_luggage = False
for leg in legs:
    if has_luggage(leg):
        has_luggage = True

# Easier to read? If has_luggage is not expensive...
has_luggage = any([has_luggage(leg) for leg in legs])

# Turn into early break without sacrifycing readability
has_luggage = any(has_luggage(leg) for leg in legs)

(line profiler)

def maybe_slow_function(numbers):

$ kernprof -l
# script runs....

$ python -m line_profiler

# output
Timer unit: 1e-06 s

Total time: 0.000649 s
File: <ipython-input-2-2e060b054fea>
Function: maybe_slow_function at line 4

Line #      Hits         Time  Per Hit   % Time  Line Contents
     4                                           def maybe_slow_function(numbers):
     5         1           10     10.0      1.5      total = sum(numbers)
     6         1          186    186.0     28.7      divided = [numbers[i]/43 for i in range(len(numbers))]
     7         1          453    453.0     69.8      return ['hello' + str(numbers[i]) for i in range(len(numbers))]

tests are also code

use similar (relaxed) principles

cost/effectiveness of unit tests


  • Robert C. Martin books (Clean Code, Clean Architecture)
  • Learn from open source: AOSA books