Daniel Haehn PRO
Hi, I am a biomedical imaging and visualization researcher who investigates how computational methods can accelerate biological and medical research.
Full-Power
Project Presentations
5/8
5/10
5/12
5/19
Everything needs to be done
* Swarthmore
* IntellAdapt
* BCH
* LE-V-EL
* Northwestern
* FlyEM
* FluxMarine
* MGH
* Robomaster
* UPenn
* Axolotl
* BWH1
* BWH2
* OMAMA-DB
* TradingBot
Project Presentations
Be loud, be proud!
Stay High-level (nobody wants all the details)
Summarize the project
Talk about your Plan, Ideas, and Solutions
Describe the Main Challenge
And how you solved it...
Show something in action (video or live)
Describe the final steps needed to finish in the remaining weeks.
Opportunity!
Grade!
Project Presentations
10 minutes per team
Suggestion: use max. 8 slides
1. Client and Team
2. Project intro
3. Demo (video or live)
4. Main Challenge
5. Solution
6. Technologies used
7. Lessons learned, teamwork improvements
8. Final steps to finish
Update your client this week!
Did you create something useful?
Github Repository
Final Google Doc
Peer Assessment
rate each other
Functions, Classes
Arrays, Vectors
Templates
Cython
Run our C++ code in Python using Cython
and compare timing against NumPy
Analyze a bunch of numbers and calculate min, max, mean, stddev.
Course Evaluations!
Functions, Classes
Arrays, Vectors
Templates
Cython
Run our C++ code in Python using Cython
and compare timing against NumPy
Analyze a bunch of numbers and calculate min, max, mean, stddev.
Start an Ubuntu Virtual Machine
SSH into it
Install our code
Run tests to measure speed
pytest
What do we have?
stats.cc
What do we need?
setup.py
statistics.pyx
python setup.py build_ext --inplace
.so
conda install cython
#include <iostream>
#include <vector>
#include <algorithm>
#include <cassert>
#include <cmath>
// Summary statistics (minimum, maximum, mean, standard deviation) over a
// std::vector<T>. The class keeps no state; every member function works only
// on the vector it is given.
//
// Precondition for all four functions: the vector is not empty. The check is
// an assert(), so builds that define NDEBUG skip it.
//
// Vectors are taken by const reference so a call does not copy the data.
template <typename T>
class Stats {
public:
  // Returns the smallest element of v.
  T get_min(const std::vector<T>& v);
  // Returns the largest element of v.
  T get_max(const std::vector<T>& v);
  // Returns the arithmetic mean of v, accumulated in single precision.
  float get_mean(const std::vector<T>& v);
  // Returns the population standard deviation of v (divides by N, not N-1).
  float get_stddev(const std::vector<T>& v);
};

template <typename T>
T Stats<T>::get_min(const std::vector<T>& v) {
  assert(!v.empty());
  return *std::min_element(v.begin(), v.end());
}

template <typename T>
T Stats<T>::get_max(const std::vector<T>& v) {
  assert(!v.empty());
  return *std::max_element(v.begin(), v.end());
}

template <typename T>
float Stats<T>::get_mean(const std::vector<T>& v) {
  assert(!v.empty());
  float sum = 0;
  for (typename std::vector<T>::const_iterator it = v.begin(); it != v.end(); ++it) {
    sum += *it;
  }
  return sum / v.size();
}

template <typename T>
float Stats<T>::get_stddev(const std::vector<T>& v) {
  assert(!v.empty());
  const float mean = get_mean(v);
  float squared_sum = 0;
  // Every element contributes, including the first one: the divisor below is
  // the full element count, so skipping v[0] would underestimate the result.
  for (typename std::vector<T>::const_iterator it = v.begin(); it != v.end(); ++it) {
    const float deviation = *it - mean;
    squared_sum += deviation * deviation;
  }
  return std::sqrt(squared_sum / v.size());
}
void test_get_min() {
std::vector<float> somevalues;
somevalues.push_back(1.3);
somevalues.push_back(2);
somevalues.push_back(3);
somevalues.push_back(-241);
Stats<float> stats;
assert(stats.get_min(somevalues)==-241);
std::cout << "Test OK!" << std::endl;
}
void test_get_max() {
std::vector<float> somevalues;
somevalues.push_back(1.3);
somevalues.push_back(2);
somevalues.push_back(3);
somevalues.push_back(-241);
Stats<float> stats;
assert(stats.get_max(somevalues)==3);
std::cout << "Test OK!" << std::endl;
}
// Self-test for Stats<float>::get_mean: the mean of {1.3, 2, 3, -241} is
// -58.675. Prints "Test OK!" once the assertion has held.
void test_get_mean() {
  std::vector<float> somevalues;
  somevalues.push_back(1.3);
  somevalues.push_back(2);
  somevalues.push_back(3);
  somevalues.push_back(-241);
  Stats<float> stats;
  const float expected = -58.675;
  // Take the absolute value of the difference itself. Subtracting two
  // absolute values goes negative whenever the computed mean is smaller in
  // magnitude than expected, which would let a wrong result pass the check.
  const float diff = std::abs(stats.get_mean(somevalues) - expected);
  assert(diff < 0.0005);
  std::cout << "Test OK!" << std::endl;
}
// Self-test for Stats<float>::get_stddev: the population standard deviation
// of {1.3, 2, 3, -241} is 105.26712152899404. Prints "Test OK!" once the
// assertion has held.
void test_get_stddev() {
  std::vector<float> somevalues;
  somevalues.push_back(1.3);
  somevalues.push_back(2);
  somevalues.push_back(3);
  somevalues.push_back(-241);
  Stats<float> stats;
  const float expected = 105.26712152899404;
  // Two-sided tolerance: the absolute value is taken of the difference.
  // Subtracting two absolute values goes negative for any result that is too
  // small, so an underestimated deviation would pass unnoticed.
  const float diff = std::abs(stats.get_stddev(somevalues) - expected);
  assert(diff < 0.0005);
  std::cout << "Test OK!" << std::endl;
}
// Entry point: runs the four self-tests in order (min, max, mean, stddev).
// Each test asserts its expectation and prints a line when it passes.
int main()
{
  typedef void (*TestFn)();
  const TestFn all_tests[] = {
    test_get_min,
    test_get_max,
    test_get_mean,
    test_get_stddev
  };
  const int test_count = sizeof(all_tests) / sizeof(all_tests[0]);
  for (int k = 0; k < test_count; ++k) {
    all_tests[k]();
  }
  return 0;
}
from setuptools import setup
from Cython.Build import cythonize
# Translate statistics.pyx with Cython and register the result as the
# extension module(s) to build.
# NOTE(review): the module name `statistics` is also the name of a Python
# standard-library module -- confirm that shadowing it is intended.
extensions = cythonize("statistics.pyx")
setup(ext_modules=extensions)
# distutils: language = c++
from libcpp.vector cimport vector
#
# Connection to C++
#
cdef extern from "stats.cc":
    # Declaration of the C++ template class found in stats.cc; only the
    # member functions called from this file are listed.
    cdef cppclass Stats[T]:
        T get_min(vector[T] v)
        T get_max(vector[T] v)
        float get_mean(vector[T] v)
        float get_stddev(vector[T] v)
#
# Python Interface
#
cdef class PyStats:
    """Python-facing wrapper around a C++ ``Stats[float]`` object.

    Each method takes a Python sequence of numbers, which Cython converts to
    a ``vector[float]``, and forwards it to the matching C++ member function.

    Every method raises ``ValueError`` for an empty sequence: the C++ code
    reads the first element unconditionally, so an empty vector must not be
    passed through.
    """
    cdef Stats[float] stats

    def get_min(self, vector[float] v):
        """Return the smallest value in ``v``."""
        if v.empty():
            raise ValueError("get_min() requires at least one value")
        return self.stats.get_min(v)

    def get_max(self, vector[float] v):
        """Return the largest value in ``v``."""
        if v.empty():
            raise ValueError("get_max() requires at least one value")
        return self.stats.get_max(v)

    def get_mean(self, vector[float] v):
        """Return the arithmetic mean of ``v``."""
        if v.empty():
            raise ValueError("get_mean() requires at least one value")
        return self.stats.get_mean(v)

    def get_stddev(self, vector[float] v):
        """Return the standard deviation of ``v`` as computed by the C++ class."""
        if v.empty():
            raise ValueError("get_stddev() requires at least one value")
        return self.stats.get_stddev(v)
stats.cc
statistics.pyx
setup.py
import statistics

# Create the wrapper object, then print the minimum of a small sample list.
wrapper = statistics.PyStats()
sample = [1.3, 2, 3, -241]
smallest = wrapper.get_min(sample)
print(smallest)
pytest
$ pytest
=========================== test session starts ============================
platform linux -- Python 3.x.y, pytest-5.x.y, py-1.x.y, pluggy-0.x.y
cachedir: $PYTHON_PREFIX/.pytest_cache
rootdir: $REGENDOC_TMPDIR
collected 1 item
test_sample.py F [100%]
================================= FAILURES =================================
_______________________________ test_answer ________________________________
def test_answer():
> assert inc(3) == 5
E assert 4 == 5
E + where 4 = inc(3)
test_sample.py:6: AssertionError
========================= short test summary info ==========================
FAILED test_sample.py::test_answer - assert 4 == 5
============================ 1 failed in 0.12s =============================
# content of test_sample.py
def inc(x):
    """Return ``x`` increased by one."""
    incremented = x + 1
    return incremented
def test_answer():
    # inc(3) evaluates to 4, so this comparison with 5 fails when the test runs.
    result = inc(3)
    assert result == 5
import timeit
def test_cython():
    """Time 100 runs of a snippet that averages a 1000-element list via PyStats.

    The snippet calls ``get_mean`` so that the figure printed as
    'Cython Mean' measures the same operation as the NumPy and pure-Python
    mean timings. The imports and list construction stay inside the timed
    code, as they do in those snippets, so the overhead is the same.
    """
    code = '''import statistics
import numpy as np
somevalues = [100]*1000
s = statistics.PyStats()
s.get_mean( somevalues )
'''
    print('Cython Mean', timeit.timeit(code, number=100))
def test_numpy():
    """Time 100 runs of a snippet that averages a 1000-element list with np.mean."""
    snippet = '''import statistics
import numpy as np
somevalues = [100]*1000
s = statistics.PyStats()
np.mean( somevalues )
'''
    elapsed = timeit.timeit(snippet, number=100)
    print('Numpy Mean', elapsed)
def test_python():
    """Time 100 runs of a snippet that averages a 1000-element list in plain Python.

    The loop body inside the timed snippet is indented; without that
    indentation the snippet is not valid Python and timeit cannot compile it.
    The imports and PyStats construction are kept so the per-run overhead
    matches the Cython and NumPy snippets.
    """
    code = '''import statistics
import numpy as np
somevalues = [100]*1000
s = statistics.PyStats()
mean = 0
for v in somevalues:
    mean+=v
mean /= len(somevalues)
'''
    print('Python Mean', timeit.timeit(code, number=100))
import statistics

# Create the wrapper object, then print the minimum of a small sample list.
wrapper = statistics.PyStats()
sample = [1.3, 2, 3, -241]
smallest = wrapper.get_min(sample)
print(smallest)
# Builds the Cython extension inside a conda environment named CS410 and
# runs pytest as the container's entry point.
FROM continuumio/miniconda3
# MAINTAINER is deprecated; the same text is kept as a label.
LABEL maintainer="CS410.net version: 0.1"

# Refresh the package index and install the compiler in a single layer, so a
# cached `apt-get update` layer is never paired with a later install line.
RUN apt-get update && apt-get install -y g++ && rm -rf /var/lib/apt/lists/*

# -y answers conda's confirmation prompt; an image build has no terminal.
RUN conda create -y --name CS410 python=3.9

# Every following RUN executes inside the CS410 environment.
SHELL ["conda", "run", "-n", "CS410", "/bin/bash", "-c"]
RUN conda install -y numpy
RUN conda install -y cython
RUN conda install -y pytest
RUN pip install hypothesis

RUN git clone https://github.com/haehn/cs410stats.git
WORKDIR /cs410stats

# COPY is sufficient for plain local files; these replace the cloned copies.
COPY setup.py setup.py
COPY statistics.cpp statistics.cpp
COPY statistics.pyx statistics.pyx
COPY stats.cc stats.cc
COPY test_stats.py test_stats.py

RUN python setup.py build_ext --inplace
ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "CS410", "pytest", "-s"]
Docker commands we used today:
# build the container from the Dockerfile
docker build -t TAG .
# run the container interactively
docker run -it TAG
# login to DockerHub
docker login
# deploy the container
docker push TAG:latest
By Daniel Haehn
Hi, I am a biomedical imaging and visualization researcher who investigates how computational methods can accelerate biological and medical research.