GIL - это боль

GIL

def sum_seq(n):
    """Return 0 + 1 + ... + (n - 1), accumulated one term at a time.

    The sum is built with an explicit Python-level loop over ``range(n)``;
    for ``n <= 0`` the range is empty and the result is 0.
    """
    total = 0
    for term in range(n):
        total = total + term
    return total

GIL

- Time (sec)
python (sequence) 86.349
python (thread) 92.636
python (process) 22.646

GIL - Global Interpreter Lock 

Гарантия, что только один поток исполняет код в единицу времени

Это не позволяет нормально пользоваться тредами для CPU-bound задач

GIL

  • Так проще
    • Писать код
    • Поддерживать
  • Пока что никто не смог вырезать GIL, не замедлив однопоточные программы

GIL

  • Python threads = OS threads (pthreads, etc)
  • Thread Scheduler = OS Scheduler

 

GIL

GIL

/* Runtime state of the GIL: the switch interval, the last holder, the
   "locked" flag, a switch counter, and the mutex/condition variables that
   threads use to wait for the GIL. */
struct _gil_runtime_state {
    /* microseconds (the Python API uses seconds, though) */
    unsigned long interval;
    /* Last PyThreadState holding / having held the GIL. This helps us
       know whether anyone else was scheduled after we dropped the GIL. */
    _Py_atomic_address last_holder;
    /* Whether the GIL is already taken (-1 if uninitialized). This is
       atomic because it can be read without any lock taken in ceval.c. */
    _Py_atomic_int locked;
    /* Number of GIL switches since the beginning. */
    unsigned long switch_number;
    /* This condition variable allows one or several threads to wait
       until the GIL is released. In addition, the mutex also protects
       the above variables. */
    PyCOND_T cond;
    PyMUTEX_T mutex;
#ifdef FORCE_SWITCHING
    /* This condition variable helps the GIL-releasing thread wait for
       a GIL-awaiting thread to be scheduled and take the GIL. */
    PyCOND_T switch_cond;
    PyMUTEX_T switch_mutex;
#endif
};

GIL

/* Re-acquire the GIL for the calling thread and make `tstate` the current
   thread state again. Counterpart of PyEval_SaveThread().

   tstate: thread state to restore; passing NULL is a fatal error.

   Does not return if the interpreter is finalizing and this thread is not
   the one performing finalization: the GIL is dropped and the thread
   exits instead. */
void PyEval_RestoreThread(PyThreadState *tstate) {
    if (tstate == NULL)
        Py_FatalError("PyEval_RestoreThread: NULL tstate");
    assert(gil_created());

    /* Remember errno before take_gil(); it is written back below so the
       caller sees the value it had on entry. */
    int err = errno;
    take_gil(tstate);
    /* _Py_Finalizing is protected by the GIL */
    if (_Py_IsFinalizing() && !_Py_CURRENTLY_FINALIZING(tstate)) {
        /* Give the GIL back before terminating this thread. */
        drop_gil(tstate);
        PyThread_exit_thread();
        Py_UNREACHABLE();
    }
    errno = err;

    PyThreadState_Swap(tstate);
}



/* Detach the current thread state and release the GIL.

   Returns the thread state that was current, so the caller can pass it to
   PyEval_RestoreThread() later. It is a fatal error if there was no
   current thread state. */
PyThreadState * PyEval_SaveThread(void)
{
    /* Install NULL as the current thread state and keep the previous one. */
    PyThreadState *tstate = PyThreadState_Swap(NULL);
    if (tstate == NULL)
        Py_FatalError("PyEval_SaveThread: NULL tstate");
    assert(gil_created());
    drop_gil(tstate);
    return tstate;
}

GIL

/* Opens a brace-delimited block and releases the GIL: the current thread
   state is stored in a block-local `_save` via PyEval_SaveThread().
   Must be paired with Py_END_ALLOW_THREADS, which closes the block. */
#define Py_BEGIN_ALLOW_THREADS { \
                        PyThreadState *_save; \
                        _save = PyEval_SaveThread();

/* Re-acquires the GIL with the thread state saved by
   Py_BEGIN_ALLOW_THREADS and closes the block that macro opened. */
#define Py_END_ALLOW_THREADS    PyEval_RestoreThread(_save); \
                 }
/* What a Py_BEGIN_ALLOW_THREADS ... Py_END_ALLOW_THREADS pair expands to: */
{
  PyThreadState *_save;
  _save = PyEval_SaveThread();

  /* ... Do some blocking I/O operation ... */

  PyEval_RestoreThread(_save);
}

Что делать будем?

1. Не использовать питон

2. Использовать другой интерпретатор

  - pypy-stm

  - Jython

  - IronPython

3. Писать все критические части на C/C++, отпускать GIL руками

4. Multiprocessing

C-extensions

#include "sum_seq.h"

/*
 * Return the sum 0 + 1 + ... + (n - 1).
 *
 * Plain C with no Python API calls. The accumulator is an unsigned
 * long long, so a sum that does not fit wraps around as unsigned
 * arithmetic does in C.
 */
unsigned long long int sum_seq(unsigned long long int n) {
    unsigned long long int total = 0;
    unsigned long long int next = 0;

    while (next < n) {
        total += next;
        next++;
    }

    return total;
}
def sum_seq(n):
    """Return 0 + 1 + ... + (n - 1), accumulated one term at a time.

    The sum is built with an explicit Python-level loop over ``range(n)``;
    for ``n <= 0`` the range is empty and the result is 0.
    """
    total = 0
    for term in range(n):
        total = total + term
    return total
unsigned long long int sum_seq(unsigned long long int n);

sum_seq.h

sum_seq.c

C-extensions

#include <Python.h>
#include "sum_seq.h"


/* Forward declaration so the method table can reference the wrapper before
   its definition. */
static PyObject *sum_seq_sum_seq(PyObject *self, PyObject *args);

/* Method table: exposes a single function named "sum_seq" that receives its
   arguments as a positional tuple (METH_VARARGS) and has no docstring. */
static PyMethodDef module_methods[] = {
    {"sum_seq", sum_seq_sum_seq, METH_VARARGS, NULL},
    {NULL, NULL, 0, NULL}  /* sentinel marking the end of the table */
};

/* Module definition handed to PyModule_Create() in PyInit_sum_seq(). */
static struct PyModuleDef moduledef = {
    PyModuleDef_HEAD_INIT,
    "sum_seq",  // module name
    NULL, // module docstring
    -1,  // m_size: -1 = no per-module state (sub-interpreters unsupported)
    module_methods,  // m_methods
    NULL,  // m_slots
    NULL,  // m_traverse
    NULL,  // m_clear
    NULL,  // m_free
};

PyObject *PyInit_sum_seq(void){
    PyObject *module = PyModule_Create(&moduledef);
    if (module == NULL)
        return NULL;
    return module;
}

_sum_seq.c

C-extensions

/*
 * Python-callable wrapper around sum_seq().
 *
 * Expects one integer argument, parsed with the "K" format (converted to
 * unsigned long long without overflow checking), and returns sum_seq(n) as
 * a Python int. Returns NULL with an exception set if parsing fails.
 * The GIL is held for the whole call.
 */
static PyObject *sum_seq_sum_seq(PyObject *self, PyObject *args){
    unsigned long long int limit = 0;
    unsigned long long int total = 0;

    if (!PyArg_ParseTuple(args, "K", &limit)){
        return NULL;
    }

    total = sum_seq(limit);

    return Py_BuildValue("K", total);
}

_sum_seq.c

C-extensions

from setuptools import setup, Extension

# Native extension importable as ``sum_seq.c.sum_seq``: the CPython wrapper
# (_sum_seq.c) compiled together with the plain C implementation (sum_seq.c).
_c_extension = Extension(
    "sum_seq.c.sum_seq",
    sources=["./sum_seq/c/_sum_seq.c", "./sum_seq/c/sum_seq.c"],
)

setup(name="sum_seq", ext_modules=[_c_extension])

setup.py

C-extensions

- Time (sec)
python (sequence) 86.349
python (thread) 92.636
python (process) 22.646
c (sequence) 0.66055
c (thread) 0.66104

C-extensions

/*
 * Python-callable wrapper around sum_seq() that releases the GIL while the
 * C loop runs.
 *
 * Expects one integer argument, parsed with the "K" format (converted to
 * unsigned long long without overflow checking), and returns sum_seq(n) as
 * a Python int. Returns NULL with an exception set if parsing fails.
 */
static PyObject *sum_seq_sum_seq(PyObject *self, PyObject *args){
    unsigned long long int limit = 0;
    unsigned long long int total = 0;

    if (!PyArg_ParseTuple(args, "K", &limit)){
        return NULL;
    }

    /* Only sum_seq() runs between the two macros, and it is called with
       plain C integers; argument parsing and result building stay outside,
       where the GIL is held. */
    Py_BEGIN_ALLOW_THREADS
    // ^^^^^^^^^^^^^^^^^^^

    total = sum_seq(limit);

    Py_END_ALLOW_THREADS
    // ^^^^^^^^^^^^^^^^^

    return Py_BuildValue("K", total);
}

_sum_seq.c

C-extensions

- Time (sec)
python (sequence) 86.349
python (thread) 92.636
python (process) 22.646
c (sequence) 0.66055
c (thread) 0.66104
c (thread, GIL released) 0.18879

C-extensions

htop здорового человека

htop питониста курильщика

C-extensions

+

  • ВСЁ ЗЕЛЁНЕНЬКОЕ!!1111
  • NO GIL
  • Мечты сбылись

​-

  • Нужно писать обвязку (docs)
  • Нужно писать на С

А можно ли без обвязок (нет)?

def sum_seq(n):
    # Return 0 + 1 + ... + (n - 1), computed with C integer arithmetic.
    # C-typed locals: the loop below works only on these, not on Python
    # objects, so it can run inside the `nogil` block.
    cdef long long unsigned int result = 0, i = 0;
    # Convert the Python argument to a C integer once, before the GIL is
    # released.
    cdef long long unsigned int n_internal = n;

    # Release the GIL for the duration of the loop.
    with nogil:
        for i in range(n_internal):
            result += i

    return result
def sum_seq(n):
    """Return 0 + 1 + ... + (n - 1), accumulated one term at a time.

    The sum is built with an explicit Python-level loop over ``range(n)``;
    for ``n <= 0`` the range is empty and the result is 0.
    """
    total = 0
    for term in range(n):
        total = total + term
    return total

sum_seq.pyx

Cython

from setuptools import setup, Extension
from Cython.Build import cythonize

# Hand-written C extension: CPython wrapper plus the plain C implementation.
_c_extension = Extension(
    "sum_seq.c.sum_seq",
    sources=["./sum_seq/c/_sum_seq.c", "./sum_seq/c/sum_seq.c"],
)
# Extension(s) produced by Cython from the .pyx source, compiled with
# language_level 3.
_cython_extensions = cythonize(
    "sum_seq/cython/sum_seq.pyx",
    compiler_directives={"language_level": 3},
)

setup(
    name="sum_seq",
    ext_modules=[_c_extension] + _cython_extensions,
)

setup.py

Cython

- Time (sec)
python (sequence) 86.349
python (thread) 92.636
python (process) 22.646
c (sequence) 0.66055
c (thread) 0.66104
c (thread, GIL released) 0.18879
cython (sequence) 0.65923
cython (thread) 0.19168

Cython

from gensim.downloader import load
from gensim.models import Word2Vec

# Fetch the "text8" corpus through gensim's downloader API.
corpus = load("text8")
# Train a Word2Vec model on it with default hyperparameters.
model = Word2Vec(corpus)

# Query through the model's KeyedVectors (`model.wv`): calling
# `most_similar` directly on the Word2Vec object was deprecated in
# gensim 3.x and removed in gensim 4.0.
print(model.wv.most_similar("cat"))

"""
[(u'dog', 0.830856204032898),
 (u'panda', 0.7922875285148621),
 (u'pig', 0.7816621661186218),
 (u'goat', 0.779694676399231),
 (u'bee', 0.7739807367324829),
 (u'ass', 0.7661241292953491),
 (u'hamster', 0.761398196220398),
 (u'blonde', 0.7453086376190186),
 (u'bird', 0.7418307065963745)]
"""

Cython

Gensim GSoC 2018 

Cython

Gensim GSoC 2018 

Cython

Gensim GSoC 2018 

Cython

Gensim GSoC 2018 

[pid 19844] 18:11:40.165364 futex(0x55f71dd9d290, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, NULL, 0xffffffff <unfinished ...>
[pid 19847] 18:11:40.165447 futex(0x55f71dd9d290, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, NULL, 0xffffffff <unfinished ...>
[pid 19845] 18:11:40.165466 futex(0x55f71dd9d290, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, NULL, 0xffffffff <unfinished ...>
[pid 19843] 18:11:40.165479 futex(0x55f71dd9d290, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, NULL, 0xffffffff <unfinished ...>
[pid 19842] 18:11:40.165510 futex(0x55f71dc5b220, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, NULL, 0xffffffff <unfinished ...>
[pid 19704] 18:11:40.165525 futex(0x55f71ebf1cd0, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, NULL, 0xffffffff <unfinished ...>
[pid 19846] 18:11:40.167154 futex(0x55f71dd9d290, FUTEX_WAKE_PRIVATE, 1) = 1 <0.000016>
[pid 19844] 18:11:40.167193 <... futex resumed> ) = 0 <0.001755>
[pid 19846] 18:11:40.167204 futex(0x55f71dc5b220, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, NULL, 0xffffffff <unfinished ...>
[pid 19844] 18:11:40.167419 futex(0x55f71dd9d290, FUTEX_WAKE_PRIVATE, 1) = 1 <0.000015>
[pid 19843] 18:11:40.167460 <... futex resumed> ) = 0 <0.001973>
[pid 19843] 18:11:40.167485 futex(0x55f71dd9d290, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, NULL, 0xffffffff <unfinished ...>
[pid 19844] 18:11:40.167500 futex(0x55f71dd9d290, FUTEX_WAKE_PRIVATE, 1 <unfinished ...>
[pid 19843] 18:11:40.167511 <... futex resumed> ) = -1 EAGAIN (Resource temporarily unavailable) <0.000015>
[pid 19847] 18:11:40.167523 <... futex resumed> ) = 0 <0.002065>
[pid 19844] 18:11:40.167532 <... futex resumed> ) = 1 <0.000028>
[pid 19847] 18:11:40.167541 futex(0x55f71dd9d290, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, NULL, 0xffffffff <unfinished ...>
[pid 19844] 18:11:40.167552 futex(0x55f71dd9d290, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, NULL, 0xffffffff <unfinished ...>
[pid 19843] 18:11:40.167562 futex(0x55f71dc5b220, FUTEX_WAKE_PRIVATE, 1) = 1 <0.000011>
[pid 19842] 18:11:40.167584 <... futex resumed> ) = 0 <0.002066>
[pid 19842] 18:11:40.167599 futex(0x55f71dd9d290, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, NULL, 0xffffffff <unfinished ...>
[pid 19843] 18:11:40.174995 futex(0x55f71dd9d290, FUTEX_WAKE_PRIVATE, 1) = 1 <0.000009>
[pid 19843] 18:11:40.175030 futex(0x55f71dc5b220, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, NULL, 0xffffffff <unfinished ...>
[pid 19845] 18:11:40.175058 <... futex resumed> ) = 0 <0.009584>
[pid 19845] 18:11:40.175136 futex(0x55f71dd9d290, FUTEX_WAKE_PRIVATE, 1) = 1 <0.000058>
[pid 19847] 18:11:40.175206 <... futex resumed> ) = 0 <0.007660>
[pid 19847] 18:11:40.175221 futex(0x55f71dd9d290, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, NULL, 0xffffffff <unfinished ...>
[pid 19845] 18:11:40.175233 futex(0x55f71dd9d290, FUTEX_WAKE_PRIVATE, 1 <unfinished ...>
[pid 19847] 18:11:40.175242 <... futex resumed> ) = -1 EAGAIN (Resource temporarily unavailable) <0.000013>
[pid 19847] 18:11:40.175265 futex(0x55f71dd9d290, FUTEX_WAKE_PRIVATE, 1) = 1 <0.000014>
[pid 19845] 18:11:40.175290 <... futex resumed> ) = 1 <0.000054>
[pid 19847] 18:11:40.175298 futex(0x55f71dd9d290, FUTEX_WAKE_PRIVATE, 1 <unfinished ...>
[pid 19844] 18:11:40.175308 <... futex resumed> ) = 0 <0.007750>
[pid 19847] 18:11:40.175316 <... futex resumed> ) = 0 <0.000013>
[pid 19842] 18:11:40.175326 <... futex resumed> ) = 0 <0.007721>
[pid 19847] 18:11:40.175338 futex(0x55f71dd9d290, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, NULL, 0xffffffff <unfinished ...>
[pid 19845] 18:11:40.175351 futex(0x55f71dd9d290, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, NULL, 0xffffffff <unfinished ...>
[pid 19842] 18:11:40.175365 futex(0x55f71dd9d290, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, NULL, 0xffffffff <unfinished ...>
[pid 19844] 18:11:40.175377 futex(0x55f71dd9d290, FUTEX_WAKE_PRIVATE, 1 <unfinished ...>
[pid 19845] 18:11:40.175399 <... futex resumed> ) = -1 EAGAIN (Resource temporarily unavailable) <0.000038>
[pid 19847] 18:11:40.175409 <... futex resumed> ) = 0 <0.000065>
[pid 19845] 18:11:40.175417 futex(0x55f71dd9d290, FUTEX_WAKE_PRIVATE, 1 <unfinished ...>
[pid 19847] 18:11:40.175427 futex(0x55f71dd9d290, FUTEX_WAKE_PRIVATE, 1 <unfinished ...>
[pid 19845] 18:11:40.175436 <... futex resumed> ) = 0 <0.000014>
[pid 19847] 18:11:40.175444 <... futex resumed> ) = 0 <0.000013>
[pid 19845] 18:11:40.175452 futex(0x55f71dd9d290, FUTEX_WAKE_PRIVATE, 1 <unfinished ...>
[pid 19847] 18:11:40.175461 futex(0x55f71dd9d290, FUTEX_WAKE_PRIVATE, 1 <unfinished ...>
[pid 19845] 18:11:40.175471 <... futex resumed> ) = 0 <0.000013>
[pid 19847] 18:11:40.175478 <... futex resumed> ) = 0 <0.000013>
[pid 19845] 18:11:40.175486 futex(0x55f71dd9d290, FUTEX_WAKE_PRIVATE, 1 <unfinished ...>
[pid 19847] 18:11:40.175496 futex(0x55f71dd9d290, FUTEX_WAKE_PRIVATE, 1 <unfinished ...>

Cython

Gensim GSoC 2018 

strace: Process 17570 attached with 9 threads
strace: [ Process PID=17707 runs in x32 mode. ]
strace: [ Process PID=17707 runs in 64 bit mode. ]
% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 99.32   47.545466           8   5848739    621257 futex
  0.44    0.208902           1    380473           read
  0.24    0.114486          31      3736           munmap
  0.00    0.000610           1       534           close
  0.00    0.000591           1       549           write
  0.00    0.000524           1       525           openat
  0.00    0.000330           1       548           stat
  0.00    0.000258           0      1050           fstat
  0.00    0.000142           0       548           getpid
  0.00    0.000064           8         8           madvise
  0.00    0.000014           0       146           mprotect
  0.00    0.000003           3         1           rt_sigaction
------ ----------- ----------- --------- --------- ----------------
100.00   47.871390               6236857    621257 total

Cython

Gensim GSoC 2018 

def train_batch(..., sentences, ...):
    # Initialization code (cast python config to C types)
    ...

    # Convert batch to needed format (for NOGIL)
    ...

    with nogil:
        for sent_idx in range(effective_sentences):
            # Training
            ...

Cython

Gensim before GSoC 2018 

def train_epoch(..., corpus_file, ...):
  # Initialization code (cast python config to C types)
  ...
  input_stream = get_stream_from(corpus_file)

  with nogil:
      input_stream.reset()
      while not (input_stream.is_eof() or 
                 total_words > expected_words / c.workers):
          sentences = input_stream.next_batch()

          # Convert batch to needed format 
          ...

          # Training
          ...

Cython

Gensim after GSoC 2018 

Cython

Gensim GSoC 2018 

Спасибо

GIL - это боль

By Ivan Menshikh

GIL - это боль

  • 1,492