Episode 3
Что скрывает python?
(cpython, memory, gc)
52645-22327
>>> Point = namedtuple('Point', ['x', 'y'])
>>> p = Point(11, y=22) # instantiate with positional or keyword arguments
>>> p[0] + p[1] # indexable like the plain tuple (11, 22)
33
>>> x, y = p # unpack like a regular tuple
>>> x, y
(11, 22)
>>> p.x + p.y # fields also accessible by name
33
>>> p # readable __repr__ with a name=value style
Point(x=11, y=22)
>>> s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
>>> d = defaultdict(list)
>>> for k, v in s:
... d[k].append(v)
...
>>> sorted(d.items())
[('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]
Проще диагностировать проблемы
Меньше магии
Всегда знаем, куда пойти посмотреть
$ python my_code.py
code.py
tokenizer
parser
compiler
VM
$ echo "print('Hello world')" > my_code.py
$ python3 -m tokenize my_code.py
0,0-0,0: ENCODING 'utf-8'
1,0-1,5: NAME 'print'
1,5-1,6: OP '('
1,6-1,19: STRING "'Hello world'"
1,19-1,20: OP ')'
1,20-1,21: NEWLINE '\n'
2,0-2,0: ENDMARKER ''
...
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
...
In [0]: import ast
In [1]: tree = ast.parse('print("Hello world")')
In [2]: tree
Out[2]: <_ast.Module at 0x10a3ba0b8>
Получаем набор низкоуровневых команд (opcode) для последующей интерпретации
In [1]: import dis
In [2]: def x():
...: a = 2 ** 3
...: return a
...:
...:
In [3]: dis.dis(x)
2 0 LOAD_CONST 1 (8)
2 STORE_FAST 0 (a)
3 4 LOAD_FAST 0 (a)
6 RETURN_VALUE
In [4]: x.__code__.co_code
Out[4]: b'd\x01}\x00|\x00S\x00'
/* Process entry point. On Windows builds wmain() forwards to Py_Main();
 * on every other platform main() forwards to _Py_UnixMain(). */
#ifdef MS_WINDOWS
int
wmain(int argc, wchar_t **argv)
{
return Py_Main(argc, argv);
}
#else
int
main(int argc, char **argv)
{
return _Py_UnixMain(argc, argv);
}
#endif
/* Start-up driver: calls pymain_init(), then pymain_run_python(), then
 * pymain_free(), and returns the exit status stored in pymain->status. */
static int
pymain_main(_PyMain *pymain)
{
    PyInterpreterState *interp;
    pymain_init(pymain, &interp);
    // ...
    pymain_run_python(pymain, interp);  // execute the code
    // ...
    pymain_free(pymain);
    // ... error and signal handling
    return pymain->status;
}
/* Runs the program according to how the interpreter was launched.
 * Only the "run a file" branch and the REPL call are shown; the other
 * launch modes and the declarations of config, cf and res are elided. */
static int
pymain_run_python(_PyMain *pymain, PyInterpreterState *interp)
{
    // ... check the settings
    // depending on how the interpreter was launched
    if (pymain->filename != NULL) {  // stdin, module, etc.
        pymain_run_file(pymain, config, &cf);  // our case
    }
    // ...
    pymain_repl(pymain, config, &cf);  // the REPL is here
    // ...
    return res;
}
/* Runs the file behind fp: when maybe_pyc_file() reports a compiled file
 * it goes through run_pyc_file(), otherwise it is run as source through
 * PyRun_FileExFlags() with the Py_file_input start symbol. */
int
PyRun_SimpleFileExFlags(FILE *fp, const char *filename, int closeit,
PyCompilerFlags *flags)
{
// ...
if (maybe_pyc_file(fp, filename, ext, closeit)) {
// ...
v = run_pyc_file(pyc_fp, filename, d, d, flags);
} else {
// ...
v = PyRun_FileExFlags(fp, filename, Py_file_input, d, d,
closeit, flags);
}
// ...
return ret;
}
/* Parses the file behind fp into an AST and hands it to run_mod() for
 * execution; returns whatever run_mod() returns. */
PyObject *
PyRun_FileExFlags(FILE *fp, const char *filename_str, int start, PyObject *globals,
                  PyObject *locals, int closeit, PyCompilerFlags *flags)
{
    // ...
    mod = PyParser_ASTFromFileObject(fp, filename, NULL, start, 0, 0,
                                     flags, NULL, arena);  // build the AST
    // ...
    ret = run_mod(mod, filename, globals, locals, flags, arena);  // execute the code
    // ...
    return ret;
}
/* Compiles the module AST `mod` into a code object and evaluates it with
 * the given globals/locals. Returns NULL when compilation fails; otherwise
 * returns the result of run_eval_code_obj(). */
static PyObject *
run_mod(mod_ty mod, PyObject *filename, PyObject *globals, PyObject *locals,
PyCompilerFlags *flags, PyArena *arena)
{
PyCodeObject *co;
PyObject *v;
co = PyAST_CompileObject(mod, filename, flags, -1, arena); // obtain the bytecode
if (co == NULL)
return NULL;
v = run_eval_code_obj(co, globals, locals); // execute the bytecode
Py_DECREF(co);
return v;
}
Модуль
main
sort
sort_file
current frame
code, args, kwargs, const, etc.
code, args, kwargs, const, etc.
code, args, kwargs, const, etc.
In [9]: import inspect
In [10]: current_frame = inspect.currentframe()
In [11]: current_frame
Out[11]: <frame at 0x1068357a8, file '<...>', line 1, code <module>>
In [12]: current_frame.f_back
Out[12]: <frame at 0x7fa91b874df8, file './.py', line 2981, code run_code>
import traceback
traceback.print_stack()
(_PyEval_EvalFrameDefault)
Создаем фрейм и последовательно выполняем его opcode'ы
main_loop:
for (;;) {
// ...
switch (opcode) {
case TARGET(LOAD_FAST): {
PyObject *value = GETLOCAL(oparg);
Py_INCREF(value);
PUSH(value);
FAST_DISPATCH();
}
case TARGET(LOAD_CONST): {
PREDICTED(LOAD_CONST);
PyObject *value = GETITEM(consts, oparg);
Py_INCREF(value);
PUSH(value);
FAST_DISPATCH();
}
// ...
Каждый фрейм имеет стек с данными, над которым совершаются операции, закодированные в opcode
case TARGET(BINARY_SUBTRACT): {
PyObject *right = POP();
PyObject *left = TOP();
PyObject *diff = PyNumber_Subtract(left, right);
Py_DECREF(right);
Py_DECREF(left);
SET_TOP(diff);
if (diff == NULL)
goto error;
DISPATCH();
}
typedef struct _object {
Py_ssize_t ob_refcnt;
struct _typeobject *ob_type;
} PyObject;
typedef struct {
PyObject ob_base;
double ob_fval;
} PyFloatObject;
typedef struct {
PyObject ob_base;
PyObject *func_code; /* A code object, the __code__ attribute */
PyObject *func_globals; /* A dictionary (other mappings won't do) */
PyObject *func_defaults; /* NULL or a tuple */
PyObject *func_kwdefaults; /* NULL or a dict */
PyObject *func_closure; /* NULL or a tuple of cell objects */
PyObject *func_doc; /* The __doc__ attribute, can be anything */
PyObject *func_name; /* The __name__ attribute, a string object */
PyObject *func_dict; /* The __dict__ attribute, a dict or NULL */
PyObject *func_weakreflist; /* List of weak references */
PyObject *func_module; /* The __module__ attribute, can be anything */
PyObject *func_annotations; /* Annotations, a dict or NULL */
PyObject *func_qualname; /* The qualified name */
} PyFunctionObject;
class X:
    """Minimal class whose single method echoes its argument."""

    def method(self, y):
        """Print *y*."""
        # Print the argument itself, not the module-level loop variable ``i``.
        print(y)


x = X()
# Look the bound method up once, outside the loop, so the loop body does
# not repeat the attribute lookup on ``x`` every iteration.
method = x.method
for i in range(1000):
    method(i)
class X:
    """Minimal class whose single method echoes its argument."""

    def method(self, y):
        """Print *y*."""
        # Print the argument itself, not the module-level loop variable ``i``.
        print(y)


x = X()
# Look the bound method up once, outside the loop, so the loop body does
# not repeat the attribute lookup on ``x`` every iteration.
method = x.method
for i in range(1000):
    method(i)
Блок — это область памяти фиксированного размера, кратного 8 байтам
Например: если нужно выделить 13 Б, берем блок на 16 Б
struct pool_header {
union { block *_padding;
uint count; } ref; /* number of allocated blocks */
block *freeblock; /* pool's free list head */
struct pool_header *nextpool; /* next pool of this size class */
struct pool_header *prevpool; /* previous pool "" */
uint arenaindex; /* index into arenas of base adr */
uint szidx; /* block size class index */
uint nextoffset; /* bytes to virgin block */
uint maxnextoffset; /* largest valid nextoffset */
};
struct arena_object {
uintptr_t address;
block* pool_address;
uint nfreepools;
uint ntotalpools;
struct pool_header* freepools;
struct arena_object* nextarena;
struct arena_object* prevarena;
};
class size num pools blocks in use avail blocks
----- ---- --------- ------------- ------------
0 8 2 801 211
1 16 2 394 112
2 24 4 509 163
3 32 48 6024 24
4 40 99 9944 55
5 48 69 5686 110
...
# arenas allocated total = 170
...
76 arenas * 262144 bytes/arena = 19,922,944
...
Total = 19,922,944
14 free PyCFunctionObjects * 48 bytes each = 672
72 free PyDictObjects * 48 bytes each = 3,456
3 free PyFloatObjects * 24 bytes each = 72
8 free PyFrameObjects * 368 bytes each = 2,944
...
In [1]: import sys
In [2]: sys.getsizeof([])
Out[2]: 64
In [3]: sys.getsizeof([1,2,3])
Out[3]: 88
In [4]: sys.getsizeof({})
Out[4]: 240
In [15]: class X:
...: pass
...:
...:
In [16]: X.__dict__
Out[16]:
mappingproxy({'__module__': '__main__',
'__dict__': <attribute '__dict__' of 'X' objects>,
'__weakref__': <attribute '__weakref__' of 'X' objects>,
'__doc__': None})
In [17]: X().__dict__
Out[17]: {}
В памяти питона словари повсюду!
In [1]: class X:
...: __slots__ = ('a', )
...: def __init__(self, a):
...: self.a = a
...:
...:
In [2]: X(1).__dict__
...
AttributeError: 'X' object has no attribute '__dict__'
In [19]: a = 256
In [20]: b = 256
In [21]: a is b
Out[21]: True
In [22]: a = 257
In [23]: b = 257
In [24]: a is b
Out[24]: False
$ python3 -c "a = 'abc' * 20; b = 'abc' * 20; print(a is b)"
True
$ python3 -c "a = input(); b = input(); print(a is b)"
a
a
True
$ python3 -c "a = input(); b = input(); print(a is b)"
ab
ab
False
import sys
a = sys.intern(input())
b = sys.intern(input())
print(a is b)
/* Header of an object as shown on this slide: a reference count plus a
 * pointer to the object's type. */
typedef struct _object {
    Py_ssize_t ob_refcnt;           /* reference count */
    struct _typeobject *ob_type;
} PyObject;
foo = []
# 2 references, 1 from the foo var and 1 from getrefcount
print(sys.getrefcount(foo))
def bar(a):
# 4 references
# from the foo var, function argument,
# getrefcount and Python's function stack
print(sys.getrefcount(a))
bar(foo)
# 2 references, the function scope is destroyed
print(sys.getrefcount(foo))
object_1 = {}
object_2 = {}
object_1['obj2'] = object_2
object_2['obj1'] = object_1
del object_1, object_2
После создания любого контейнерного объекта проверяется, достиг ли счетчик некоторого порогового значения (700, 10, 10 по умолчанию), и при достижении запускается сборка для соответствующего поколения
gc.get_threshold
gc.set_threshold
import gc
gc.set_debug(gc.DEBUG_SAVEALL)
print(gc.get_count())
lst = []
lst.append(lst)
list_id = id(lst)
del lst
gc.collect()
for item in gc.garbage:
print(item)
assert list_id == id(item)
In [1]: class X:
...: def __del__(self):
...: print('del X')
...:
In [2]: x = X()
In [3]: del x
del X
In [10]: x = X()
In [11]: ref = weakref.ref(x)
In [12]: print(ref())
<__main__.X object at 0x102bdd668>
In [13]: del x
del X
In [14]: print(ref())
None
Line # Mem usage Increment Line Contents
==============================================
3 @profile
4 5.97 MB 0.00 MB def my_func():
5 13.61 MB 7.64 MB a = [1] * (10 ** 6)
6 166.20 MB 152.59 MB b = [2] * (2 * 10 ** 7)
7 13.61 MB -152.59 MB del b
8 13.61 MB 0.00 MB return a