A Non-coder's Journey with Python and Postgresql
Postgresql tips for the Python Coder
Agenda
-
What is this?
-
Python outside Postgres
-
Python inside Postgres
-
Tips
What is this?
The world's most advanced open source database.
integer (big,small, regular)
bit
serial (big, small, regular)
box
boolean
bytea
character
character varying
cidr
date
double precision
inet
path
psycopg2
>>> import psycopg2
>>> cnx = psycopg2.connect('postgres://rest:resting@54.208.29.106:5432/portent')
>>> crs = cnx.cursor()
>>> crs.execute("SELECT 1 + 3, 2.776 * 5.12, 4 * 7::float, 'arbitrary text', '{19.0,0.98,12}'::float[]")
>>> for t in crs.fetchone():
... print t,' is a ', type(t)
...
4 is a <type 'int'>
14.21312 is a <class 'decimal.Decimal'>
28.0 is a <type 'float'>
arbitrary text is a <type 'str'>
[19.0, 0.98, 12.0] is a <type 'list'>
>>> crs.execute('select generate_series(1,4), random()')
>>> crs.rowcount
4
>>> crs.fetchone()
(1, 0.733299259096384)
>>> crs.rownumber
1
>>> crs.scroll(-1)
>>> crs.rownumber
0
>>> crs.fetchone()
(1, 0.733299259096384)
>>> for qq in crs.fetchall():
... qq
...
(2, 0.428359337616712)
(3, 0.43456132337451)
(4, 0.561854084953666)
>>> crs.execute("insert into rigmarole VALUES ('pop',5)");
>>> crs.statusmessage
'INSERT 0 1'
>>> name = 'conundrum'
>>> num = 7
>>> crs.execute("insert into rigmarole VALUES (%s,%s)",(name, num));
>>> crs.statusmessage
'INSERT 0 1'
>>> crs.execute('select * from rigmarole')
>>> for qq in crs.fetchall():
... qq
...
('pop', 5)
('conundrum', 7)
>>> cnx.commit() # <<<<<---- Don't forget to commit!!!!!!!
>>> qry = "insert into rigmarole (nomad, elph) values (%s, %s)"
>>> for q in range(5,9):
... crs.execute(qry,(q,''.join(random.sample('postgres',4))))
...
>>> crs.execute('select * from rigmarole')
>>> for qq in crs.fetchall():
... qq
...
('pop', 5)
('conundrum', 7)
('srge', 5)
('spog', 6)
('esor', 7)
('stpr', 8)
>>>
>>> import psycopg2
>>> from psycopg2 import extras
>>> dsn = 'postgres://rest:resting@54.208.29.106:5432/portent?application_name=myapp'
>>> cnx = psycopg2.connect(dsn, cursor_factory=extras.RealDictCursor)
>>> crs = cnx.cursor()
>>> crs.execute('select * from rigmarole')
>>> for qq in crs.fetchall():
... qq
...
{'nomad': 5, 'elph': 'pop'}
{'nomad': 7, 'elph': 'conundrum'}
{'nomad': 6, 'elph': 'spog'}
{'nomad': 7, 'elph': 'esor'}
{'nomad': 8, 'elph': 'stpr'}
>>> import json
>>> crs.scroll(0,'absolute')
>>> json.dumps(crs.fetchmany(3))
'[{"nomad": 5, "elph": "pop"}, {"nomad": 7, "elph": "conundrum"}, {"nomad": 5, "elph": "srge"}]'
>>> # casting json to text in the database
>>> crs.execute('''select json_agg(row_to_json(rig.*))::text p
... from (select * from rigmarole limit 3) as rig''')
>>> crs.fetchone()['p']
'[{"elph":"pop","nomad":5}, {"elph":"conundrum","nomad":7}, {"elph":"srge","nomad":5}]'
>>> # using json_agg to generate a batch of JSON
>>> crs.execute('select json_agg(row_to_json(rig.*)) p from (select * from rigmarole limit 3) as rig')
>>> crs.fetchone()['p']
[{u'nomad': 5, u'elph': u'pop'}, {u'nomad': 7, u'elph': u'conundrum'}, {u'nomad': 5, u'elph': u'srge'}]
>>> cnx.autocommit = True
>>> crs = cnx.cursor()
>>> crs.execute('select * from rigmarole')
>>> for records in crs.fetchall():
... records
...
{'nomad': 5, 'elph': 'srge'}
{'nomad': 6, 'elph': 'spog'}
{'nomad': 7, 'elph': 'esor'}
{'nomad': 8, 'elph': 'stpr'}
>>> # no cnx.autocommit, the default is False
>>> crs = cnx.cursor()
>>> crs.execute('select * from rigmarole')
>>> for records in crs.fetchall():
... records
...
{'nomad': 5, 'elph': 'srge'}
{'nomad': 6, 'elph': 'spog'}
{'nomad': 7, 'elph': 'esor'}
{'nomad': 8, 'elph': 'stpr'}
>>> import psycopg2
>>> from psycopg2 import extras
>>> dsn = 'postgres://rest:resting@54.208.29.106:5432/portent?application_name=myapp'
>>> cnx = psycopg2.connect(dsn, cursor_factory=extras.RealDictCursor)
>>> crs.execute('''UPDATE rigmarole
... SET elph = 'turvy'
... where nomad = 6 ''')
>>> crs.execute('''UPDATE rigmarole
... SET elph = 'topsy'
... WHERE nomad = 8 ''')
>>> crs.execute('''select * from rigmarole
... WHERE nomad in (6,8) ''')
>>> crs.execute('''select * from rigmarole
... WHERE nomad in (6,8) ''')
>>> for res in crs.fetchall():
... res
...
{'nomad': 6, 'elph': 'turvy'}
{'nomad': 8, 'elph': 'topsy'}
>>> for res in crs.fetchall():
... res
...
{'nomad': 6, 'elph': 'turvy'}
{'nomad': 8, 'elph': 'stpr'}
>>> cnx.commit()
>>> crs.execute('''select * from rigmarole
... WHERE nomad in (6,8) ''')
>>> crs.fetchall():
[{'nomad': 6, 'elph': 'turvy'},{'nomad': 8, 'elph': 'topsy'}]
pl/pythonu
pl/python
pl/python3u
pl/python2u
or
portent|l|# do /* must be super user to create functions or ACB's in untrusted languages */ $$
import requests # HTTP for humans
url = 'https://raw.githubusercontent.com/rhysallister/lessArcane/master/README.rst'
r = requests.get(url)
plpy.execute('DROP TABLE IF EXISTS contrived')
plpy.execute('CREATE TABLE contrived( line_number SERIAL, line_text TEXT)')
for qq in r.text.split('\n'):
plpy.execute("""insert into contrived (line_text) values ({0})""".format(plpy.quote_literal(qq)))
$$ language plpythonu ;
DO
Time: 65.634 ms
portent|l|# select * from contrived;
line_number | line_text
-------------+-----------------------------------------------------------------------------------
1 | lessArcane
2 | ===========
3 | With lessArcane you can share your gists dumped from gistbox
4 | as a browseable library.
5 | `lessArcane.js` is written in coffeescript and compiled to js.
6 |
7 | We edit the resulting output just slightly by setting the json equal to
8 | a variable called `data`. If you look at the first line of our `sample.json`
9 | you'll see what we did.
10 |
11 | The `sample.json` file is called by `index.html` and processed by `lessArcane.js`
12 |
(12 rows)
Time: 27.924 ms
portent|l|#
portent|l|# CREATE EXTENSION plpythonu; /* MUST BE SUPERUSER TO create */
CREATE EXTENSION
Time: 32.880 ms
CREATE OR REPLACE FUNCTION _.paste_to_table(tablename text, tabledata text, delim text DEFAULT r'\t'::text)
RETURNS text AS $function$
##################################################################
# paste_to_table(tablename,delimiter,text)
# Quick create and load data to a table.
# __________________________________________________________
# First row of data will be used as column names. If
# table has illegal column names (i.e. spaces or col-
# umns that begin with a number) they will be legali-
# zed. lindim is used to determine the linebreak
# __________________________________________________________
##################################################################
import re
lindim = '\n'
textarray = tabledata.splitlines()
# create table
plpy.notice("Creating table {tablename}".format(tablename=tablename))
plpy.execute("CREATE TABLE {tablename}()".format(tablename=tablename))
# add columns
for colname in textarray[0].split(delim):
plpy.execute("ALTER TABLE {tablename} ADD COLUMN {col_name} TEXT".\
format(tablename=tablename, col_name=re.sub('[ \t\#]+','_',colname).lower()))
# populate table
plpy.notice("populating table {tablename}".format(tablename=tablename))
for thisline in textarray[1:]:
goods = [plpy.quote_literal(x)for x in thisline.split(delim)]
plpy.execute("INSERT INTO {tablename} VALUES ({values})".\
format(tablename=tablename,values=', '.join(goods)))
return "Inserted {length} rows into {tablename}".format(length = len(textarray[1:]), tablename=tablename)
$function$ LANGUAGE 'plpython3u';
portent|l|> select paste_to_table('rae_tae_tae',
$$commercialdivision feedername phases Length
St. Mary Annotto Bay - 210 3 15380
St. Mary Annotto Bay - 210 2 145
St. Mary Annotto Bay - 210 1 39729
St. Mary Annotto Bay - 310 3 21323
St. James Bogue - 310 1 139774
St. James Bogue - 410 3 18205
St. James Bogue - 410 1 529
KSAS Cane River - 310 3 13759$$);
NOTICE: 00000: Creating table rae_tae_tae
CONTEXT: PL/Python function "paste_to_table"
LOCATION: PLy_output, plpy_plpymodule.c:537
NOTICE: 00000: populating table rae_tae_tae
CONTEXT: PL/Python function "paste_to_table"
LOCATION: PLy_output, plpy_plpymodule.c:537
paste_to_table
----------------------------------
Inserted 8 rows into rae_tae_tae
(1 row)
Time: 35.310 ms
portent|l|> select * from rae_tae_tae ;
commercialdivision | feedername | phases | length
--------------------+-------------------+--------+--------
St. Mary | Annotto Bay - 210 | 3 | 15380
St. Mary | Annotto Bay - 210 | 2 | 145
St. Mary | Annotto Bay - 210 | 1 | 39729
St. Mary | Annotto Bay - 310 | 3 | 21323
St. James | Bogue - 310 | 1 | 139774
St. James | Bogue - 410 | 3 | 18205
St. James | Bogue - 410 | 1 | 529
KSAS | Cane River - 310 | 3 | 13759
(8 rows)
Time: 28.871 ms
portent|l|>
The Tips you were promised
ROLES
Have separate roles for DDL & DML
portent|l|> CREATE ROLE appowner LOGIN CREATEROLE CREATEDB; -- this role owns all the tables.
CREATE ROLE
Time: 28.986 ms
portent|l|> CREATE ROLE app_reader LOGIN;
CREATE ROLE
Time: 28.536 ms
Only owners can delete or modify tables
Using a separate role to query the data reduces the risk significantly
Common
With queries
WITH list AS (
SELECT objectid, facilityid, g FROM livewire.ocdb
UNION ALL
SELECT objectid, facilityid, g from livewire.ocda
)
SELECT
d2.*,
st_distance(st_setsrid(st_makepoint(x1::int,y1::int),3448),g),
facilityid,
FROM rhys.d2
LEFT JOIN list on
st_dwithin(st_setsrid(st_makepoint(x1::int,y1::int),3448),g),2)
Table
Expression
SELECT
d2.*,
st_distance(st_setsrid(st_makepoint(x1::int,y1::int),3448),g),
facilityid,
FROM rhys.d2
LEFT JOIN (
SELECT objectid, facilityid, g FROM livewire.ocdb
UNION ALL
SELECT objectid, facilityid, g from livewire.ocda)
AS list ON
st_dwithin(st_setsrid(st_makepoint(x1::int,y1::int),3448),g),2)
LISTEN/Notify
portent|l|> select pg_notify('link_up','helooooo');
pg_notify
-----------
(1 row)
Time: 28.154 ms
Asynchronous notification "link_up" with payload "helooooo" received from server process with PID 10400.
portent|l|> listen link_up;
LISTEN
Time: 28.892 ms
portent|l|> notify link_up, 'chop';
NOTIFY
Time: 27.578 ms
Asynchronous notification "link_up" with payload "chop" received from server process with PID 10400.
import pgpubsub
uri = 'postgres://rest:resting@54.208.29.106:5432/portent'
pubsub = pgpubsub.connect(uri)
pubsub.listen('gimme_a_link')
for e in pubsub.events():
print(e)
import pgpubsub
pguri = 'postgres://rest:resting@54.208.29.106:5432/portent'
pubsub = pgpubsub.connect(pguri)
for qq in range(1,10):
pubsub.notify('boomobam','Notification #'+str(qq))
Its
portent|l|> select * from onek where amount = 66.11;
colour | amount | department | time_p
--------+--------+-------------+------------------------
Puce | 66.11 | Health | 2015-12-28 18:49:00-05
Pink | 66.11 | Electronics | 2015-12-26 01:58:00-05
(2 rows)
Time: 28.370 ms
About
Time
portent|l|> set timezone TO 'Asia/Saigon';
SET
Time: 27.744 ms
portent|l|> select * from onek where amount = 66.11;
colour | amount | department | time_p
--------+--------+-------------+------------------------
Puce | 66.11 | Health | 2015-12-29 06:49:00+07
Pink | 66.11 | Electronics | 2015-12-26 13:58:00+07
(2 rows)
Time: 28.303 ms
Use timestamptz
Stores data as UTC
99e9 problems but a BRIn ain't one
portent|l|> create index nuffnuff_normaall on nuffnuff (id);
CREATE INDEX
Time: 35510.815 ms
portent|l|> create index nuffnuff_brin on nuffnuff using brin (id);
CREATE INDEX
Time: 7005.565 ms
portent|l|> select count(*) from nuffnuff ;
count
----------
47000002
(1 row)
Time: 27290.434 ms
portent|l|> \di+ nuf*
List of relations
Schema | Name | Type | Owner | Table | Size | Description
--------+-------------------+-------+-------+----------+---------+-------------
public | nuffnuff_brin | index | rhys | nuffnuff | 64 kB |
public | nuffnuff_normaall | index | rhys | nuffnuff | 1007 MB |
(2 rows)
portent|l|> \dt+ nuffnuff*
List of relations
Schema | Name | Type | Owner | Size | Description
--------+----------+-------+-------+---------+-------------
public | nuffnuff | table | rhys | 1985 MB |
(1 row)
GIN *is* the chaser
Generalized
Inverted
Index
Useful for unstructured data:
-
Arrays
-
JSON
-
ts_vectors
JSON
The (not so new) Hype
portent|l|> select * from json_test ;
-[ RECORD 1 ]------------------------------------------------------------
id | 1
payload | {"menu": {"id": "file", "popup": {"menuitem": [{"value": "New",
| "onclick": "CreateNewDoc()"}, {"value": "Open","onclick": "OpenDoc()"},
| {"value": "Close", "onclick": "CloseDoc()"}]}, "value": "File"}}
(1 row)
Time: 28.140 ms
portent|l|> select jsonb_pretty(payload->'menu'->'popup'->'menuitem') from json_test ;
jsonb_pretty
-------------------------------------
[ +
{ +
"value": "New", +
"onclick": "CreateNewDoc()"+
}, +
{ +
"value": "Open", +
"onclick": "OpenDoc()" +
}, +
{ +
"value": "Close", +
"onclick": "CloseDoc()" +
} +
]
(1 row)
Time: 28.340 ms
JSON
The (not so new) Hype
portent|l|> select * from json_test ;
-[ RECORD 1 ]--------------------------------------------------------------------------------------
id | 1
payload | {"menu": {"id": "file", "popup": {"menuitem":
| [{"value": "New", "onclick": "CreateNewDoc()"}, {"value": "Open", "onclick": "OpenDoc()"},
| {"value": "Close", "onclick": "CloseDoc()"}]}, "value": "File"}}
(1 row)
Time: 28.140 ms
portent|l|> select jsonb_pretty(payload #> '{menu,popup,menuitem,2}') from json_test ;
jsonb_pretty
-----------------------------
{ +
"value": "Close", +
"onclick": "CloseDoc()"+
}
(1 row)
Time: 28.347 ms
JSON
The (not so new) Hype
portent|l|> select (row_to_json(foo.*))::jsonb - 'feedername' from
(select * from rae_tae_tae limit 5) as foo;
?column?
------------------------------------------------------------------------
{"length": "15380", "phases": "3", "commercialdivision": "St. Mary"}
{"length": "145", "phases": "2", "commercialdivision": "St. Mary"}
{"length": "39729", "phases": "1", "commercialdivision": "St. Mary"}
{"length": "21323", "phases": "3", "commercialdivision": "St. Mary"}
{"length": "139774", "phases": "1", "commercialdivision": "St. James"}
(5 rows)
Time: 28.510 ms
portent|l|> select payload #> '{menu,popup,menuitem,2}' from json_test ;
?column?
---------------------------------------------
{"value": "Close", "onclick": "CloseDoc()"}
(1 row)
Time: 30.006 ms
portent|l|> select json_agg(row_to_json(foo.*)) from (select * from rae_tae_tae) as foo;
-[ RECORD 1 ]-------------------------------------------------------------------------------------------------------------------------------------------
json_agg | [{"commercialdivision":"St. Mary","feedername":"Annotto Bay - 210","phases":"3","length":"15380"}, {"commercialdivision":"St. Mary","feedername":"Annotto Bay - 210","phases":"2","length":"145"}, {"commercialdivision":"St. Mary","feedername":"Annotto Bay - 210","phases":"1","length":"39729"}, {"commercialdivision":"St. Mary","feedername":"Annotto Bay - 310","phases":"3","length":"21323"}, {"commercialdivision":"St. James","feedername":"Bogue - 310","phases":"1","length":"139774"}, {"commercialdivision":"St. James","feedername":"Bogue - 410","phases":"3","length":"18205"}, {"commercialdivision":"St. James","feedername":"Bogue - 410","phases":"1","length":"529"}, {"commercialdivision":"KSAS","feedername":"Cane River - 310","phases":"3","length":"13759"}]
Time: 32.914 ms
Full
Text
Search
Ranking
Custom Dictionaries
GIST & GIN index support
Stemming
EXTENSIONS
Postgresql's
biggest
Feature
EXTENSIONS
Provides support for spatial types,
spatial queries and indexing.
EXTENSIONS
Citus
cstore_fdw
pg_shard
Postgres is pretty good at scaling up.
Add more memory and more cores and all will be well with the world.
EXTENSIONS
Foreign Data Wrappers
postgres_fdw |
redis_fdw |
mongo_fdw |
cstore_fdw |
file_fdw |
ogr_fdw |
EXTENSIONS
Roll your own FDW
multicorn
https://github.com/rotten/hue-multicorn-postgresql-fdw
EXTENSIONS
http://pgxn.org/
Questions?
deck
By rhysallister
deck
- 285