A Non-coder's Journey with Python and Postgresql

Postgresql tips for the Python Coder

Agenda

  • What is this?

  • Python outside Postgres

  • Python inside Postgres

  • Tips

What is this?

The world's most advanced open source database.

integer (big,small, regular)

bit

serial (big, small, regular)

box

boolean

bytea

character

character varying

cidr

date

double precision

inet

path

psycopg2

>>> import psycopg2
>>> cnx = psycopg2.connect('postgres://rest:resting@54.208.29.106:5432/portent')
>>> crs = cnx.cursor()
>>> crs.execute("SELECT 1 + 3, 2.776 * 5.12, 4 * 7::float, 'arbitrary text', '{19.0,0.98,12}'::float[]")
>>> for t in crs.fetchone():
...   print t,' is a ', type(t)
... 
4  is a  <type 'int'>
14.21312  is a  <class 'decimal.Decimal'>
28.0  is a  <type 'float'>
arbitrary text  is a  <type 'str'>
[19.0, 0.98, 12.0]  is a  <type 'list'>
>>> crs.execute('select generate_series(1,4), random()')
>>> crs.rowcount
4
>>> crs.fetchone()
(1, 0.733299259096384)
>>> crs.rownumber
1
>>> crs.scroll(-1)
>>> crs.rownumber
0
>>> crs.fetchone()
(1, 0.733299259096384)
>>> for qq in crs.fetchall():
...   qq
... 
(2, 0.428359337616712)
(3, 0.43456132337451)
(4, 0.561854084953666)
>>> crs.execute("insert into rigmarole VALUES ('pop',5)");
>>> crs.statusmessage
'INSERT 0 1'
>>> name = 'conundrum'
>>> num = 7
>>> crs.execute("insert into rigmarole VALUES (%s,%s)",(name, num));
>>> crs.statusmessage
'INSERT 0 1'
>>> crs.execute('select * from rigmarole')
>>> for qq in crs.fetchall():
...   qq
... 
('pop', 5)
('conundrum', 7)
>>> cnx.commit()  # <<<<<---- Don't forget to commit!!!!!!!
>>> qry = "insert into rigmarole (nomad, elph) values (%s, %s)"
>>> for q in range(5,9):
...   crs.execute(qry,(q,''.join(random.sample('postgres',4))))
... 
>>> crs.execute('select * from rigmarole')
>>> for qq in crs.fetchall():
...   qq
... 
('pop', 5)
('conundrum', 7)
('srge', 5)
('spog', 6)
('esor', 7)
('stpr', 8)
>>>
>>> import psycopg2
>>> from psycopg2 import extras
>>> dsn = 'postgres://rest:resting@54.208.29.106:5432/portent?application_name=myapp'
>>> cnx = psycopg2.connect(dsn, cursor_factory=extras.RealDictCursor)
>>> crs = cnx.cursor()
>>> crs.execute('select * from rigmarole')
>>> for qq in crs.fetchall():
...   qq
... 
{'nomad': 5, 'elph': 'pop'}
{'nomad': 7, 'elph': 'conundrum'}
{'nomad': 6, 'elph': 'spog'}
{'nomad': 7, 'elph': 'esor'}
{'nomad': 8, 'elph': 'stpr'}
>>> import json
>>> crs.scroll(0,'absolute')
>>> json.dumps(crs.fetchmany(3))
'[{"nomad": 5, "elph": "pop"}, {"nomad": 7, "elph": "conundrum"}, {"nomad": 5, "elph": "srge"}]'
>>> # casting json to text in the database
>>> crs.execute('''select json_agg(row_to_json(rig.*))::text p 
...     from (select * from rigmarole limit 3) as rig''')
>>> crs.fetchone()['p']
'[{"elph":"pop","nomad":5}, {"elph":"conundrum","nomad":7}, {"elph":"srge","nomad":5}]'
>>> # using json_agg to generate a batch of JSON 
>>> crs.execute('select json_agg(row_to_json(rig.*)) p from (select * from rigmarole limit 3) as rig')
>>> crs.fetchone()['p']
[{u'nomad': 5, u'elph': u'pop'}, {u'nomad': 7, u'elph': u'conundrum'}, {u'nomad': 5, u'elph': u'srge'}]
>>> cnx.autocommit = True
>>> crs = cnx.cursor()
>>> crs.execute('select * from rigmarole')
>>> for records in crs.fetchall():
...   records
... 
{'nomad': 5, 'elph': 'srge'}
{'nomad': 6, 'elph': 'spog'}
{'nomad': 7, 'elph': 'esor'}
{'nomad': 8, 'elph': 'stpr'}
>>> # no cnx.autocommit, the default is False
>>> crs = cnx.cursor()
>>> crs.execute('select * from rigmarole')
>>> for records in crs.fetchall():
...   records
... 
{'nomad': 5, 'elph': 'srge'}
{'nomad': 6, 'elph': 'spog'}
{'nomad': 7, 'elph': 'esor'}
{'nomad': 8, 'elph': 'stpr'}
>>> import psycopg2
>>> from psycopg2 import extras
>>> dsn = 'postgres://rest:resting@54.208.29.106:5432/portent?application_name=myapp'
>>> cnx = psycopg2.connect(dsn, cursor_factory=extras.RealDictCursor)
>>> crs.execute('''UPDATE rigmarole
...             SET elph = 'turvy'
...             where nomad = 6 ''')
>>> crs.execute('''UPDATE rigmarole
...             SET elph = 'topsy'
...             WHERE nomad = 8 ''')
>>> crs.execute('''select * from rigmarole
...             WHERE nomad in (6,8) ''')
>>> crs.execute('''select * from rigmarole
...             WHERE nomad in (6,8) ''')
>>> for res in crs.fetchall():
...   res
... 
{'nomad': 6, 'elph': 'turvy'}
{'nomad': 8, 'elph': 'topsy'}
>>> for res in crs.fetchall():
...   res
... 
{'nomad': 6, 'elph': 'turvy'}
{'nomad': 8, 'elph': 'stpr'}
>>> cnx.commit()
>>> crs.execute('''select * from rigmarole
...             WHERE nomad in (6,8) ''')
>>> crs.fetchall():
[{'nomad': 6, 'elph': 'turvy'},{'nomad': 8, 'elph': 'topsy'}]

pl/pythonu

pl/python

pl/python3u

pl/python2u

or

portent|l|# do /* must be super user to create functions or ACB's in untrusted languages */ $$ 
import requests  # HTTP for humans
url = 'https://raw.githubusercontent.com/rhysallister/lessArcane/master/README.rst'
r = requests.get(url)
plpy.execute('DROP TABLE IF EXISTS contrived')
plpy.execute('CREATE TABLE  contrived( line_number SERIAL, line_text TEXT)')
for qq in r.text.split('\n'):
  plpy.execute("""insert into contrived (line_text) values ({0})""".format(plpy.quote_literal(qq)))
$$ language plpythonu ;
DO
Time: 65.634 ms
portent|l|# select * from contrived;
 line_number |                                     line_text                                     
-------------+-----------------------------------------------------------------------------------
           1 | lessArcane
           2 | ===========
           3 | With lessArcane you can share your gists dumped from gistbox
           4 | as a browseable library.
           5 | `lessArcane.js` is written in coffeescript and compiled to js.
           6 | 
           7 | We edit the resulting output just slightly by setting the json equal to 
           8 | a variable called `data`. If you look at the first line of our `sample.json`
           9 | you'll see what we did.
          10 | 
          11 | The `sample.json` file is called by `index.html` and processed by `lessArcane.js`
          12 | 
(12 rows)

Time: 27.924 ms
portent|l|# 
portent|l|# CREATE EXTENSION plpythonu; /* MUST BE SUPERUSER TO create */
CREATE EXTENSION
Time: 32.880 ms
CREATE OR REPLACE FUNCTION _.paste_to_table(tablename text, tabledata text, delim text DEFAULT r'\t'::text)
  RETURNS text AS $function$

##################################################################
#	paste_to_table(tablename,delimiter,text)
#	Quick create and load data to a table.
#	__________________________________________________________
#	First row of data will be used as column names. If 
#	table has illegal column names (i.e. spaces or col-
#	umns that begin with a number) they will be legali-
#	zed. lindim is used to determine the linebreak
#	__________________________________________________________
##################################################################

import re
lindim = '\n'
textarray = tabledata.splitlines()
# create table
plpy.notice("Creating table {tablename}".format(tablename=tablename))
plpy.execute("CREATE TABLE {tablename}()".format(tablename=tablename))
# add columns
for colname in textarray[0].split(delim):
  plpy.execute("ALTER TABLE {tablename} ADD COLUMN {col_name} TEXT".\
                     format(tablename=tablename, col_name=re.sub('[ \t\#]+','_',colname).lower()))
# populate table
plpy.notice("populating table {tablename}".format(tablename=tablename))
for thisline in textarray[1:]:
  goods = [plpy.quote_literal(x)for x in thisline.split(delim)]
  plpy.execute("INSERT INTO {tablename} VALUES ({values})".\
                     format(tablename=tablename,values=', '.join(goods)))

return "Inserted {length} rows into {tablename}".format(length = len(textarray[1:]), tablename=tablename)

$function$ LANGUAGE 'plpython3u';
portent|l|> select paste_to_table('rae_tae_tae',
$$commercialdivision    feedername      phases  Length
St. Mary        Annotto Bay - 210       3       15380
St. Mary        Annotto Bay - 210       2       145
St. Mary        Annotto Bay - 210       1       39729
St. Mary        Annotto Bay - 310       3       21323
St. James       Bogue - 310     1       139774
St. James       Bogue - 410     3       18205
St. James       Bogue - 410     1       529
KSAS    Cane River - 310        3       13759$$);
NOTICE:  00000: Creating table rae_tae_tae
CONTEXT:  PL/Python function "paste_to_table"
LOCATION:  PLy_output, plpy_plpymodule.c:537
NOTICE:  00000: populating table rae_tae_tae
CONTEXT:  PL/Python function "paste_to_table"
LOCATION:  PLy_output, plpy_plpymodule.c:537
          paste_to_table          
----------------------------------
 Inserted 8 rows into rae_tae_tae
(1 row)

Time: 35.310 ms
portent|l|> select * from rae_tae_tae ;
 commercialdivision |    feedername     | phases | length 
--------------------+-------------------+--------+--------
 St. Mary           | Annotto Bay - 210 | 3      | 15380
 St. Mary           | Annotto Bay - 210 | 2      | 145
 St. Mary           | Annotto Bay - 210 | 1      | 39729
 St. Mary           | Annotto Bay - 310 | 3      | 21323
 St. James          | Bogue - 310       | 1      | 139774
 St. James          | Bogue - 410       | 3      | 18205
 St. James          | Bogue - 410       | 1      | 529
 KSAS               | Cane River - 310  | 3      | 13759
(8 rows)

Time: 28.871 ms
portent|l|> 

The Tips you were promised

ROLES

Have separate roles for DDL & DML

portent|l|> CREATE ROLE appowner LOGIN CREATEROLE CREATEDB; -- this role owns all the tables.
CREATE ROLE
Time: 28.986 ms
portent|l|> CREATE ROLE app_reader LOGIN;
CREATE ROLE
Time: 28.536 ms

Only owners can delete or modify tables

Using a separate role to query the data reduces the risk significantly

Common

With queries

WITH list AS (
    SELECT objectid, facilityid, g FROM livewire.ocdb
    UNION ALL
    SELECT objectid, facilityid, g from livewire.ocda
    )
SELECT 
  d2.*,
  st_distance(st_setsrid(st_makepoint(x1::int,y1::int),3448),g),
  facilityid,
FROM rhys.d2
LEFT JOIN list on 
  st_dwithin(st_setsrid(st_makepoint(x1::int,y1::int),3448),g),2)

Table

Expression

SELECT 
  d2.*,
  st_distance(st_setsrid(st_makepoint(x1::int,y1::int),3448),g),
  facilityid,
FROM rhys.d2
LEFT JOIN (
    SELECT objectid, facilityid, g FROM livewire.ocdb
    UNION ALL
    SELECT objectid, facilityid, g from livewire.ocda) 
AS list ON
  st_dwithin(st_setsrid(st_makepoint(x1::int,y1::int),3448),g),2)

LISTEN/Notify

portent|l|> select pg_notify('link_up','helooooo');
 pg_notify 
-----------
 
(1 row)

Time: 28.154 ms
Asynchronous notification "link_up" with payload "helooooo" received from server process with PID 10400.
portent|l|> listen link_up;
LISTEN
Time: 28.892 ms
portent|l|> notify link_up, 'chop';
NOTIFY
Time: 27.578 ms
Asynchronous notification "link_up" with payload "chop" received from server process with PID 10400.
import pgpubsub
uri = 'postgres://rest:resting@54.208.29.106:5432/portent'
pubsub = pgpubsub.connect(uri)
pubsub.listen('gimme_a_link')
for e in pubsub.events():
    print(e)
import pgpubsub
pguri = 'postgres://rest:resting@54.208.29.106:5432/portent'
pubsub = pgpubsub.connect(pguri)
for qq in range(1,10):
    pubsub.notify('boomobam','Notification #'+str(qq))

Its

portent|l|> select * from onek where amount = 66.11;
 colour | amount | department  |         time_p         
--------+--------+-------------+------------------------
 Puce   |  66.11 | Health      | 2015-12-28 18:49:00-05
 Pink   |  66.11 | Electronics | 2015-12-26 01:58:00-05
(2 rows)

Time: 28.370 ms

About

Time

portent|l|> set timezone TO 'Asia/Saigon';
SET
Time: 27.744 ms
portent|l|> select * from onek where amount = 66.11;
 colour | amount | department  |         time_p         
--------+--------+-------------+------------------------
 Puce   |  66.11 | Health      | 2015-12-29 06:49:00+07
 Pink   |  66.11 | Electronics | 2015-12-26 13:58:00+07
(2 rows)

Time: 28.303 ms

Use timestamptz

Stores data as UTC

99e9 problems but a BRIn ain't one

portent|l|> create index nuffnuff_normaall on nuffnuff (id);
CREATE INDEX
Time: 35510.815 ms
portent|l|> create index nuffnuff_brin  on nuffnuff using brin (id);
CREATE INDEX
Time: 7005.565 ms
portent|l|> select count(*) from nuffnuff ;
  count   
----------
 47000002
(1 row)

Time: 27290.434 ms
portent|l|> \di+ nuf*
                               List of relations
 Schema |       Name        | Type  | Owner |  Table   |  Size   | Description 
--------+-------------------+-------+-------+----------+---------+-------------
 public | nuffnuff_brin     | index | rhys  | nuffnuff | 64 kB   | 
 public | nuffnuff_normaall | index | rhys  | nuffnuff | 1007 MB | 
(2 rows)
portent|l|> \dt+ nuffnuff*
                     List of relations
 Schema |   Name   | Type  | Owner |  Size   | Description 
--------+----------+-------+-------+---------+-------------
 public | nuffnuff | table | rhys  | 1985 MB | 
(1 row)

GIN *is* the chaser

Generalized

Inverted

Index

Useful for unstructured data:

  • Arrays

  • JSON

  • ts_vectors

JSON

The (not so new) Hype

portent|l|> select * from json_test ;
-[ RECORD 1 ]------------------------------------------------------------
id      | 1
payload | {"menu": {"id": "file", "popup": {"menuitem": [{"value": "New", 
        | "onclick": "CreateNewDoc()"}, {"value": "Open","onclick": "OpenDoc()"},
        | {"value": "Close", "onclick": "CloseDoc()"}]}, "value": "File"}}

(1 row)

Time: 28.140 ms
portent|l|> select jsonb_pretty(payload->'menu'->'popup'->'menuitem') from json_test ;
            jsonb_pretty             
-------------------------------------
 [                                  +
     {                              +
         "value": "New",            +
         "onclick": "CreateNewDoc()"+
     },                             +
     {                              +
         "value": "Open",           +
         "onclick": "OpenDoc()"     +
     },                             +
     {                              +
         "value": "Close",          +
         "onclick": "CloseDoc()"    +
     }                              +
 ]
(1 row)

Time: 28.340 ms

JSON

The (not so new) Hype

portent|l|> select * from json_test ;
-[ RECORD 1 ]--------------------------------------------------------------------------------------
id      | 1
payload | {"menu": {"id": "file", "popup": {"menuitem": 
        | [{"value": "New", "onclick": "CreateNewDoc()"}, {"value": "Open", "onclick": "OpenDoc()"},
        | {"value": "Close", "onclick": "CloseDoc()"}]}, "value": "File"}}

(1 row)

Time: 28.140 ms
portent|l|> select jsonb_pretty(payload #> '{menu,popup,menuitem,2}') from json_test ;
        jsonb_pretty         
-----------------------------
 {                          +
     "value": "Close",      +
     "onclick": "CloseDoc()"+
 }
(1 row)

Time: 28.347 ms

JSON

The (not so new) Hype

portent|l|> select (row_to_json(foo.*))::jsonb - 'feedername' from 
  (select * from rae_tae_tae  limit 5) as foo;                                                                                                                                   
                                ?column?                                
------------------------------------------------------------------------
 {"length": "15380", "phases": "3", "commercialdivision": "St. Mary"}
 {"length": "145", "phases": "2", "commercialdivision": "St. Mary"}
 {"length": "39729", "phases": "1", "commercialdivision": "St. Mary"}
 {"length": "21323", "phases": "3", "commercialdivision": "St. Mary"}
 {"length": "139774", "phases": "1", "commercialdivision": "St. James"}
(5 rows)

Time: 28.510 ms
portent|l|> select payload #> '{menu,popup,menuitem,2}' from json_test ; 
                  ?column?                   
---------------------------------------------
 {"value": "Close", "onclick": "CloseDoc()"}
(1 row)

Time: 30.006 ms
portent|l|> select json_agg(row_to_json(foo.*)) from (select * from rae_tae_tae) as foo;
-[ RECORD 1 ]-------------------------------------------------------------------------------------------------------------------------------------------
json_agg | [{"commercialdivision":"St. Mary","feedername":"Annotto Bay - 210","phases":"3","length":"15380"}, {"commercialdivision":"St. Mary","feedername":"Annotto Bay - 210","phases":"2","length":"145"}, {"commercialdivision":"St. Mary","feedername":"Annotto Bay - 210","phases":"1","length":"39729"}, {"commercialdivision":"St. Mary","feedername":"Annotto Bay - 310","phases":"3","length":"21323"}, {"commercialdivision":"St. James","feedername":"Bogue - 310","phases":"1","length":"139774"}, {"commercialdivision":"St. James","feedername":"Bogue - 410","phases":"3","length":"18205"}, {"commercialdivision":"St. James","feedername":"Bogue - 410","phases":"1","length":"529"}, {"commercialdivision":"KSAS","feedername":"Cane River - 310","phases":"3","length":"13759"}]

Time: 32.914 ms

Full

Text

Search

Ranking

Custom Dictionaries

GIST & GIN index support

Stemming

EXTENSIONS

Postgresql's

biggest

Feature

EXTENSIONS

Provides support for spatial types, 

spatial queries and indexing.

EXTENSIONS

Citus

cstore_fdw

pg_shard

Postgres is pretty good at scaling up.

Add more memory and more cores and all will be well with the world.

EXTENSIONS

Foreign Data Wrappers

postgres_fdw
redis_fdw
mongo_fdw
cstore_fdw
file_fdw
ogr_fdw

EXTENSIONS

Roll your own FDW

multicorn

https://github.com/rotten/hue-multicorn-postgresql-fdw

EXTENSIONS

http://pgxn.org/

Questions?

Made with Slides.com