25 October 2018
Günce Kaya
guncekaya.blogspot.com
@gguncesi
8.4
9.1
9.2
9.3
9.4
9.5
9.6
10
11
Access external source without duplicated or replaced data
Clearer syntax for accessing remote tables
INSERT, UPDATE, DELETE statements on foreign tables (not all of them!)
Any data can be used, not only relational data.
Some FDWs support push down for particular clauses.
FDW executes queries in remote servers for particular clauses to reduce the amount of transferred data from external source. (WHERE pushdown, JOIN pushdown, etc.)
A foreign table has meta-table definition.
It helps to avoid running "SELECT * FROM table;"
A connection string only needs to be defined one time. FDW uses SERVER to do it.
Some FDWs support connection pooling.
artemis=# explain (analyze, verbose) select * from students limit 1;
QUERY PLAN
--------------------------------------------------------------------------------
---------------------------------------------
Limit (cost=100.00..100.04 rows=1 width=36) (actual time=19.554..19.567 rows=1
loops=1)
Output: id, name
-> Foreign Scan on public.students (cost=100.00..150.95 rows=1365 width=36)
(actual time=19.544..19.549 rows=1 loops=1)
Output: id, name
Remote SQL: SELECT id, name FROM public.students
Planning time: 4.123 ms
Execution time: 275.614 ms
(7 rows)
-- Create partition table
artemis=# CREATE TABLE all_class (i integer, random double precision)
PARTITION BY RANGE (i);
CREATE TABLE
-- Attach foreign tables to the partition table
artemis=# ALTER TABLE all_class ATTACH PARTITION pg10_class
FOR VALUES from (1) TO (200);
ALTER TABLE
artemis=# ALTER TABLE all_class ATTACH PARTITION pg10_class2
FOR VALUES from (200) TO (300);
ALTER TABLE
artemis=# select * from all_class ;
i | random
-----+--------------------
200 | 0.794649593532085
1 | 0.979228006210178
PostgreSQL
PostgreSQL
PostgreSQL
PostgreSQL
PostgreSQL
PostgreSQL
PostgreSQL
PostgreSQL
file_fdw
MySQL
artemis=# select ft.foreign_table_name,
fs.foreign_server_name,
fs.foreign_data_wrapper_name
from information_schema.foreign_tables ft inner join information_schema.foreign_servers fs
on fs.foreign_server_name = ft.foreign_server_name;
foreign_table_name | foreign_server_name | foreign_data_wrapper_name
--------------------+---------------------+---------------------------
file_class | csv | file_fdw
mysql_class | mysql_server | mysql_fdw
pg10_class | pg10_luna_server | postgres_fdw
(3 rows)
artemis=# \d+ all_class
Table "public.all_class"
Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
--------+------------------+-----------+----------+---------+---------+--------------+-------------
i | integer | | | | plain | |
random | double precision | | | | plain | |
Partition key: RANGE (i)
Partitions: file_class FOR VALUES FROM (600) TO (700),
mysql_class FOR VALUES FROM (500) TO (600),
pg10_class FOR VALUES FROM (1) TO (200)
artemis=# select a.i, a.random,
(case
when exists (select 1 from file_class where i=a.i) then 'file_class'
when exists (select 1 from pg10_class where i=a.i) then 'pg10_class'
else 'mysql_class'
end) as foreign_table_name
from all_class a ;
i | random | foreign_table_name
-----+--------------------+------------------------
13 | 0.979228006210178 | pg10_class
500 | 2.8 | mysql_class
1 | 4.6643 | file_class
(3 rows)
artemis=# explain (analyze, verbose) select avg(id) from students where id<10;
QUERY PLAN
--------------------------------------------------------------------------------
-----------------------------------------
Aggregate (cost=168.50..168.51 rows=1 width=32) (actual time=2.635..2.650 rows
=1 loops=1)
Output: avg(id)
-> Foreign Scan on public.students (cost=100.00..166.06 rows=975 width=4) (
actual time=2.477..2.547 rows=9 loops=1)
Output: id, name
Remote SQL: SELECT id FROM public.students WHERE ((id < 10))
Planning time: 0.240 ms
Execution time: 5.149 ms
(7 rows)
Aggregate Functions (PG 9.6)
Aggregate Functions (PG 10)
artemis=# explain (analyze, verbose) select avg(id) from students where id<10;
QUERY PLAN
--------------------------------------------------------------------------------
---------------
Foreign Scan (cost=102.44..149.03 rows=1 width=32) (actual time=3.001..3.003 r
ows=1 loops=1)
Output: (avg(id))
Relations: Aggregate on (public.students)
Remote SQL: SELECT avg(id) FROM public.students WHERE ((id < 10))
Planning time: 29.511 ms
Execution time: 18.558 ms
(6 rows)
Pushdown for JOIN statements and Sorting
artemis=# explain (analyze, verbose)
select * from class c1 inner join class c2 on c1.class_name=c2.class_name
order by c1.class_group limit 10;
QUERY PLAN
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
-----------------------------------------------------------
Limit (cost=100.00..106.23 rows=10 width=32) (actual time=2.730..2.739 rows=1
0 loops=1)
Output: c1.class_name, c1.class_group, c2.class_name, c2.class_group, c1.cla
ss_group
-> Foreign Scan (cost=100.00..16223.03 rows=25878 width=32) (actual time=2
.728..2.733 rows=10 loops=1)
Output: c1.class_name, c1.class_group, c2.class_name, c2.class_group,
c1.class_group
Relations: (public.class c1) INNER JOIN (public.class c2)
Remote SQL: SELECT r1.class_name, r1.class_group, r2.class_name, r2.cl
ass_group FROM (public.class r1 INNER JOIN public.class r2 ON (((r1.class_name
= r2.class_name)))) ORDER BY r1.class_group ASC NULLS LAST
Planning time: 0.309 ms
Execution time: 4.753 ms
(8 rows)
CREATE FOREIGN TABLE <table name>
(col1 datatype, col2 datatype ...)
SERVER <server name>;
IMPORT FOREIGN SCHEMA <target schema>
from server <server name>
into <name of postgresql schema>;
CREATE EXTENSION postgres_fdw;
CREATE SERVER <server name>
FOREIGN DATA WRAPPER <fdw name>
OPTIONS(....);
CREATE SERVER test_server
FOREIGN DATA WRAPPER postgres_fdw
OPTIONS(host '192.168.111.1', port '5432');
CREATE USER MAPPING FOR <user>
server <server name>
options(....);
-- An example;
CREATE USER MAPPING FOR postgres
SERVER test_server
OPTIONS(user 'reporting', password 'secret');
The data types of the columns of the remote tables and foreign tables should have a similar structure.
RDBMS FDW
jdbc_fdw
firebird_fdw
informix_fdw
..
postgres_fdw IN-CORE
oracle_fdw
tds_fdw
mysql_fdw
odbc_fdw
sqlite_fdw
RDBMS FDW
jdbc_fdw
firebird_fdw
informix_fdw
..
postgres_fdw IN-CORE
oracle_fdw
tds_fdw
mysql_fdw
odbc_fdw
sqlite_fdw
RDBMS FDW
jdbc_fdw
firebird_fdw
informix_fdw
..
postgres_fdw IN-CORE
oracle_fdw
tds_fdw
mysql_fdw
odbc_fdw
sqlite_fdw
RDBMS FDW
jdbc_fdw
firebird_fdw
informix_fdw
..
postgres_fdw IN-CORE
oracle_fdw
tds_fdw
mysql_fdw
odbc_fdw
sqlite_fdw
RDBMS FDW
jdbc_fdw
firebird_fdw
informix_fdw
..
postgres_fdw IN-CORE
oracle_fdw
tds_fdw
mysql_fdw
odbc_fdw
sqlite_fdw
NoSQL FDW
File Wrappers
ldap_fdw
blackhole_fdw
twitter_fdw
..
Others..
Generating FDW is easy with Multicorn.
Any source can be your dataset in Postgres.
Multicorn makes it possible to use your source as Foreign Table to query in Postgres.
FileSystem Foreign Data Wrapper (
RST files can be a source.
File pattern is important.
Imap Foreign Data Wrapper (
LDAP Foreign Data Wrapper (
CSV Foreign Data Wrapper (
Use CSV files as a source for Foreign Tables.
RSS foreign data wrapper (
items: title, link, description, author, pubDate, source
Each FDW has its own class.
Use class names in CREATE SERVER.
For fsfdw:
Class: multicorn.fsfdw.FilesystemFdw
Source code: multicorn/fsfdw/__init__.py
main_directory/
2018/
01/
report1 - 2018-01-01.csv
report2 - 2018-02-01.csv
report3 - 2018-03-01.csv
report4 - 2018-03-05.csv
02/
report1 - 2018-01-01.csv
report2 - 2018-02-01.csv
2017/
01/
report1 - 2017-01-01.csv
report2 - 2017-02-01.csv
File Structure
main_directory/
2018/
01/
report1 - 2018-01-01.csv
report2 - 2018-02-01.csv
report3 - 2018-03-01.csv
report4 - 2018-03-05.csv
02/
report1 - 2018-01-01.csv
report2 - 2018-02-01.csv
2017/
01/
report1 - 2017-01-01.csv
report2 - 2017-02-01.csv
File Structure
Year | Month | Date | Report_name | file_name |
2018 | 01 | 2018-02-01 | report | report1 - 2018-01-01.csv |
Foreign Table
-- First step: Create Server!
artemis=# CREATE SERVER filesystem_srv foreign data wrapper multicorn options (
wrapper 'multicorn.fsfdw.FilesystemFdw');
CREATE SERVER
-- Second step: Create Foreign Table!
artemis=# CREATE FOREIGN TABLE fs_reports (
f_year INTEGER,
f_month INTEGER,
f_date TEXT,
f_report_name TEXT,
f_file_name TEXT,
f_content TEXT)
SERVER filesystem_srv OPTIONS(
root_dir '/var/lib/pgsql/fsfdw_files/',
pattern '{f_year}/{f_month}/{f_report_name} - {f_date}.csv',
filename_column 'f_file_name',
content_column 'f_content');
Options
/var/lib/pgsql/fsfdw_files/2018/01/report1 - 2018-01-01.csv
root_dir
f_year
f_date
f_report_name
f_month
f_file_name
The Result:
artemis=# select * from fs_reports limit 1;
f_year | f_month | f_date | f_report_name | f_file_name | f_content
--------+---------+------------+---------------+----------------------------------+--------------------------------
2018 | 1 | 2018-02-01 | report1 | 2018/01/report1 - 2018-02-01.csv | This file doesn't have content!+
| | | | |
(1 row)
Items: title, link, description, author, pubDate, source
Requirements
artemis=# CREATE SERVER rss_srv
foreign data wrapper multicorn
options (
wrapper 'multicorn.rssfdw.RssFdw');
ERROR: Error in python: ImportError
DETAIL: No module named lxml
artemis=# CREATE FOREIGN TABLE cnnrss (
title character varying,
image text,
link text,
guid text,
pubDate text,
"pubDate" text
) server rss_srv options (
url 'https://www.cnnturk.com/feed/rss/all/news');
CREATE FOREIGN TABLE
artemis=# CREATE SERVER rss_srv foreign data wrapper multicorn options (
wrapper 'multicorn.rssfdw.RssFdw');
CREATE SERVER
artemis=# select * from cnnrss offset 12 limit 1;
-[ RECORD 1 ]-----------------------------------------------------------------
title | Banvit - MHP Riesen: 89 -76
image | https://i.cnnturk.com/ps/cnnturk/75/720x490/5bbe45b3ae784917346b6c8d
link | https://www.cnnturk.com/spor/basketbol/banvit-mhp-riesen-89-76
guid | 5bbc4eaf61361f271c1ae628
pubdate |
pubDate | Wed, 10 Oct 2018 18:57:00 GMT
References
@gguncesi
guncekaya.blogspot.com