Large datasets on the Hasura console
1. Track a large number of tables and relationships from console
2. Postgres catalog query optimizations
Problem:
- Catalog query was slow for large schemas
- It affected Console loading time
- Related issue:
const runSql = `
SELECT
COALESCE(Json_agg(Row_to_json(info)), '[]' :: json) AS tables
FROM (
SELECT
pgn.nspname as table_schema,
pgc.relname as table_name,
case
when pgc.relkind = 'r' then 'TABLE'
when pgc.relkind = 'f' then 'FOREIGN TABLE'
when pgc.relkind = 'v' then 'VIEW'
when pgc.relkind = 'm' then 'MATERIALIZED VIEW'
when pgc.relkind = 'p' then 'PARTITIONED TABLE'
end as table_type,
obj_description(pgc.oid) AS comment,
COALESCE(json_agg(DISTINCT row_to_json(isc) :: jsonb || jsonb_build_object('comment', col_description(pga.attrelid, pga.attnum))) filter (WHERE isc.column_name IS NOT NULL), '[]' :: json) AS columns,
COALESCE(json_agg(DISTINCT row_to_json(ist) :: jsonb || jsonb_build_object('comment', obj_description(pgt.oid))) filter (WHERE ist.trigger_name IS NOT NULL), '[]' :: json) AS triggers,
row_to_json(isv) AS view_info
FROM pg_class as pgc
INNER JOIN pg_namespace as pgn
ON pgc.relnamespace = pgn.oid
/* columns */
/* This is a simplified version of how information_schema.columns was
** implemented in postgres 9.5, but modified to support materialized
** views.
*/
LEFT OUTER JOIN pg_attribute AS pga
ON pga.attrelid = pgc.oid
LEFT OUTER JOIN (
SELECT
current_database() AS table_catalog,
nc.nspname AS table_schema,
c.relname AS table_name,
a.attname AS column_name,
a.attnum AS ordinal_position,
pg_get_expr(ad.adbin, ad.adrelid) AS column_default,
CASE WHEN a.attnotnull OR (t.typtype = 'd' AND t.typnotnull) THEN 'NO' ELSE 'YES' END AS is_nullable,
CASE WHEN t.typtype = 'd' THEN
CASE WHEN bt.typelem <> 0 AND bt.typlen = -1 THEN 'ARRAY'
WHEN nbt.nspname = 'pg_catalog' THEN format_type(t.typbasetype, null)
ELSE 'USER-DEFINED' END
ELSE
CASE WHEN t.typelem <> 0 AND t.typlen = -1 THEN 'ARRAY'
WHEN nt.nspname = 'pg_catalog' THEN format_type(a.atttypid, null)
ELSE 'USER-DEFINED' END
END AS data_type,
CASE WHEN nco.nspname IS NOT NULL THEN current_database() END AS collation_catalog,
nco.nspname AS collation_schema,
co.collname AS collation_name,
CASE WHEN t.typtype = 'd' THEN current_database() ELSE null END AS domain_catalog,
CASE WHEN t.typtype = 'd' THEN nt.nspname ELSE null END AS domain_schema,
CASE WHEN t.typtype = 'd' THEN t.typname ELSE null END AS domain_name,
current_database() AS udt_catalog,
coalesce(nbt.nspname, nt.nspname) AS udt_schema,
coalesce(bt.typname, t.typname) AS udt_name,
a.attnum AS dtd_identifier,
CASE WHEN c.relkind = 'r' OR
(c.relkind IN ('v', 'f', 'p') AND
pg_column_is_updatable(c.oid, a.attnum, false))
THEN 'YES' ELSE 'NO' END AS is_updatable
FROM (pg_attribute a LEFT JOIN pg_attrdef ad ON attrelid = adrelid AND attnum = adnum)
JOIN (pg_class c JOIN pg_namespace nc ON (c.relnamespace = nc.oid)) ON a.attrelid = c.oid
JOIN (pg_type t JOIN pg_namespace nt ON (t.typnamespace = nt.oid)) ON a.atttypid = t.oid
LEFT JOIN (pg_type bt JOIN pg_namespace nbt ON (bt.typnamespace = nbt.oid))
ON (t.typtype = 'd' AND t.typbasetype = bt.oid)
LEFT JOIN (pg_collation co JOIN pg_namespace nco ON (co.collnamespace = nco.oid))
ON a.attcollation = co.oid AND (nco.nspname, co.collname) <> ('pg_catalog', 'default')
WHERE (NOT pg_is_other_temp_schema(nc.oid))
AND a.attnum > 0 AND NOT a.attisdropped AND c.relkind in ('r', 'v', 'm', 'f', 'p')
AND (pg_has_role(c.relowner, 'USAGE')
OR has_column_privilege(c.oid, a.attnum,
'SELECT, INSERT, UPDATE, REFERENCES'))
) AS isc
ON isc.table_schema = pgn.nspname
AND isc.table_name = pgc.relname
AND isc.column_name = pga.attname
/* triggers */
LEFT OUTER JOIN pg_trigger AS pgt
ON pgt.tgrelid = pgc.oid
LEFT OUTER JOIN information_schema.triggers AS ist
ON ist.event_object_schema = pgn.nspname
AND ist.event_object_table = pgc.relname
AND ist.trigger_name = pgt.tgname
/* This is a simplified version of how information_schema.views was
** implemented in postgres 9.5, but modified to support materialized
** views.
*/
LEFT OUTER JOIN (
SELECT
current_database() AS table_catalog,
nc.nspname AS table_schema,
c.relname AS table_name,
CASE WHEN pg_has_role(c.relowner, 'USAGE') THEN pg_get_viewdef(c.oid) ELSE null END AS view_definition,
CASE WHEN 'check_option=cascaded' = ANY (c.reloptions) THEN 'CASCADED'
WHEN 'check_option=local' = ANY (c.reloptions) THEN 'LOCAL'
ELSE 'NONE'
END AS check_option,
CASE WHEN pg_relation_is_updatable(c.oid, false) & 20 = 20 THEN 'YES' ELSE 'NO' END AS is_updatable,
CASE WHEN pg_relation_is_updatable(c.oid, false) & 8 = 8 THEN 'YES' ELSE 'NO' END AS is_insertable_into,
CASE WHEN EXISTS (SELECT 1 FROM pg_trigger WHERE tgrelid = c.oid AND tgtype & 81 = 81) THEN 'YES' ELSE 'NO' END AS is_trigger_updatable,
CASE WHEN EXISTS (SELECT 1 FROM pg_trigger WHERE tgrelid = c.oid AND tgtype & 73 = 73) THEN 'YES' ELSE 'NO' END AS is_trigger_deletable,
CASE WHEN EXISTS (SELECT 1 FROM pg_trigger WHERE tgrelid = c.oid AND tgtype & 69 = 69) THEN 'YES' ELSE 'NO' END AS is_trigger_insertable_into
FROM pg_namespace nc, pg_class c
WHERE c.relnamespace = nc.oid
AND c.relkind in ('v', 'm')
AND (NOT pg_is_other_temp_schema(nc.oid))
AND (pg_has_role(c.relowner, 'USAGE')
OR has_table_privilege(c.oid, 'SELECT, INSERT, UPDATE, DELETE, TRUNCATE, REFERENCES, TRIGGER')
OR has_any_column_privilege(c.oid, 'SELECT, INSERT, UPDATE, REFERENCES'))
) AS isv
ON isv.table_schema = pgn.nspname
AND isv.table_name = pgc.relname
WHERE
pgc.relkind IN ('r', 'v', 'f', 'm', 'p')
${whereQuery}
GROUP BY pgc.oid, pgn.nspname, pgc.relname, table_type, isv.*
) AS info;
Columns information
SELECT
COALESCE(Json_agg(Json_build_object('table_schema', table_schema, 'table_name', table_name, 'columns', columns)), '[]'::json) AS columns
FROM (
SELECT
isc.table_schema,
isc.table_name,
COALESCE(json_agg(DISTINCT row_to_json(isc)::jsonb || jsonb_build_object ('comment', col_description(isc.attrelid, isc.ordinal_position))) FILTER (WHERE isc.column_name IS NOT NULL), '[]'::json) AS columns FROM (
SELECT
current_database() AS table_catalog,
nc.nspname AS table_schema,
c.relname AS table_name,
a.attname AS column_name,
a.attnum AS ordinal_position,
attrelid,
pg_get_expr(ad.adbin, ad.adrelid) AS column_default,
CASE WHEN a.attnotnull
OR(t.typtype = 'd'
AND t.typnotnull) THEN
'NO'
ELSE
'YES'
END AS is_nullable,
CASE WHEN t.typtype = 'd' THEN
CASE WHEN bt.typelem <> 0
AND bt.typlen = - 1 THEN
'ARRAY'
WHEN nbt.nspname = 'pg_catalog' THEN
format_type(t.typbasetype, NULL)
ELSE
'USER-DEFINED'
END
ELSE
CASE WHEN t.typelem <> 0
AND t.typlen = - 1 THEN
'ARRAY'
WHEN nt.nspname = 'pg_catalog' THEN
format_type(a.atttypid, NULL)
ELSE
'USER-DEFINED'
END
END AS data_type,
CASE WHEN nco.nspname IS NOT NULL THEN
current_database()
END AS collation_catalog,
nco.nspname AS collation_schema,
co.collname AS collation_name,
CASE WHEN t.typtype = 'd' THEN
current_database()
ELSE
NULL
END AS domain_catalog,
CASE WHEN t.typtype = 'd' THEN
nt.nspname
ELSE
NULL
END AS domain_schema,
CASE WHEN t.typtype = 'd' THEN
t.typname
ELSE
NULL
END AS domain_name,
current_database() AS udt_catalog,
coalesce(nbt.nspname, nt.nspname) AS udt_schema,
coalesce(bt.typname, t.typname) AS udt_name,
a.attnum AS dtd_identifier,
CASE WHEN c.relkind = 'r'
OR(c.relkind IN('v', 'f', 'p')
AND pg_column_is_updatable (c.oid, a.attnum, FALSE)) THEN
'YES'
ELSE
'NO'
END AS is_updatable FROM (pg_attribute a
LEFT JOIN pg_attrdef ad ON attrelid = adrelid
AND attnum = adnum)
JOIN (pg_class c
JOIN pg_namespace nc ON (c.relnamespace = nc.oid)) ON a.attrelid = c.oid
JOIN (pg_type t
JOIN pg_namespace nt ON (t.typnamespace = nt.oid)) ON a.atttypid = t.oid
LEFT JOIN (pg_type bt
JOIN pg_namespace nbt ON (bt.typnamespace = nbt.oid)) ON (t.typtype = 'd'
AND t.typbasetype = bt.oid)
LEFT JOIN (pg_collation co
JOIN pg_namespace nco ON (co.collnamespace = nco.oid)) ON a.attcollation = co.oid
AND(nco.nspname, co.collname) <> ('pg_catalog',
'default')
WHERE (NOT pg_is_other_temp_schema(nc.oid))
and(nc.nspname = 'public')
AND a.attnum > 0
AND NOT a.attisdropped
AND nc.nspname = 'public'
AND c.relkind in('r', 'v', 'm', 'f', 'p')
AND(pg_has_role(c.relowner, 'USAGE')
OR has_column_privilege(c.oid, a.attnum, 'SELECT, INSERT, UPDATE, REFERENCES'))) AS isc
GROUP BY
(1,
2)) AS columns;
Schema
information
SELECT
COALESCE(Json_agg(Row_to_json(info)), '[]'::json) AS tables
FROM (
SELECT
pgn.nspname AS table_schema,
pgc.relname AS table_name,
/* remove this */
CASE WHEN pgc.relkind = 'r' THEN
'TABLE'
WHEN pgc.relkind = 'f' THEN
'FOREIGN TABLE'
WHEN pgc.relkind = 'v' THEN
'VIEW'
WHEN pgc.relkind = 'm' THEN
'MATERIALIZED VIEW'
WHEN pgc.relkind = 'p' THEN
'PARTITIONED TABLE'
END AS table_type,
obj_description(pgc.oid) AS COMMENT,
COALESCE(json_agg(DISTINCT row_to_json(ist)::jsonb || jsonb_build_object ('comment', obj_description(pgt.oid))) FILTER (WHERE ist.trigger_name IS NOT NULL), '[]'::json) AS triggers,
row_to_json(isv) AS view_info FROM pg_class AS pgc
INNER JOIN pg_namespace AS pgn ON pgc.relnamespace = pgn.oid
/* triggers */
LEFT OUTER JOIN pg_trigger AS pgt ON pgt.tgrelid = pgc.oid
LEFT OUTER JOIN information_schema.triggers AS ist ON ist.event_object_schema = pgn.nspname
AND ist.event_object_table = pgc.relname
AND ist.trigger_name = pgt.tgname
/* This is a simplified version of how information_schema.views was↵ ** implemented in postgres 9.5, but modified to support materialized↵ ** views.↵ */
LEFT OUTER JOIN (
SELECT
current_database() AS table_catalog,
nc.nspname AS table_schema,
c.relname AS table_name,
CASE WHEN pg_has_role(c.relowner, 'USAGE') THEN
pg_get_viewdef(c.oid)
ELSE
NULL
END AS view_definition,
CASE WHEN 'check_option=cascaded' = ANY (c.reloptions) THEN
'CASCADED'
WHEN 'check_option=local' = ANY (c.reloptions) THEN
'LOCAL'
ELSE
'NONE'
END AS check_option,
CASE WHEN pg_relation_is_updatable (c.oid,
FALSE) & 20 = 20 THEN
'YES'
ELSE
'NO'
END AS is_updatable,
CASE WHEN pg_relation_is_updatable (c.oid,
FALSE) & 8 = 8 THEN
'YES'
ELSE
'NO'
END AS is_insertable_into,
CASE WHEN EXISTS (
SELECT
1 FROM pg_trigger WHERE tgrelid = c.oid
AND tgtype & 81 = 81) THEN
'YES'
ELSE
'NO'
END AS is_trigger_updatable,
CASE WHEN EXISTS (
SELECT
1 FROM pg_trigger WHERE tgrelid = c.oid
AND tgtype & 73 = 73) THEN
'YES'
ELSE
'NO'
END AS is_trigger_deletable,
CASE WHEN EXISTS (
SELECT
1 FROM pg_trigger WHERE tgrelid = c.oid
AND tgtype & 69 = 69) THEN
'YES'
ELSE
'NO'
END AS is_trigger_insertable_into FROM pg_namespace nc,
pg_class c WHERE c.relnamespace = nc.oid
AND c.relkind in('v', 'm')
AND(NOT pg_is_other_temp_schema(nc.oid))
AND(pg_has_role(c.relowner, 'USAGE')
OR has_table_privilege(c.oid, 'SELECT, INSERT, UPDATE, DELETE, TRUNCATE, REFERENCES, TRIGGER')
OR has_any_column_privilege(c.oid, 'SELECT, INSERT, UPDATE, REFERENCES'))) AS isv ON isv.table_schema = pgn.nspname
AND isv.table_name = pgc.relname WHERE pgc.relkind IN('r', 'v', 'f', 'm', 'p')
and(pgn.nspname = 'public')
GROUP BY
pgc.oid,
pgn.nspname,
pgc.relname,
table_type,
isv.*) AS info;
Results
Tables query: actual time=61.784..61.796
Columns query: actual time=183.003..242.486
Old query: actual time=1049.521..1049.534
}
~245ms
For a schema with about 120 tables
5x improvement 🚀
Thank you!
Community Call
By Aleksandra Sikora
Community Call
- 1,411