DERIVING KNOWLEDGE FROM DATA
QUINN WILTON / @WILTON_QUINN
Welcome 👋
- Hi, I'm Quinn!
 - Applied Researcher @ Fission
-
​Building an edge database for
local-first applications - "Building protocols for the future of the Internet"
-
​Building an edge database for
-
@wilton_quinn on Twitter
- Please reach out if you want to talk about any of this!
COMPUTER PROGRAMMING TODAY IS IN A STATE OF CRISIS
- Terry Winograd, Beyond Programming Languages (1979)
TO ACHIEVE A FUNDAMENTAL JUMP IN OUR PROGRAMMING CAPACITY,
WE NEED TO RETHINK WHAT WE ARE DOING FROM THE BEGINNING.
Ottmar Mergenthaler
Linotype Machine, 1889
Diagram of a Linotype machine
Linotype Keyboard
Fredric Brown
Unknown Worlds, 1942
Terry Winograd
"The Machine that Changed the World", 1992
Fallible $?X
Human $?X
Human Turing ✅
Greek Turing 🚫
Human Socrates ✅
Greek Socrates ✅
Core
Domain
Learned
Situational
Code
Data
Original French dialog
English translation
Prolog is an efficient programming language because it is a very stupid theorem prover.
- Richard O'Keefe
human(turing).
human(socrates).
greek(socrates).
fallible(X) :- human(X).
?- fallible(X), greek(X).
X = socrates.
Prolog
Datalog
1972
1988
1986
"Datalog" coined
1982
Chat-80
1977
Deductive Databases
Converting a Chat-80 query from English to Datalog
A Datalog rule, and corresponding SQL view
AI Winter, as interpreted by DALL-E 2
(Thank you @kipperrii!)
defmodule Example do
use Croline.DSL
defdatalog :networking do
input link(source, destination)
rule reachable(s, d), do: link(s, d)
rule reachable(s, d), do:
link(s, z) and
reachable(z, d)
rule in_cycle(n), do: reachable(n, n)
end
end
iex(1)> Example.networking(
facts: network,
query: "?- reachable(f, A)."
)
#MapSet<[
[A: :a],
[A: :b],
[A: :c],
[A: :d],
[A: :g],
[A: :h]
]>
iex(1)> Example.networking(
facts: network,
query: "?- in_cycle(N)."
)
#MapSet<[
[N: :a],
[N: :b],
[N: :c],
[N: :d]
]>
(Excerpt from a model of Rust's borrow checker)
defmodule Analysis do
use Croline.DSL
defdatalog :analysis do
input inst_name(inst_index, inst_name)
input inst_arg(inst_index, i, arg)
input fn_module(fn_index, mod)
input fn_name(fn_index, name)
input fn_arity(fn_index, arity)
input fn_entry_label(fn_index, entry_label)
end
end
defmodule Example do
def add(x, y) when is_integer(x) do
x + y
end
end
iex(1)> Decompiler.decompile(Example)
{Example, [
__info__: 1,
add: 2,
module_info: 0,
module_info: 1
],
[],
[
...
{:function, :add, 2, 9,
[
{:label, 8},
{:func_info, {:atom, Example}, {:atom, :add}, 2},
{:label, 9},
{:test, :is_integer, {:f, 8}, [x: 0]},
{:gc_bif, :+, {:f, 0}, 2, [x: 0, x: 1], {:x, 0}},
:return
]},
...
], 14}
defmodule Example do
def add(x, y) when is_integer(x) do
x + y
end
end
iex(1)> Decompiler.decompile(Example)
{Example, [
__info__: 1,
add: 2,
module_info: 0,
module_info: 1
],
[],
[
...
{:function, :add, 2, 9,
[
{:label, 8},
{:func_info, {:atom, Example}, {:atom, :add}, 2},
{:label, 9},
{:test, :is_integer, {:f, 8}, [x: 0]},
{:gc_bif, :+, {:f, 0}, 2, [x: 0, x: 1], {:x, 0}},
:return
]},
...
], 14}
[
{:fn_module, [1, Example]},
{:fn_name, [1, :add]},
{:fn_arity, [1, 2]},
{:fn_entry_label, [1, 9]},
{:inst_name, [21, :label]},
{:inst_arg, [21, 0, 8]},
{:inst_name, [22, :line]},
{:inst_name, [23, :func_info]},
{:inst_name, [24, :label]},
{:inst_arg, [24, 0, 9]},
{:inst_name, [25, :test]},
{:inst_arg, [25, 0, :is_integer]},
{:inst_arg, [25, 1, {:f, 8}]},
{:inst_arg, [25, 2, {:x, 0}]},
{:inst_name, [28, :gc_bif]},
{:inst_arg, [28, 0, :+]},
{:inst_arg, [28, 1, {:f, 0}]},
{:inst_arg, [28, 2, 2]},
{:inst_arg, [28, 3, [x: 0, x: 1]]},
{:inst_arg, [28, 4, {:x, 0}]},
{:inst_name, [29, :return]}
]
rule inst_exits(name), do: name == :return
rule inst_exits(name), do: name == :func_info
rule link(src, dest), do:
inst_name(src, name) and
!inst_exits(name) and
dest = src + 1
rule flow(src, dest), do: link(src, dest)
rule flow(src, dest), do:
flow(src, hop) and
link(hop, dest)
rule block_head(label, inst), do:
inst_name(inst, :label) and
inst_arg(inst, 0, label)
rule block_tail(label, inst), do:
block_head(label, head) and
flow(head, inst)
{:function, :add, 2, 9,
[
{:label, 8},
{:func_info, {:atom, Example}, {:atom, :add}, 2},
{:label, 9},
{:test, :is_integer, {:f, 8}, [x: 0]},
{:gc_bif, :+, {:f, 0}, 2, [x: 0, x: 1], {:x, 0}},
:return
]}
rule inst_exits(name), do: name == :return
rule inst_exits(name), do: name == :func_info
rule link(src, dest), do:
inst_name(src, name) and
!inst_exits(name) and
dest = src + 1
rule flow(src, dest), do: link(src, dest)
rule flow(src, dest), do:
flow(src, hop) and
link(hop, dest)
rule block_head(label, inst), do:
inst_name(inst, :label) and
inst_arg(inst, 0, label)
rule block_tail(label, inst), do:
block_head(label, head) and
flow(head, inst)
{:function, :add, 2, 9,
[
{:label, 8},
{:func_info, {:atom, Example}, {:atom, :add}, 2},
{:label, 9},
{:test, :is_integer, {:f, 8}, [x: 0]},
{:gc_bif, :+, {:f, 0}, 2, [x: 0, x: 1], {:x, 0}},
:return
]}
rule fn_reachable(fn_index, m, f, a), do:
fn_call(fn_index, m, f, a)
rule fn_reachable(fn_index, m, f, a), do:
fn_call(fn_index, c_m, c_f, c_a) and
fn(callee, c_m, c_f, c_a) and
fn_reachable(callee, m, f, a)
defmodule Example do
def foo(x) do
bar(x)
add(x, x)
end
def bar(x) do
identity(x)
end
def identity(x) do
x
end
def add(x, y) when is_integer(x) do
x + y
end
end
iex(1)> Analysis.run(Example,
query: "?- fn_name(I, foo), fn_reachable(I, M, F, A)."
)
#MapSet<[
[I: 3, M: Example, F: :add, A: 2],
[I: 3, M: Example, F: :bar, A: 1],
[I: 3, M: Example, F: :identity, A: 1]
]>
fact side_effectful(:erlang, :put, 2)
fact side_effectful(:erlang, :apply, 2)
fact side_effectful(:erlang, :apply, 3)
fact side_effectful(:erlang, :send, 2)
...
rule fn_impure(fn_index), do:
fn_reachable(fn_index, m, f, a) and
side_effectful(m, f, a)
defmodule Example do
def pure_a(x) do
pure_b(x)
end
def pure_b(x) do
x
end
def impure_a(x) do
impure_b(x)
end
def impure_b(x) do
send(self(), x)
end
end
iex(1)> Analysis.run(Example,
query: "?- fn_impure(I), fn_name(I, N)."
)
#MapSet<[
[I: 1, N: :impure_a],
[I: 2, N: :impure_b]
]>
Global growth of data volume, 2006-2020
(from "The Digital Universe in 2020")
With over 1.5 million publications per year and more than 50 million total peer-reviewed articles, the rate and volume of novel discoveries has surpassed our ability to fully utilize and understand what is known.
While all professional fields are subject to the effects of the “forgotten” or “uninferred” facts, the cost of the unknown known for healthcare providers is measured in human lives.
There will always be things we wish to say in our programs that in all known languages can only be said poorly.
- Alan Perlis
Deriving Knowledge from Data
By quinnwilton
Deriving Knowledge from Data
- 522