DERIVING KNOWLEDGE FROM DATA

QUINN WILTON / @WILTON_QUINN

Welcome 👋

  • Hi, I'm Quinn!
     
  • Applied Researcher @ Fission
    • Building an edge database for
      local-first applications
    • "Building protocols for the future of the Internet"
       
  • @wilton_quinn on Twitter
    • Please reach out if you want to talk about any of this!

COMPUTER PROGRAMMING TODAY IS IN A STATE OF CRISIS

- Terry Winograd, Beyond Programming Languages (1979)

TO ACHIEVE A FUNDAMENTAL JUMP IN OUR PROGRAMMING CAPACITY,
WE NEED TO RETHINK WHAT WE ARE DOING FROM THE BEGINNING.

Ottmar Mergenthaler

Linotype Machine, 1889

Diagram of a Linotype machine

Linotype Keyboard

Fredric Brown

Unknown Worlds, 1942

Terry Winograd

"The Machine that Changed the World", 1992

Fallible $?X

Human $?X

Human Turing ✅

Greek Turing 🚫

Human Socrates ✅

Greek Socrates ✅

Core

Domain

Learned

Situational

Code

Data

Original French dialog

English translation

Prolog is an efficient programming language because it is a very stupid theorem prover.

- Richard O'Keefe

% Facts: turing and socrates are human; socrates is also greek.
human(turing).
human(socrates).
greek(socrates).

% Rule: X is fallible whenever X is human.
fallible(X) :- human(X).
% Query: find every X that is both fallible and greek.
?- fallible(X), greek(X).

X = socrates.

Prolog

Datalog

1972

1988

1986

"Datalog" coined

1982

Chat-80

1977

Deductive Databases

Converting a Chat-80 query from English to Datalog

A Datalog rule, and corresponding SQL view

AI Winter, as interpreted by DALL-E 2

(Thank you @kipperrii!)

# Declares a Datalog program named :networking through the Croline DSL.
# NOTE(review): the comments below read the rules as ordinary Datalog; the
# macros themselves are defined outside this snippet — confirm.
defmodule Example do
  use Croline.DSL

  defdatalog :networking do
    # Input relation: facts of the form link(source, destination).
    input link(source, destination)

    # Base case: d is reachable from s when s links directly to d.
    rule reachable(s, d), do: link(s, d)
    # Recursive case: d is reachable from s when s links to some z
    # and d is reachable from z.
    rule reachable(s, d), do:
        link(s, z) and
        reachable(z, d)
        
    # A node is in a cycle when it is reachable from itself.
    rule in_cycle(n), do: reachable(n, n)
  end
end
iex(1)> Example.networking(
  facts: network,
  query: "?- reachable(f, A)."
)
#MapSet<[
  [A: :a],
  [A: :b],
  [A: :c],
  [A: :d],
  [A: :g],
  [A: :h]
]>
iex(1)> Example.networking(
  facts: network,
  query: "?- in_cycle(N)."
)
#MapSet<[
  [N: :a],
  [N: :b],
  [N: :c],
  [N: :d]
]>

(Excerpt from a model of Rust's borrow checker)

# Declares the input relations of a Datalog program named :analysis.
# Only inputs are declared in this snippet; no rules are defined here.
defmodule Analysis do
  use Croline.DSL

  defdatalog :analysis do
    # Per-instruction relations, keyed by inst_index.
    input inst_name(inst_index, inst_name)
    # NOTE(review): i looks like the argument's position within the
    # instruction — confirm against the code that emits these facts.
    input inst_arg(inst_index, i, arg)

    # Per-function relations, keyed by fn_index.
    input fn_module(fn_index, mod)
    input fn_name(fn_index, name)
    input fn_arity(fn_index, arity)
    input fn_entry_label(fn_index, entry_label)
  end
end
defmodule Example do
  # Adds `y` to `x`. The guard only admits calls whose first argument is an
  # integer; anything else raises FunctionClauseError.
  def add(x, y) when is_integer(x), do: x + y
end
iex(1)> Decompiler.decompile(Example)
{Example, [
    __info__: 1,
    add: 2,
    module_info: 0,
    module_info: 1
 ],
 [],
 [
   ...
   {:function, :add, 2, 9,
    [
      {:label, 8},
      {:func_info, {:atom, Example}, {:atom, :add}, 2},
      {:label, 9},
      {:test, :is_integer, {:f, 8}, [x: 0]},
      {:gc_bif, :+, {:f, 0}, 2, [x: 0, x: 1], {:x, 0}},
      :return
    ]},
   ...
 ], 14}
defmodule Example do
  # Adds `y` to `x`. The guard only admits calls whose first argument is an
  # integer; anything else raises FunctionClauseError.
  def add(x, y) when is_integer(x), do: x + y
end
iex(1)> Decompiler.decompile(Example)
{Example, [
    __info__: 1,
    add: 2,
    module_info: 0,
    module_info: 1
 ],
 [],
 [
   ...
   {:function, :add, 2, 9,
    [
      {:label, 8},
      {:func_info, {:atom, Example}, {:atom, :add}, 2},
      {:label, 9},
      {:test, :is_integer, {:f, 8}, [x: 0]},
      {:gc_bif, :+, {:f, 0}, 2, [x: 0, x: 1], {:x, 0}},
      :return
    ]},
   ...
 ], 14}
[
  {:fn_module, [1, Example]},
  {:fn_name, [1, :add]},
  {:fn_arity, [1, 2]},
  {:fn_entry_label, [1, 9]},
  {:inst_name, [21, :label]},
  {:inst_arg, [21, 0, 8]},
  {:inst_name, [22, :line]},
  {:inst_name, [23, :func_info]},
  {:inst_name, [24, :label]},
  {:inst_arg, [24, 0, 9]},
  {:inst_name, [25, :test]},
  {:inst_arg, [25, 0, :is_integer]},
  {:inst_arg, [25, 1, {:f, 8}]},
  {:inst_arg, [25, 2, {:x, 0}]},
  {:inst_name, [28, :gc_bif]},
  {:inst_arg, [28, 0, :+]},
  {:inst_arg, [28, 1, {:f, 0}]},
  {:inst_arg, [28, 2, 2]},
  {:inst_arg, [28, 3, [x: 0, x: 1]]},
  {:inst_arg, [28, 4, {:x, 0}]},
  {:inst_name, [29, :return]}
]
# An instruction name "exits" when it is :return or :func_info.
rule inst_exits(name), do: name == :return
rule inst_exits(name), do: name == :func_info
 
# link(src, dest): src is an instruction whose name does not exit, and
# dest is the index immediately after it (src + 1).
rule link(src, dest), do:
  inst_name(src, name) and
  !inst_exits(name) and
  dest = src + 1

# flow(src, dest): dest is reachable from src through one or more links.
rule flow(src, dest), do: link(src, dest)
rule flow(src, dest), do:
  flow(src, hop) and
  link(hop, dest)
  
# block_head(label, inst): inst is a :label instruction whose argument 0
# is label.
rule block_head(label, inst), do:
  inst_name(inst, :label) and
  inst_arg(inst, 0, label)
      
# block_tail(label, inst): inst is any instruction that flow reaches from
# the head of the block named label.
rule block_tail(label, inst), do:
  block_head(label, head) and
  flow(head, inst)
{:function, :add, 2, 9,
    [
      {:label, 8},
      {:func_info, {:atom, Example}, {:atom, :add}, 2},
      {:label, 9},
      {:test, :is_integer, {:f, 8}, [x: 0]},
      {:gc_bif, :+, {:f, 0}, 2, [x: 0, x: 1], {:x, 0}},
      :return
    ]}
# An instruction name "exits" when it is :return or :func_info.
rule inst_exits(name), do: name == :return
rule inst_exits(name), do: name == :func_info
 
# link(src, dest): src is an instruction whose name does not exit, and
# dest is the index immediately after it (src + 1).
rule link(src, dest), do:
  inst_name(src, name) and
  !inst_exits(name) and
  dest = src + 1

# flow(src, dest): dest is reachable from src through one or more links.
rule flow(src, dest), do: link(src, dest)
rule flow(src, dest), do:
  flow(src, hop) and
  link(hop, dest)
  
# block_head(label, inst): inst is a :label instruction whose argument 0
# is label.
rule block_head(label, inst), do:
  inst_name(inst, :label) and
  inst_arg(inst, 0, label)
      
# block_tail(label, inst): inst is any instruction that flow reaches from
# the head of the block named label.
rule block_tail(label, inst), do:
  block_head(label, head) and
  flow(head, inst)
{:function, :add, 2, 9,
    [
      {:label, 8},
      {:func_info, {:atom, Example}, {:atom, :add}, 2},
      {:label, 9},
      {:test, :is_integer, {:f, 8}, [x: 0]},
      {:gc_bif, :+, {:f, 0}, 2, [x: 0, x: 1], {:x, 0}},
      :return
    ]}
# Base case: function fn_index reaches m.f/a when fn_call(fn_index, m, f, a)
# holds.
rule fn_reachable(fn_index, m, f, a), do:
  fn_call(fn_index, m, f, a)

# Recursive case: fn_index calls c_m.c_f/c_a, that target is the function
# with index callee, and callee in turn reaches m.f/a.
# NOTE(review): `fn` is a reserved word in Elixir; confirm that a relation
# named `fn` actually parses inside the DSL.
rule fn_reachable(fn_index, m, f, a), do:
  fn_call(fn_index, c_m, c_f, c_a) and
  fn(callee, c_m, c_f, c_a) and
  fn_reachable(callee, m, f, a)
defmodule Example do
  # Calls bar/1 (result discarded), then returns add(x, x).
  def foo(x) do
    bar(x)
    add(x, x)
  end

  # Delegates to identity/1.
  def bar(x), do: identity(x)

  # Returns its argument unchanged.
  def identity(x), do: x

  # Adds `y` to `x`; only matches when `x` is an integer.
  def add(x, y) when is_integer(x), do: x + y
end
iex(1)> Analysis.run(Example,
  query: "?- fn_name(I, foo), fn_reachable(I, M, F, A)."
)
#MapSet<[
  [I: 3, M: Example, F: :add, A: 2],
  [I: 3, M: Example, F: :bar, A: 1],
  [I: 3, M: Example, F: :identity, A: 1]
]>
# Facts listing {module, function, arity} triples that are treated as
# side-effectful.
fact side_effectful(:erlang, :put, 2)
fact side_effectful(:erlang, :apply, 2)
fact side_effectful(:erlang, :apply, 3)
fact side_effectful(:erlang, :send, 2)
... 
# A function is impure when it reaches some m.f/a that is listed as
# side_effectful.
rule fn_impure(fn_index), do:
  fn_reachable(fn_index, m, f, a) and
  side_effectful(m, f, a)
defmodule Example do
  # Delegates to pure_b/1.
  def pure_a(x), do: pure_b(x)

  # Returns its argument unchanged.
  def pure_b(x), do: x

  # Delegates to impure_b/1.
  def impure_a(x), do: impure_b(x)

  # Sends `x` as a message to the calling process.
  def impure_b(x), do: send(self(), x)
end
iex(1)> Analysis.run(Example,
  query: "?- fn_impure(I), fn_name(I, N)."
)
#MapSet<[
  [I: 1, N: :impure_a],
  [I: 2, N: :impure_b],
]>

Global growth of data volume, 2006-2020
(from "The Digital Universe in 2020")

With over 1.5 million publications per year and more than 50 million total peer-reviewed articles, the rate and volume of novel discoveries has surpassed our ability to fully utilize and understand what is known.

While all professional fields are subject to the effects of the “forgotten” or “uninferred” facts, the cost of the unknown known for healthcare providers is measured in human lives.

There will always be things we wish to say in our programs that in all known languages can only be said poorly.

- Alan Perlis

Deriving Knowledge from Data

By quinnwilton

Deriving Knowledge from Data

  • 522