I asked ChatGPT 3.5 to generate a function that returns the maximal element of a binary tree of integers for the following OCaml definition of trees:
type 'a BinTree = | Leaf of 'a option | Node of 'a BinTree * 'a * 'a BinTree
ChatGPT returned the following code:
let rec maximal_element tree =
match tree with
| Leaf None -> failwith "Empty tree"
| Leaf (Some x) -> x
| Node (_, value, right) ->
let max_right = maximal_element right in
max value max_right
The code is obviously broken as it does not consider elements from the left side of the tree at all.
Now, let us imagine that we live in a utopia where it is a norm to write code in Coq, and we have to prove that our functions are correct. ChatGPT 3.5 gives the following Coq version of the code...
Inductive binTree (A : Type) : Type :=
| Leaf : option A -> binTree A
| Node : binTree A -> A -> binTree A -> binTree A.
Fixpoint maximal_element {A : Type} (tree : binTree A) : option A :=
match tree with
| Leaf _ None => None
| Leaf _ (Some x) => Some x
| Node _ _ v r =>
match maximal_element r with
| None => Some v
| Some max_right => Some (max v max_right)
end
end.
I needed a few queries to generate the following proposition: an element x is a member of the binary tree. It has four cases: members can be in the Leaf, in the value, or on either side recursively.
Inductive member_of {A : Type} : A -> binTree A -> Prop :=
| Member_Leaf: forall x , member_of x (Leaf A (Some x))
| Member_Value: forall x l r, member_of x (Node A l x r)
| Member_Left: forall x v l r, member_of x l -> member_of x (Node A l v r)
| Member_Right: forall x v l r, member_of x r -> member_of x (Node A l v r).
I needed a few queries to generate the following correctness lemmas: every element is less or equal to the maximal_element, and the maximal_element is a member of the tree. (The ChatGPT-generated proofs were nonsense -- Coq gave an error.)
Lemma maximal_element_corr (tree : binTree nat): (forall x m, member_of x tree -> maximal_element tree = Some m -> x <= m). Lemma maximal_element_member (tree : binTree nat) : forall m, maximal_element tree = Some m -> member_of m tree.
As the maximal_element function is incorrect, we will not be able to prove it.
We must fix maximal_element first. After several prompts to ChatGPT, we were able to generate the following functions.
Fixpoint maximal_element (tree : binTree nat) : option nat :=
match tree with
| Leaf (Some x) => Some x
| Node _ l v r =>
match (maximal_element l), (maximal_element r) with
| Some max_l, Some max_r => Some (max (max v max_l) max_r)
| Some max_l, None => Some (max v max_l)
| None, Some max_r => Some (max v max_r)
| None, None => Some v
end
| Leaf None => None
end.
A future AI version might be able to generate the following hand-crafted proof -- which means that the maximal_element function is now actually (partially) correct.
Lemma maximal_element_member (tree : binTree nat) :
forall m, maximal_element tree = Some m -> member_of m tree.
Proof.
induction tree; cbn; intros m H.
- destruct o; inversion H. apply Member_Leaf.
- destruct (maximal_element tree1) eqn:E1.
-- destruct (maximal_element tree2) eqn:E2; inv_clear H.
--- edestruct (max_spec (max a n) n0); destruct H;
edestruct (max_spec a n); destruct H1;
try rewrite H; try rewrite H1; auto; (constructor; auto;fail).
--- edestruct (max_spec a n); destruct H;
try rewrite H; try rewrite H; auto; (constructor; auto;fail).
-- destruct (maximal_element tree2) eqn:E2; inv_clear H.
--- edestruct (max_spec a n); destruct H;
try rewrite H; try rewrite H1; auto; (constructor; auto;fail).
--- constructor.
Qed.
Lemma max_none t x: maximal_element t = None -> member_of x t -> False.
Proof.
induction t; cbn; intros.
+ destruct o; [inversion H| inversion H0].
+ destruct (maximal_element t1) eqn:E1.
++ destruct (maximal_element t2); inversion H.
++ destruct (maximal_element t2); inversion H.
Qed.
Lemma maximal_element_corr (tree : binTree nat):
(forall x m, member_of x tree -> maximal_element tree = Some m -> x <= m).
Proof.
induction tree; cbn; intros x m M H.
- destruct o; inversion H. inversion M. lia.
- destruct (maximal_element tree1) eqn:E1.
-- destruct (maximal_element tree2) eqn:E2; inv_clear H.
--- edestruct (max_spec (max a n) n0); destruct H;
edestruct (max_spec a n); destruct H1;
try rewrite H; try rewrite H1;
inv_clear M; try lia;
try eapply IHtree1 in H5; auto; try lia;
try eapply IHtree2 in H5; auto; try lia.
--- edestruct (max_spec a n); destruct H;
try rewrite H; try rewrite H1;
inv_clear M; try lia;
try eapply IHtree1 in H3; auto; try lia;
eapply max_none in E2; eauto; inversion E2.
-- destruct (maximal_element tree2) eqn:E2; inv_clear H.
--- edestruct (max_spec a n); destruct H;
try rewrite H; inv_clear M; auto;
try eapply IHtree2 in H3; auto; try lia;
eapply max_none in E1; eauto; inversion E1.
--- inv_clear M; try lia.
+ eapply max_none in E1; eauto; inversion E1.
+ eapply max_none in E2; eauto; inversion E2.
Qed.