Misc.find_first_mono

Several places in the compiler have logic to "pick a fresh name" by starting from a prefix, say foo, and trying foo/0, foo/1, foo/2... until they find one that is "available" in some sense. This can easily be implemented through a linear search, but this gives a quadratic behavior when we repeatedly generate fresh names from the same prefix -- which can happen in practice in realistic example of source code repeating the same variable or constructor name many times. We introduce a function that finds the "first available number" in logarithmic time, assuming that the "unavailable numbers" form a strict prefix of all natural numbers. (That is: some numbers remain available, and all numbers after the first available numbers are available.) When called repeatedly on the same prefix, this gives a behavior in O(n log n) rather than O(n^2). Note: if called on availability predicates that are not monotonous (they become false again after being true), the function will not necessarily provide the smallest available number, but it should still provide an available number.
author: Gabriel Scherer <gabriel.scherer@gmail.com> 2023-02-23 13:23:11 +0100
committer: Gabriel Scherer <gabriel.scherer@gmail.com> 2023-02-28 11:11:56 +0100
commit: 0116a8f9c3afe7e36fb45126d3aa78607a8b2095 (patch)
tree: a9819e96c87596f5be928264cee6bfc685faa15f
parent: fd6e6e0c9cf636f69cc90f4590834f77efaa5b95 (diff)
download: ocaml-0116a8f9c3afe7e36fb45126d3aa78607a8b2095.tar.gz
4 files changed, 172 insertions, 0 deletions
diff --git a/testsuite/tests/utils/find_first_mono.ml b/testsuite/tests/utils/find_first_mono.ml
new file mode 100644
index 0000000000..80ea5e6255
--- /dev/null
+++ b/testsuite/tests/utils/find_first_mono.ml
@@ -0,0 +1,73 @@
+(* TEST
+include config
+include testing
+binary_modules = "config build_path_prefix_map misc"
+* bytecode
+*)
+
+let check_and_count_calls n =
+  let calls = ref 0 in
+  let res = Misc.find_first_mono (fun i -> incr calls; i >= n) in
+  assert (res = n);
+  !calls
+
+let report i =
+  let calls = check_and_count_calls i in
+  Printf.printf "%d: %d calls\n%!" i calls
+
+let () =
+  print_endline "Testing small values";
+  let small_tests = List.init 20 (fun i -> i) in
+  List.iter report small_tests;
+  print_newline ()
+
+let () =
+  print_endline "Testing around max_int/2";
+  (* we do not use [report] to show the test output, as the output
+     depends on the value of [max_int], so it is not portable across
+     integer sizes. *)
+  let median_tests =
+    List.map (fun i -> max_int / 2 + i) [-2; -1; 0; 1; 2] in
+  List.map check_and_count_calls median_tests |> ignore;
+  print_newline ()
+
+let () =
+  print_endline "Testing around max_int";
+  (* see above for why we do not use [report] *)
+  let max_int_tests =
+    List.map (fun i -> max_int + i) [-4; -3; -2; -1; 0] in
+  List.map check_and_count_calls max_int_tests |> ignore;
+  print_newline ()
+
+let () =
+  let n = 100 in
+  Printf.printf
+    "Showing predicate calls for find_first_mono to find n=%d\n%!" n;
+  let res =
+    Misc.find_first_mono (fun i -> Printf.printf "call on %d\n%!" i; i >= n) in
+  Printf.printf "result: %d\n%!" res;
+  print_newline ()
+
+let () =
+  print_endline "Test constantly-false predicates";
+  print_endline "Constantly-false predicates are outside the spec,\n\
+    but we ask them to return [max_int] rather than loop or crash.";
+  let res = Misc.find_first_mono (fun _ -> false) in
+  assert (res = max_int);
+  print_newline ()
+
+let () =
+  print_endline "Test non-monotonous predicates";
+  print_endline "Non-monotonous predicates are outside the spec,\n\
+    but we ask them to find a satisfying value rather than loop or crash\n\
+    as long as they are asymptotically true.";
+  (* We generate predicates that are true above 1000 but
+     are sometimes-true before that. *)
+  for n = 1 to 499 do
+    let pred i =
+      i >= 1000 || (i > n && i mod n = 0)
+    in
+    let res = Misc.find_first_mono pred in
+    assert (pred res)
+  done;
+  print_newline ()
diff --git a/testsuite/tests/utils/find_first_mono.reference b/testsuite/tests/utils/find_first_mono.reference
new file mode 100644
index 0000000000..7ade9e6528
--- /dev/null
+++ b/testsuite/tests/utils/find_first_mono.reference
@@ -0,0 +1,54 @@
+Testing small values
+0: 1 calls
+1: 2 calls
+2: 4 calls
+3: 4 calls
+4: 6 calls
+5: 6 calls
+6: 6 calls
+7: 6 calls
+8: 8 calls
+9: 8 calls
+10: 8 calls
+11: 8 calls
+12: 8 calls
+13: 8 calls
+14: 8 calls
+15: 8 calls
+16: 10 calls
+17: 10 calls
+18: 10 calls
+19: 10 calls
+
+Testing around max_int/2
+
+Testing around max_int
+
+Showing predicate calls for find_first_mono to find n=100
+call on 0
+call on 1
+call on 3
+call on 7
+call on 15
+call on 31
+call on 63
+call on 127
+call on 95
+call on 111
+call on 103
+call on 99
+call on 101
+call on 100
+result: 100
+
+Test constantly-false predicates
+Constantly-false predicates are outside the spec,
+but we ask them to return [max_int] rather than loop or crash.
+
+Test non-monotonous predicates
+Non-monotonous predicates are outside the spec,
+but we ask them to find a satisfying value rather than loop or crash
+as long as they are asymptotically true.
+
+
+All tests succeeded.
diff --git a/utils/misc.ml b/utils/misc.ml
index 54796af226..18fe9bfb1d 100644
--- a/utils/misc.ml
+++ b/utils/misc.ml
@@ -395,6 +395,32 @@ module Int_literal_converter = struct
   let nativeint s = cvt_int_aux s Nativeint.neg Nativeint.of_string
 end
 
+(* [find_first_mono p] assumes that there exists a natural number
+   N such that [p] is false on [0; N[ and true on [N; max_int], and
+   returns this N. (See misc.mli for the detailed specification.) *)
+let find_first_mono =
+  let rec find p ~low ~jump ~high =
+    (* Invariants:
+       [low, jump, high] are non-negative with [low < high],
+       [p low = false],
+       [p high = true]. *)
+    if low + 1 = high then high
+    (* ensure that [low + jump] is in ]low; high[ *)
+    else if jump < 1 then find p ~low ~jump:1 ~high
+    else if jump >= high - low then find p ~low ~jump:((high - low) / 2) ~high
+    else if p (low + jump) then
+      (* We jumped too high: continue with a smaller jump and lower limit *)
+      find p ~low:low ~jump:(jump / 2) ~high:(low + jump)
+    else
+      (* we jumped too low:
+         continue from [low + jump] with a larger jump *)
+      let next_jump = max jump (2 * jump) (* avoid overflows *) in
+      find p ~low:(low + jump) ~jump:next_jump ~high
+  in
+  fun p ->
+    if p 0 then 0
+    else find p ~low:0 ~jump:1 ~high:max_int
+
 (* String operations *)
 
 let chop_extensions file =
diff --git a/utils/misc.mli b/utils/misc.mli
index 0f77430c47..52e2bc5db6 100644
--- a/utils/misc.mli
+++ b/utils/misc.mli
@@ -307,6 +307,25 @@ module Int_literal_converter : sig
 
 end
 
+val find_first_mono : (int -> bool) -> int
+  (**[find_first_mono p] takes an integer predicate [p : int -> bool]
+     that we assume:
+     1. is monotonic on natural numbers:
+        if [a <= b] then [p a] implies [p b],
+     2. is satisfied for some natural numbers in range [0; max_int]
+        (this is equivalent to: [p max_int = true]).
+
+     [find_first_mono p] is the smallest natural number N that satisfies [p],
+     computed in O(log(N)) calls to [p].
+
+     Our implementation supports two cases where the preconditions on [p]
+     are not respected:
+     - If [p] is always [false], we silently return [max_int]
+       instead of looping or crashing.
+     - If [p] is non-monotonic but eventually true,
+       we return some satisfying value.
+  *)
+
 (** {1 String operations} *)
 
 val search_substring: string -> string -> int -> int
author	Gabriel Scherer <gabriel.scherer@gmail.com>	2023-02-23 13:23:11 +0100
committer	Gabriel Scherer <gabriel.scherer@gmail.com>	2023-02-28 11:11:56 +0100
commit	0116a8f9c3afe7e36fb45126d3aa78607a8b2095 (patch)
tree	a9819e96c87596f5be928264cee6bfc685faa15f
parent	fd6e6e0c9cf636f69cc90f4590834f77efaa5b95 (diff)
download	ocaml-0116a8f9c3afe7e36fb45126d3aa78607a8b2095.tar.gz