module LowStar

module B = LowStar.Buffer

open FStar.HyperStack.ST
open LowStar.BufferOps

F* のサブセットとしての Low*

この 論文 で形式化されている Low* は一階ラムダ計算です。 基本型はブールと固定幅整数です。 Low* は buffers とバッファ境界中におけるポインタ演算のプリミティブを持っています。 この形式化では、バッファ中に確保された場合のみ、構造体は有効です。

この章では有効と無効な例を使って Low* を説明します。 F* のサブセットが有効な Low* どのように構成するのか、読者に把握してもらうことが狙いです。

いくつかの有効な Low* の例

Low* の基本型は機種依存整数です。

let square (x: UInt32.t): UInt32.t =
  let open FStar.UInt32 in
  x *%^ x
uint32_t square(uint32_t x)
{
  return x * x;
}

古典的なコントロールフローは当然サポートされています。 再帰関数を使いたいかもしれません。 近代の GCC は末尾再帰最適化を行ないます。

let abs (x: Int32.t): Pure Int32.t
  (requires Int32.v x <> Int.min_int 32)
  (ensures fun r -> Int32.v r >= 0)
=
  let open FStar.Int32 in
  if x >=^ 0l then
    x
  else
    0l -^ x
int32_t abs(int32_t x)
{
  if (x >= (int32_t)0)
    return x;
  else
    return (int32_t)0 - x;
}

Low* は次の The buffer library のようなスタック確保もモデル化します。

let on_the_stack (): Stack UInt64.t (fun _ -> True) (fun _ _ _ -> True) =
  push_frame ();
  let b = B.alloca 0UL 64ul in
  b.(0ul) <- 32UL;
  let r = b.(0ul) in
  pop_frame ();
  r
uint64_t on_the_stack()
{
  uint64_t b[64U] = { 0U };
  b[0U] = (uint64_t)32U;
  uint64_t r = b[0U];
  return r;
}

同様に Low* はヒープ確保もサポートします。

let on_the_heap (): St UInt64.t =
  let b = B.malloc HyperStack.root 0UL 64ul in
  b.(0ul) <- 32UL;
  let r = b.(0ul) in
  B.free b;
  r
uint64_t on_the_heap()
{
  uint64_t *b = KRML_HOST_CALLOC((uint32_t)64U, sizeof (uint64_t));
  b[0U] = (uint64_t)32U;
  uint64_t r = b[0U];
  KRML_HOST_FREE(b);
  return r;
}

フラットなレコードは先の論文で形式化されていて、それはC言語の struct に変換されます。

type uint128 = {
  low: UInt64.t;
  high: UInt64.t
}
typedef struct uint128_s
{
  uint64_t low;
  uint64_t high;
}
uint128;

先の論文では構造体はバッファ中に確保されています。

let uint128_alloc (h l: UInt64.t): St (B.buffer uint128) =
  B.malloc HyperStack.root ({ low = l; high = h }) 1ul
uint128 *uint128_alloc(uint64_t h, uint64_t l)
{
  KRML_CHECK_SIZE(sizeof (uint128), (uint32_t)1U);
  uint128 *buf = KRML_HOST_MALLOC(sizeof (uint128));
  buf[0U] = ((uint128){ .low = l, .high = h });
  return buf;
}

また先の論文では、バッファのインデックスにアクセスでき、フィールドも選択できます。

let uint128_high (x: B.buffer uint128): Stack UInt64.t
  (requires fun h -> B.live h x /\ B.length x = 1)
  (ensures fun h0 _ h1 -> B.live h1 x)
=
  (x.(0ul)).high
uint64_t uint128_high(uint128 *x)
{
  return x->high;
}

C言語が定数を評価するのと同じようにグローバル定数を定義したいかもしれません。 概要としては、算術式と他のグローバルなアドレスはC言語の定数です。 しかし常に C11 標準 が真実です。

let min_int32 = FStar.Int32.(0l -^ 0x7fffffffl -^ 1l)
// Meta-evaluated by F*
int32_t min_int32 = (int32_t)-2147483648;

Low* におけるいくつかの拡張

KreMLin は先の論文における形式化を超えていくつかのプログラミングパターンをサポートしています。 それらはプログラマの生産性を最大化させます。 ここでその主要なものを見てみましょう。

KreMLin は F* 構造的な等価性 ((=) 演算子) をそれぞれの型に特化したC言語関数にコンパイルできます。 さらに、以下の関数は構造体の型を値として使うことを示しています。 この関数は値を渡されるC言語構造体に素直にコンパイルされます。 パフォーマンスへの影響に気をつけてください (?? を参照)。

let uint128_equal (x y: uint128) =
  x = y
static bool __eq__LowStar_uint128(uint128 y, uint128 x)
{
  return true && x.low == y.low && x.high == y.high;
}

bool uint128_equal(uint128 x, uint128 y)
{
  return __eq__LowStar_uint128(x, y);
}

F* の帰納を使いたいかもしれません。 KreMLin はそれら帰納をタグ付き共用体にコンパイルします。

noeq
type key =
  | Algorithm1: B.buffer UInt32.t -> key
  | Algorithm2: B.buffer UInt64.t -> key
typedef enum { Algorithm1, Algorithm2 } key_tags;

typedef struct key_s
{
  key_tags tag;
  union {
    uint32_t *case_Algorithm1;
    uint64_t *case_Algorithm2;
  }
  ;
}
key;

Generally, KreMLin performs a whole-program monomorphization of parameterized data types. The example below demonstrates this, along with a “pretty” compilation scheme for the option type that does not involves an anonymous union.

let abs2 (x: Int32.t): option Int32.t =
  let open FStar.Int32 in
  if x = min_int32 then
    None
  else if x >=^ 0l then
    Some x
  else
    Some (0l -^ x)
typedef enum { FStar_Pervasives_Native_None, FStar_Pervasives_Native_Some }
FStar_Pervasives_Native_option__int32_t_tags;

typedef struct FStar_Pervasives_Native_option__int32_t_s
{
  FStar_Pervasives_Native_option__int32_t_tags tag;
  int32_t v;
}
FStar_Pervasives_Native_option__int32_t;

FStar_Pervasives_Native_option__int32_t abs2(int32_t x)
{
  if (x == min_int32)
    return ((FStar_Pervasives_Native_option__int32_t){ .tag = FStar_Pervasives_Native_None });
  else if (x >= (int32_t)0)
    return
      ((FStar_Pervasives_Native_option__int32_t){ .tag = FStar_Pervasives_Native_Some, .v = x });
  else
    return
      (
        (FStar_Pervasives_Native_option__int32_t){
          .tag = FStar_Pervasives_Native_Some,
          .v = (int32_t)0 - x
        }
      );
}

Inductives are compiled by KreMLin, and so are pattern matches. Note that for a series of cascading if-then-elses, KreMLin has to insert a fallback else statement, both because the original F* code may be unverified and the pattern-matching may be incomplete, but also because the C compiler may trigger an error.

let fail_if #a #b (package: a * (a -> option b)): St b =
  let open C.Failure in
  let open C.String in
  let x, f = package in
  match f x with
  | None -> failwith !$"invalid argument: fail_if"
  | Some x -> x
int32_t
fail_if__int32_t_int32_t(
  K___int32_t_int32_t____FStar_Pervasives_Native_option__int32_t package
)
{
  int32_t x = package.fst;
  FStar_Pervasives_Native_option__int32_t (*f)(int32_t x0) = package.snd;
  FStar_Pervasives_Native_option__int32_t scrut = f(x);
  if (scrut.tag == FStar_Pervasives_Native_None)
    return C_Failure_failwith__int32_t("invalid argument: fail_if");
  else if (scrut.tag == FStar_Pervasives_Native_Some)
  {
    int32_t x1 = scrut.v;
    return x1;
  }
  else
  {
    KRML_HOST_PRINTF("KreMLin abort at %s:%d\n%s\n",
      __FILE__,
      __LINE__,
      "unreachable (pattern matches are exhaustive in F*)");
    KRML_HOST_EXIT(255U);
  }
}

Higher order is, to a certain extent, possible. The sample above demonstrates a block-scope function pointer. The fail_if function has been specialized on K__int32_t_int32_t, which is itself a specialization of the polymorphic pair type of F*. Below is a sample caller of fail_if__int32_t_int32_t, which relies on passing a pair of a function pointer and its argument.

let abs3 (x: Int32.t): St Int32.t =
  fail_if (x, abs2)
int32_t abs3(int32_t x)
{
  return
    fail_if__int32_t_int32_t((
        (K___int32_t_int32_t____FStar_Pervasives_Native_option__int32_t){ .fst = x, .snd = abs2 }
      ));
}

Local closures are not supported, as they do not have a natural compilation scheme to C. We will, however, show in (??) how to rely on F*’s meta-programming capabilities to normalize these closures away before passing them to KreMLin.

let pow4 (x: UInt32.t): UInt32.t =
  let open FStar.UInt32 in
  [@ inline_let ]
  let pow2 (y: UInt32.t) = y *%^ y in
  pow2 (pow2 x)
uint32_t pow4(uint32_t x)
{
  uint32_t x0 = x * x;
  return x0 * x0;
}

In the case that the user defines a global variable that does not compile to a C11 constant, KreMLin generates a “static initializer” in the special kremlinit_globals function. If the program has a main, KreMLin automatically prepends a call to kremlinit_globals in the main. If the program does not have a main and is intended to be used as a library, KreMLin emits a warning, which is fatal by default.

let uint128_zero (): Tot uint128 =
  { high = 0UL; low = 0UL }

let zero = uint128_zero ()
$ krml -skip-linking -no-prefix LowStar LowStar.fst
(...)
Warning 9: : Some globals did not compile to C values and must be
initialized before starting main(). You did not provide a main function,
so users of your library MUST MAKE SURE they call kremlinit_globals();
(see kremlinit.c).

$ cat kremlinit.c
(...)
void kremlinit_globals()
{
  zero = uint128_zero();
}

Some non-Low* code

We now review some classic programming patterns that are not supported in Low*.

The example below cannot be compiled for the following reasons:

  • local recursive let-bindings are not Low*;
  • local closure captures variable in scope (KreMLin does not do closure conversion)
  • the list type is not Low*.
let filter_map #a #b (f: a -> option b) (l: list a): list b =
  let rec aux (acc: list b) (l: list a): Tot (list b) (decreases l) =
    match l with
    | hd :: tl ->
        begin match f hd with
        | Some x -> aux (x :: acc) tl
        | None -> aux acc tl
        end
    | [] ->
        List.rev acc
  in
  aux [] l

Trying to compile the snippet above will generate a warning when calling F* to generate a .krml file.

$ krml -skip-compilation -verbose LowStar.fst
⚙ KreMLin auto-detecting tools.
(...)
✔ [F*,extract]
<dummy>(0,0-0,0): (Warning 250) Error while extracting LowStar.filter_map
to KreMLin (Failure("Internal error: name not found aux\n"))

To explain why the list type cannot be compiled to C, consider the snippet below. Data types are compiled as flat structures in C, meaning that the list type would have infinite size in C. This is compiled by KreMLin but rejected by the C compiler. See ?? for an example of a linked list.

type list_int32 =
| Nil: list_int32
| Cons: hd:Int32.t -> tl:list_int32 -> list_int32

let mk_list (): St list_int32 =
  Cons 0l Nil

Trying to compile the snippet above will generate an error when calling the C compiler to generate a .o file.

$ krml -skip-linking -verbose LowStar.fst
⚙ KreMLin auto-detecting tools.
(...)
✘ [CC,./LowStar.c]
In file included from ./LowStar.c:8:0:
./LowStar.h:95:22: error: field ‘tl’ has incomplete type
   LowStar_list_int32 tl;

Polymorphic assumes are also not compiled. KreMLin could generate one C extern declaration per monomorphic use, but this would require the user to provide a substantial amount of manually-written code, so instead we refuse to compile the definition below.

// Cannot be compiled: polymorphic assume val; solution: make the function
// monomorphic, or provide a C macro
assume val pair_up: #a:Type -> #b:Type -> a -> b -> a * b

Trying to compile the snippet above will generate a warning when calling F* to generate a .krml file.

$ krml -skip-compilation -verbose LowStar.fst
⚙ KreMLin auto-detecting tools.
(...)
✔ [F*,extract]
Not extracting LowStar.pair_up to KreMLin (polymorphic assumes are not supported)

One point worth mentioning is that indexed types are by default not supported. See section ?? for an unofficial KreMLin extension that works in some very narrow cases, or rewrite your code to make t an inductive. KreMLin currently does not have support for untagged unions, i.e. automatically making t a C union.

type alg = | Alg1 | Alg2
let t (a: alg) =
  match a with
  | Alg1 -> UInt32.t
  | Alg2 -> uint128

let default_t (a: alg): t a =
  match a with
  | Alg1 -> 0ul
  | Alg2 -> zero

Trying to compile the snippet above will generate invalid C code.

void *default_t(alg a)
{
  switch (a)
  {
    case Alg1:
      {
        return (void *)(uint32_t)0U;
      }
    case Alg2:
      {
        return (void *)zero
      }
    default:
      {
        KRML_HOST_PRINTF("KreMLin incomplete match at %s:%d\n", __FILE__, __LINE__);
        KRML_HOST_EXIT(253U);
      }
  }
}

If you are lucky, the C compiler may generate an error:

$ krml -skip-linking LowStar.fst -add-include '"kremstr.h"' -no-prefix LowStar -warn-error +9

✘ [CC,./LowStar.c]
./LowStar.c: In function ‘default_t’:
./LowStar.c:291:9: error: cannot convert to a pointer type
         return (void *)zero;

The Low* libraries

Low* is made up of a few primitive libraries that enjoy first-class support in KreMLin. These core libraries are typically made up of a model (an .fst file) and an interface (an .fsti file). Verification is performed against the model, but at extraction-time, the model is replaced with primitive C constructs.

The memory model

Beyond the language subset, one defining component of Low* is how it models the C memory.

The F* HyperHeap model

F* is by default equipped with HyperHeap, a hierarchical memory model that divides the heap into a tree of regions. This coarse-grained separation allows the programmer to state modifies clauses at the level of regions, rather than on individual references.

The HyperHeap memory model is described in the 2016 POPL paper, as well as the F* tutorial. We assume that the reader has a passing degree of familiarity with HyperHeap.

The HyperStack model

Low* refines the HyperHeap memory model, adding a distinguished set of regions that model the C call stack. Programs may use stack allocation, heap allocation or both. The HyperStack memory model offers a set of effects that capture the allocation behavior of functions.

The HyperStack memory model comprises the files FStar.Monotonic.HyperStack.fst, FStar.HyperStack.fst and FStar.HyperStack.ST.fst in the ulib directory of F*.

Note

Many verification errors point to definitions in these three files. Being familiar with these modules, their combinators and key concepts helps understand why a given program fails to verify.

Warning

We recommend always defining the ST abbreviation at the beginning of your module, in order to shadow the FStar.ST module, which is not Low*.

module ST = FStar.HyperStack.ST
module HS = FStar.HyperStack

The top-level region is the root, and is always a valid region. HS.rid is the type of regions.

let root: HS.rid = HS.root

Stack frames are modeled as distinguished regions that satisfy the is_stack_region predicate. Allocating in a stack frame, unsurprisingly, results in a stack-allocated variable or array in C. Stack frames may be de-allocated as program execution progresses up the call stack, meaning that the underlying HyperHeap region may disappear.

Regions that are not stack frames may not be de-allocated, and therefore satisfy the is_eternal_region predicate. This includes the root. Allocating in one of these regions amounts to performing a heap allocation in C.

Pushing a new stack frame amount to allocating a new stack region. In the HyperHeap model, creating a new region requires a parent. Thus, when a new stack frame is allocated, its parent is either the top-most stack frame, or the root if no stack frame has been allocated so far.

Warning

The root is not a stack region and does not satisfy is_stack_region.

let _ =
  assert (ST.is_eternal_region root /\ ~ (Monotonic.HyperStack.is_stack_region root))

The most popular effect is the Stack effect, which takes:

  • a precondition over the initial heap, of type HS.mem -> Type, and a

  • post-condition over the initial heap, the result, the final heap, of type HS.mem -> a -> HS.mem -> Type

    effect Stack (a:Type) (pre: ST.st_pre) (post: (m0: HS.mem -> Tot (ST.st_post' a (pre m0)))) =
      STATE a
        (fun (p: ST.st_post a) (h: HS.mem) ->
          pre h /\ (forall a h1. (pre h /\ post h a h1 /\ ST.equal_domains h h1) ==> p a h1))
    

The relevant bit in this otherwise mundane definition is the ST.equal_domains predicate.

let equal_domains (m0 m1: HS.mem) =
  m0.HS.tip == m1.HS.tip /\
  Set.equal (Map.domain m0.HS.h) (Map.domain m1.HS.h) /\
  ST.same_refs_in_all_regions m0 m1

The equal_domains predicate states that a function in the Stack effect:

  • preserves the tip of the memory, i.e. calling this function leaves the C call stack intact;
  • does not allocate any new region on the heap, i.e. this is a C function that does not heap-allocate;
  • does not allocate in any existing region, i.e. this is a C function that does not grow any existing stack frame on the call stack.

A function that satisfies these conditions is a function that can be safely compiled as a C function. In other words, using the native C call stack is a valid implementation of our model.

let f (x: UInt32.t): Stack UInt32.t (fun _ -> True) (fun _ _ _ -> True) =
  FStar.UInt32.( x *%^ x )

Note

The following examples use the [@ fail ] F* attribute. Remember that this tutorial is a valid F* file, which we put under continuous integration and version control. This attribute merely indicates to F* that the failure is intentional.

Based on the knowledge above, consider the following failing function.

[@ fail ]
let g (): Stack unit (fun _ -> True) (fun _ _ _ -> True) =
  let b = B.alloca 0ul 8ul in
  ()

F* reports an assertion failure for the is_stack_region predicate. Indeed, the alloca function requires that the tip be a valid stack region, which is false when no stack frame has been pushed on the call stack.

One important insight at this stage is that F* does not “automatically” enrich the verification context with the assumption that upon entering g, we have pushed a new stack frame. This would be the wrong thing to do for a total function; furthermore, there is simply no such support in the language.

Rather, the user is expected to manually indicate which operations need to conceptually happen in a new stack frame. The Low* memory model provides two combinators for this purpose: push_frame and pop_frame. The f function did not need them, because it performed no stateful operation.

We can attempt to fix g by adding a call to push_frame.

[@ fail ]
let g2 (): Stack unit (fun _ -> True) (fun _ _ _ -> True) =
  push_frame ();
  let b = B.alloca 0ul 8ul in
  ()

F* now reports an error for the equal_domains predicate above. Indeed, the only way to leave the C call stack intact, and therefore satisfy the requirements of the Stack effect, is to ensure we pop the stack frame we just pushed.

let g3 (): Stack unit (fun _ -> True) (fun _ _ _ -> True) =
  push_frame ();
  let b = B.alloca 0ul 8ul in
  pop_frame ();
  ()

g3 now successfully compiles to C:

void g3()
{
  uint32_t b[8U] = { 0U };
}

The Stack effect prevents heap allocation, hence ensuring that from the caller’s perspective, any heap (“eternal”) regions remain unchanged.

For code that performs heap allocations, the libraries offer the ST effect. It is similar to the Stack effect, and takes the same form of pre- and post-conditions, but allows heap allocation.

let g4 (): ST unit (fun _ -> True) (fun _ _ _ -> True) =
  push_frame ();
  let b = B.malloc HS.root 0ul 8ul in
  pop_frame ();
  ()

The St effect might occasionally be convenient.

effect St (a:Type) = ST a (fun _ -> True) (fun _ _ _ -> True)

One can reflect the memory as an HS.mem at any program point, by using ST.get ().

let test_st_get (): St unit =
  push_frame ();
  let m = ST.get () in
  assert (Monotonic.HyperStack.is_stack_region m.HS.tip);
  pop_frame ()

These are the basic building blocks of our memory model. Verifying on top of this memory model involves reflecting the state of the memory at the proof level, using the HS.mem type, and capturing the effect of allocations, updates and de-allocations using suitable pre- and post-conditions. This can be done using a combination of modifies clauses and libraries that reflect low-level constructs, such as buffers and machine integers, at the proof level. All of these are covered in the rest of this chapter.

Advanced: the StackInline effect

TODO

Machine integers

Machine integers are modeled as natural numbers that fit within a certain number of bits. This model is dropped by KreMLin, in favor of C’s fixed-width types.

Fixed-width integers are found in FStar.UInt{16,32,64,128}.fst and FStar.Int{16,32,64,128}. The FStar.Int.Cast.Full.fst module offers conversion functions between these integer types.

Warning

By default, KreMLin will rely on the non-standard unsigned __int128 C type to implement FStar.UInt128.t. This type is widely supported across GCC and Clang versions, but not by the Microsoft compilers. If you need 128-bit unsigned integers and portability, consider using KreMLin’s -fnouint128 flag to rely instead on an extracted version of FStar.UInt128.fst that is proven correct, but slower.

Machine integers offer the classic set of arithmetic operations. Like in C, unsigned integers have wraparound overflow semantics, exposed via the add_mod function. Signed integers offer no such function. Other undefined behaviors of C are ruled out at the F* level, such as shifting an integer by the bit width.

Note

In addition to classic arithmetic operations, some modules offer constant-time operations such as eq_mask and gte_mask, which allow defining a “secret integer” module on top of these integers, that offers no comparison operator returning a boolean, to avoid timing leaks. This is subject to change soon, with dedicated secret integer modules in the F* standard library.

Machine integers modules also define operators, suffixed with ^. For instance, the + operation for UInt32 is +^. Wraparound variants have an extra % character, such as +%^, when available.

FIXME (Jonathan Protzenko)

The unary minus is broken for machine integers. This does not parse: let x = UInt32.(-^ 0ul)

Operators follow the standard precedence rules of F*, which are outlined on its wiki. Operators are resolved in the current scope; we recommend the use of module abbreviations and the let-open notation M.( ... ).

module U32 = FStar.UInt32

let z = U32.(16ul -^ 8ul )

Note

By default, operations require that the caller prove that the result fits in the given integer width. For instance, U32.add has (requires (size (v a + v b) n)) as a precondition. The modules also offer variants such as U32.add_underspec, which can always be called, and has an implication in the post-condition (ensures (fun c -> size (v a + v b) n ==> v a + v b = v c)). The latter form is seldom used.

Machine integers can be reflected as natural numbers of type nat using the v function. It is generally more convenient to perform proofs on natural numbers.

let test_v (): unit =
  let x = 0ul in
  assert (U32.v x = 0)

The buffer library

LowStar.Buffer is the workhorse of Low*, and allows modeling C arrays on the stack and in the heap. LowStar.Buffer models C arrays as follows:

let lseq (a: Type) (l: nat) : Type =
  (s: Seq.seq a { Seq.length s == l } )

noeq
type buffer (a:Type) =
  | MkBuffer: max_length:UInt32.t
    -> content:reference (s: lseq a (U32.v max_length))
    -> idx:UInt32.t
    -> length:UInt32.t{U32.(v idx + v length <= v max_length)}
    -> buffer a

In other words, buffers are modeled as a reference to a sequence, along with a starting index idx, and a length, which captures how much of an allocation slice one is currently pointing to.

This is a model: at compilation-time, KreMLin implements buffers using C arrays.

The length is available in ghost (proof) code only: just like in C, one cannot compute the length of a buffer at run-time. Therefore, a typical pattern is to use refinements to tie together a buffer and its length, as we saw with the initial memcpy example.

let do_something (x: B.buffer UInt64.t) (l: U32.t { U32.v l = B.length x }): St unit =
  ()

Allocating a buffer on the stack is done using the alloca function, which takes an initial value and a length. alloca requires that the top of the stack be a valid stack frame.

let test_alloc_stack (): Stack unit (fun _ -> True) (fun _ _ _ -> True) =
  push_frame ();
  let b = B.alloca 0UL 8ul in
  pop_frame ();
  ()

Allocating a buffer on the heap is done using the malloc function, which takes a region, an initial value and a length. The region is purely for proof and separation purposes, and has no effect on the generated code. A buffer created with malloc can be freed with free.

let test_alloc (): St unit =
  let b = B.malloc HS.root 0UL 8ul in
  B.free b

Pointer arithmetic is performed by the means of the sub function. Under the hood, the sub function returns a buffer that points to the same underlying reference, but has different idx and length fields.

let test_sub (): St unit =
  let b = B.malloc HS.root 0UL 8ul in
  let b_l = B.sub b 0ul 4ul in // idx = 0; length = 4
  let b_r = B.sub b 4ul 4ul in // idx = 4; length = 4
  B.free b

Just like in C, one can only free the base pointer, i.e. this is an error:

[@ fail ]
let test_sub_error (): St unit =
  let b = B.malloc HS.root 0UL 8ul in
  let b_l = B.sub b 0ul 4ul in // idx = 0; length = 4
  B.free b_l

Reading and modifying a buffer is performed by means of the index and upd functions. These are exposed as the .() and .()<- operators respectively, defined in LowStar.BufferOps (the latter module only contains those operators, and is meant to be used with open to bring operators into scope without further polluting the context with any definition from LowStar.Buffer.)

let test_index (): St unit =
  let b = B.malloc HS.root 0UL 8ul in
  b.(0ul) <- UInt64.add_mod b.(0ul) b.(0ul);
  B.free b

Buffers are reflected at the proof level using sequences, via the as_seq function, which returns the contents of a given buffer in a given heap, i.e. a sequence slice ranging over the interval [idx; idx + length).

let test_as_seq (): St unit =
  let b = B.malloc HS.root 0UL 1ul in
  let h = ST.get () in
  assert (Seq.equal (B.as_seq h b) (Seq.cons 0UL Seq.createEmpty));
  B.free b

B.get is an often-convenient shorthand to index the value of a given buffer in a given heap.

let test_get (): St unit =
  let b = B.malloc HS.root 0UL 1ul in
  let h = ST.get () in
  assert (B.get h b 0 = 0UL);
  B.free b

The modifies clauses library

A modifies clause is a part of the postcondition of a Low* effectful function which describes which memory locations are modified by that function. For instance, consider the following function:

module M = LowStar.Modifies

let example_modifies_callee (b1 b2: B.buffer UInt32.t) : Stack unit
  (requires (fun h -> B.live h b1 /\ B.live h b2 /\ B.length b1 == 1 /\ B.length b2 == 1 /\ B.disjoint b1 b2))
  (ensures (fun h _ h' ->
    M.modifies (M.loc_union (M.loc_buffer b1) (M.loc_buffer b2)) h h' /\
    B.live h' b1 /\ B.live h' b2 /\
    B.get h' b1 0 == 18ul /\ B.get h' b2 0 == 42ul
  ))
= b2.(0ul) <- 42ul;
  b1.(0ul) <- 18ul

The pre- and post-conditions of the example_modifies_callee function state that, if b1 and b2 are two disjoint live buffers of length 1, then example_modifies changes their contents to 18ul and 42ul, respectively. In itself, the modifies clause tells nothing, but it starts becoming useful when the example_modifies_callee function is called by another function:

let example_modifies_caller (b0: B.buffer UInt32.t) : Stack unit
  (requires (fun h -> B.live h b0 /\ B.length b0 == 3))
  (ensures (fun h _ h' ->
    M.modifies (M.loc_buffer b0) h h' /\
    B.live h' b0 /\
    B.get h' b0 0 == B.get h b0 0
  ))
= let b1 = B.sub b0 1ul 1ul in
  let b2 = B.sub b0 2ul 1ul in
  example_modifies_callee b1 b2;
  assert (forall h . B.get h b0 0 == B.get h (B.gsub b0 0ul 1ul) 0)

This function takes a buffer b0 of length 3, and from it, extracts two disjoint buffers, b1 and b2, as the sub-buffers of b0 of length 1 at offsets 1 and 2, respectively. Since they are both live and disjoint, they can then be passed to example_modifies_callee. Then, the post-condition of example_modifies_caller about the contents of the cell of b0 at offset 0 is due to the fact that that cell of b0 is disjoint from both b1 and b2 (because it is the cell of the sub-buffer of b0 at offset 0, as suggested by the assert), and so, by virtue of the modifies clause of example_modifies_callee, its value is preserved.

The Low* system libraries

KreMLin offers primitive support for a variety of C concepts.

C standard library

The C.fst module, found in the kremlib directory, exposes a hodgepodge of C functions.

From the C header <stdlib.h>:

  • rand and srand; note that this makes the assumption that sizeof int == 4, which is true of most modern platforms
  • exit, which is not polymorphic, per the reasons exposed earlier in F* のサブセットとしての Low*, see C.Failure below instead
  • the stdout, stderr and fflush functions
  • the intptr_t C11 type, along with a default value nullptr – useful to expose external APIs that take void *
  • the EXIT_SUCCESS and EXIT_FAILURE macros; these are defined as an inductive; since C.fst is not extracted, the code is left with references that resolve to the underlying C macros

From the C header <time.h>:

  • clock_t and clock – the post-condition is not very good and needs to be fixed

Other:

  • a variety of endian-ness conversion macros that are implemented using host-specific instructions to maximize efficiency
  • a C.char type, which is interconvertible with uint8 through a variety of coercions; the C standard states that char is the only type that is not equal to its signed or unsigned variants, meaning that we can neither expose type char = UInt8.t or type char = Int8.t.

Test helpers

The TestLib.fsti module requires you to call KreMLin with -add-include '"testlib.h"' and testlib.c as extra arguments. It provides a couple helper functions, including print_clock_diff to deal with clock_t above.

C string literals

The C.String.fst module exposes a bare-bones model of C string literals, i.e. const char \*s = "my string literal";. This relies on a syntactic check that the argument to of_literal is a constant string literal in the original F* source. Such strings are zero-terminated, and their length can be computed (TODO). They can be printed on the standard output via C.String.print.

From the F* typing perspective, these strings are values that have an eternal lifetime and are immutable. This corresponds exactly to the semantics of a C string literal placed in the RODATA section of an ELF executable.

Operations such as mutation or concatenation cannot be implemented on C.String.t. The former is not possible given the value semantics of C.String.t, and the latter would require allocations that would never be freed. Instead, one should allocate a C character array and blit string literals into it to achieve concatenation, hence properly tracking mutation and the underlying allocation. See test/Server.fst for an incomplete, work-in-progress model of dealing with mutable string buffers.

Note

Writing a string literal directly in the F* source code will confusingly also extract it to a C string literal. This is not Low*, because such a string has type Prims.string, whose operations, such (^) (concatenation) cannot be implemented in Low*.

KreMLin offers support for Prims.string via the -add-include '"kremstr.h"' and kremstr.c options. These implement Prims.string as zero-terminated char *’s (not const). Operations such as (^) perform allocations on the heap and never free them, since Prims.strings are values that do not have a lifetime in the first place. This is a sound implementation, but should only be used to facilitate porting existing F* programs to Low*. Any program that uses Prims.string will leak memory.

C NULL pointers

LowStar.Buffer also exposes a model of the C NULL pointer, null – this is what you should use if you need zero-length buffers. The NULL pointer is always live, and always has length 0. The pointer and pointer_or_null functions define convenient aliases, while the (!*) operator (defined in LowStar.BufferOps) guarantees that the dereference will be pretty-printed with a * C dereference, as opposed to an access at array index 0. Pointers can always be tested for nullity via the is_null p function, which is guaranteed to be pretty-printed as p != NULL.

A polymorphic exit

The C.Failure.fst exposes a single failwith function that properly has a polymorphic return type. This uses a recursion hack in combination with KreMLin’s monomorphization, and will require you to disable compiler warnings for infinite recursion.