From 9f0759631ade18090cdd7265d9948b3ab8ab4d5a Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Mon, 13 Feb 2023 17:15:56 +0000 Subject: [PATCH 1/3] Apply make format Signed-off-by: Steven Woods --- lib/xapi-stdext-encodings/encodings.ml | 163 +++-- lib/xapi-stdext-encodings/encodings.mli | 71 +- lib/xapi-stdext-encodings/test.ml | 720 +++++++++++-------- lib/xapi-stdext-pervasives/pervasiveext.ml | 3 +- lib/xapi-stdext-pervasives/pervasiveext.mli | 1 + lib/xapi-stdext-std/listext.mli | 10 +- lib/xapi-stdext-std/listext_test.ml | 2 +- lib/xapi-stdext-std/xstringext.ml | 3 +- lib/xapi-stdext-std/xstringext.mli | 2 +- lib/xapi-stdext-std/xstringext_test.ml | 5 +- lib/xapi-stdext-threads/dune | 1 + lib/xapi-stdext-threads/semaphore.ml | 55 +- lib/xapi-stdext-threads/semaphore.mli | 12 +- lib/xapi-stdext-threads/threadext.ml | 121 ++-- lib/xapi-stdext-threads/threadext.mli | 12 +- lib/xapi-stdext-unix/dune | 1 + lib/xapi-stdext-unix/unixext.ml | 755 +++++++++++--------- lib/xapi-stdext-unix/unixext.mli | 167 +++-- lib/xapi-stdext-zerocheck/zerocheck.mli | 2 +- xapi-stdext.opam | 1 + 20 files changed, 1231 insertions(+), 876 deletions(-) diff --git a/lib/xapi-stdext-encodings/encodings.ml b/lib/xapi-stdext-encodings/encodings.ml index fe4c6526..1f955215 100644 --- a/lib/xapi-stdext-encodings/encodings.ml +++ b/lib/xapi-stdext-encodings/encodings.ml @@ -12,75 +12,76 @@ * GNU Lesser General Public License for more details. *) exception UCS_value_out_of_range + exception UCS_value_prohibited_in_UTF8 + exception UCS_value_prohibited_in_XML + exception UTF8_character_incomplete + exception UTF8_header_byte_invalid + exception UTF8_continuation_byte_invalid + exception UTF8_encoding_not_canonical + exception String_incomplete (* === Utility Functions === *) let ( +++ ) = Int32.add + let ( --- ) = Int32.sub + let ( &&& ) = Int32.logand + let ( ||| ) = Int32.logor + let ( <<< ) = Int32.shift_left + let ( >>> ) = Int32.shift_right_logical (* === Unicode Functions === *) module UCS = struct - let min_value = 0x000000l - let max_value = 0x1fffffl - let is_non_character value = false - || (0xfdd0l <= value && value <= 0xfdefl) (* case 1 *) - || (Int32.logand 0xfffel value = 0xfffel) (* case 2 *) + let max_value = 0x1fffffl - let is_out_of_range value = - value < min_value || value > max_value + let is_non_character value = + false + || (0xfdd0l <= value && value <= 0xfdefl) (* case 1 *) + || Int32.logand 0xfffel value = 0xfffel + (* case 2 *) - let is_surrogate value = - (0xd800l <= value && value <= 0xdfffl) + let is_out_of_range value = value < min_value || value > max_value + let is_surrogate value = 0xd800l <= value && value <= 0xdfffl end module XML = struct - - let is_forbidden_control_character value = value < 0x20l - && value <> 0x09l - && value <> 0x0al - && value <> 0x0dl - + let is_forbidden_control_character value = + value < 0x20l && value <> 0x09l && value <> 0x0al && value <> 0x0dl end (* === UCS Validators === *) module type UCS_VALIDATOR = sig - val validate : int32 -> unit - end module UTF8_UCS_validator : UCS_VALIDATOR = struct - let validate value = - if UCS.is_out_of_range value then raise UCS_value_out_of_range; - if UCS.is_non_character value then raise UCS_value_prohibited_in_UTF8; - if UCS.is_surrogate value then raise UCS_value_prohibited_in_UTF8 - + if UCS.is_out_of_range value then raise UCS_value_out_of_range ; + if UCS.is_non_character value then raise UCS_value_prohibited_in_UTF8 ; + if UCS.is_surrogate value then raise UCS_value_prohibited_in_UTF8 end module XML_UTF8_UCS_validator : UCS_VALIDATOR = struct - let validate value = - UTF8_UCS_validator.validate value; - if XML.is_forbidden_control_character value - then raise UCS_value_prohibited_in_XML - + UTF8_UCS_validator.validate value ; + if XML.is_forbidden_control_character value then + raise UCS_value_prohibited_in_XML end (* ==== Character Codecs ==== *) @@ -95,107 +96,129 @@ end module UTF8_CODEC (UCS_validator : UCS_VALIDATOR) = struct let width_required_for_ucs_value value = - if value < 0x000080l (* 1 lsl 7 *) then 1 else - if value < 0x000800l (* 1 lsl 11 *) then 2 else - if value < 0x010000l (* 1 lsl 16 *) then 3 else 4 + if value < 0x000080l (* 1 lsl 7 *) then + 1 + else if value < 0x000800l (* 1 lsl 11 *) then + 2 + else if value < 0x010000l (* 1 lsl 16 *) then + 3 + else + 4 (* === Decoding === *) let decode_header_byte byte = - if byte land 0b10000000 = 0b00000000 then (byte , 1) else - if byte land 0b11100000 = 0b11000000 then (byte land 0b0011111, 2) else - if byte land 0b11110000 = 0b11100000 then (byte land 0b0001111, 3) else - if byte land 0b11111000 = 0b11110000 then (byte land 0b0000111, 4) else + if byte land 0b10000000 = 0b00000000 then + (byte, 1) + else if byte land 0b11100000 = 0b11000000 then + (byte land 0b0011111, 2) + else if byte land 0b11110000 = 0b11100000 then + (byte land 0b0001111, 3) + else if byte land 0b11111000 = 0b11110000 then + (byte land 0b0000111, 4) + else raise UTF8_header_byte_invalid let decode_continuation_byte byte = - if byte land 0b11000000 = 0b10000000 then byte land 0b00111111 else + if byte land 0b11000000 = 0b10000000 then + byte land 0b00111111 + else raise UTF8_continuation_byte_invalid let decode_character string index = let value, width = decode_header_byte (Char.code string.[index]) in - let value = if width = 1 then (Int32.of_int value) - else begin + let value = + if width = 1 then + Int32.of_int value + else let value = ref (Int32.of_int value) in for index = index + 1 to index + width - 1 do let chunk = decode_continuation_byte (Char.code string.[index]) in - value := (!value <<< 6) ||| (Int32.of_int chunk) - done; - if width > (width_required_for_ucs_value !value) - then raise UTF8_encoding_not_canonical; + value := !value <<< 6 ||| Int32.of_int chunk + done ; + if width > width_required_for_ucs_value !value then + raise UTF8_encoding_not_canonical ; !value - end in - UCS_validator.validate value; + in + UCS_validator.validate value ; (value, width) (* === Encoding === *) let encode_header_byte width value = match width with - | 1 -> value - | 2 -> value ||| 0b11000000l - | 3 -> value ||| 0b11100000l - | 4 -> value ||| 0b11110000l - | _ -> raise UCS_value_out_of_range + | 1 -> + value + | 2 -> + value ||| 0b11000000l + | 3 -> + value ||| 0b11100000l + | 4 -> + value ||| 0b11110000l + | _ -> + raise UCS_value_out_of_range let encode_continuation_byte value = - ((value &&& 0b00111111l) ||| 0b10000000l, value >>> 6) + (value &&& 0b00111111l ||| 0b10000000l, value >>> 6) let encode_character value = - UCS_validator.validate value; + UCS_validator.validate value ; let width = width_required_for_ucs_value value in let b = Bytes.make width ' ' in (* Start by encoding the continuation bytes in reverse order. *) let rec encode_continuation_bytes remainder index = - if index = 0 then remainder else + if index = 0 then + remainder + else let byte, remainder = encode_continuation_byte remainder in - Bytes.set b index @@ Char.chr (Int32.to_int byte); - encode_continuation_bytes remainder (index - 1) in + Bytes.set b index @@ Char.chr (Int32.to_int byte) ; + encode_continuation_bytes remainder (index - 1) + in let remainder = encode_continuation_bytes value (width - 1) in (* Finish by encoding the header byte. *) let byte = encode_header_byte width remainder in - Bytes.set b 0 @@ Char.chr (Int32.to_int byte); + Bytes.set b 0 @@ Char.chr (Int32.to_int byte) ; Bytes.unsafe_to_string b - end -module UTF8_codec = UTF8_CODEC ( UTF8_UCS_validator) +module UTF8_codec = UTF8_CODEC (UTF8_UCS_validator) module XML_UTF8_codec = UTF8_CODEC (XML_UTF8_UCS_validator) (* === String Validators === *) module type STRING_VALIDATOR = sig - val is_valid : string -> bool + val validate : string -> unit - val longest_valid_prefix : string -> string + val longest_valid_prefix : string -> string end exception Validation_error of int * exn -module String_validator (Decoder : CHARACTER_DECODER) : STRING_VALIDATOR = struct - +module String_validator (Decoder : CHARACTER_DECODER) : STRING_VALIDATOR = +struct let validate string = let index = ref 0 and length = String.length string in - begin try + ( try while !index < length do let _, width = Decoder.decode_character string !index in index := !index + width - done; + done with - | Invalid_argument _ -> raise String_incomplete - | error -> raise (Validation_error (!index, error)) - end; assert (!index = length) + | Invalid_argument _ -> + raise String_incomplete + | error -> + raise (Validation_error (!index, error)) + ) ; + assert (!index = length) - let is_valid string = - try validate string; true with _ -> false + let is_valid string = try validate string ; true with _ -> false let longest_valid_prefix string = - try validate string; string + try validate string ; string with Validation_error (index, _) -> String.sub string 0 index - end -module UTF8 = String_validator ( UTF8_codec) +module UTF8 = String_validator (UTF8_codec) module UTF8_XML = String_validator (XML_UTF8_codec) diff --git a/lib/xapi-stdext-encodings/encodings.mli b/lib/xapi-stdext-encodings/encodings.mli index bc2bdd06..065f2f20 100644 --- a/lib/xapi-stdext-encodings/encodings.mli +++ b/lib/xapi-stdext-encodings/encodings.mli @@ -17,12 +17,19 @@ (** {2 Exceptions} *) exception UCS_value_out_of_range + exception UCS_value_prohibited_in_UTF8 + exception UCS_value_prohibited_in_XML + exception UTF8_character_incomplete + exception UTF8_header_byte_invalid + exception UTF8_continuation_byte_invalid + exception UTF8_encoding_not_canonical + exception String_incomplete (** {2 UCS Validators} *) @@ -42,119 +49,133 @@ module XML_UTF8_UCS_validator : UCS_VALIDATOR module UCS : sig val min_value : int32 + val max_value : int32 + val is_non_character : int32 -> bool (** Returns true if and only if the given value corresponds to a UCS * non-character. Such non-characters are forbidden for use in open * interchange of Unicode text data, and include the following: * 1. values from 0xFDD0 to 0xFDEF; and * 2. values 0xnFFFE and 0xnFFFF, where (0x0 <= n <= 0x10). * See the Unicode 5.0 Standard, section 16.7 for further details. *) - val is_non_character : int32 -> bool + val is_out_of_range : int32 -> bool (** Returns true if and only if the given value lies outside the * entire UCS range. *) - val is_out_of_range : int32 -> bool + val is_surrogate : int32 -> bool (** Returns true if and only if the given value corresponds to a UCS * surrogate code point, only for use in UTF-16 encoded strings. * See the Unicode 5.0 Standard, section 16.6 for further details. *) - val is_surrogate : int32 -> bool end -val (+++) : int32 -> int32 -> int32 -val (---) : int32 -> int32 -> int32 -val (&&&) : int32 -> int32 -> int32 -val (|||) : int32 -> int32 -> int32 -val (<<<) : int32 -> int -> int32 -val (>>>) : int32 -> int -> int32 +val ( +++ ) : int32 -> int32 -> int32 + +val ( --- ) : int32 -> int32 -> int32 + +val ( &&& ) : int32 -> int32 -> int32 + +val ( ||| ) : int32 -> int32 -> int32 + +val ( <<< ) : int32 -> int -> int32 + +val ( >>> ) : int32 -> int -> int32 module XML : sig + val is_forbidden_control_character : int32 -> bool (** Returns true if and only if the given value corresponds to * a forbidden control character as defined in section 2.2 of * the XML specification, version 1.0. *) - val is_forbidden_control_character : int32 -> bool end (** {2 Character Codecs} *) module type CHARACTER_ENCODER = sig - + val encode_character : int32 -> string (** Encodes a single character value, returning a string containing * the character. Raises an error if the character value is invalid. *) - val encode_character : int32 -> string - end module type CHARACTER_DECODER = sig + val decode_character : string -> int -> int32 * int (** Decodes a single character embedded within a string. Given a string * and an index into that string, returns a tuple (value, width) where: * value = the value of the character at the given index; and * width = the width of the character at the given index, in bytes. * Raises an appropriate error if the character is invalid. *) - val decode_character : string -> int -> int32 * int end module UTF8_CODEC (UCS_validator : UCS_VALIDATOR) : sig + val width_required_for_ucs_value : int32 -> int (** Given a valid UCS value, returns the canonical * number of bytes required to encode the value. *) - val width_required_for_ucs_value : int32 -> int (** {3 Decoding} *) + val decode_header_byte : int -> int * int (** Decodes a header byte, returning a tuple (v, w) where: * v = the (partial) value contained within the byte; and * w = the total width of the encoded character, in bytes. *) - val decode_header_byte : int -> int * int + val decode_continuation_byte : int -> int (** Decodes a continuation byte, returning the * 6-bit-wide value contained within the byte. *) - val decode_continuation_byte : int -> int + val decode_character : string -> int -> int32 * int (** Decodes a single character embedded within a string. Given a string * and an index into that string, returns a tuple (value, width) where: * value = the value of the character at the given index; and * width = the width of the character at the given index, in bytes. * Raises an appropriate error if the character is invalid. *) - val decode_character : string -> int -> int32 * int (** {3 Encoding} *) + val encode_header_byte : int -> int32 -> int32 (** Encodes a header byte for the given parameters, where: * width = the total width of the encoded character, in bytes; * value = the most significant bits of the original UCS value. *) - val encode_header_byte : int -> int32 -> int32 + val encode_continuation_byte : int32 -> int32 * int32 (** Encodes a continuation byte from the given UCS * remainder value, returning a tuple (b, r), where: * b = the continuation byte; * r = a new UCS remainder value. *) - val encode_continuation_byte : int32 -> int32 * int32 + val encode_character : int32 -> string (** Encodes a single character value, returning a string containing * the character. Raises an error if the character value is invalid. *) - val encode_character : int32 -> string end module UTF8_codec : sig val width_required_for_ucs_value : int32 -> int + val decode_header_byte : int -> int * int + val decode_continuation_byte : int -> int + val decode_character : string -> int -> int32 * int val encode_header_byte : int -> int32 -> int32 + val encode_continuation_byte : int32 -> int32 * int32 + val encode_character : int32 -> string end module XML_UTF8_codec : sig val width_required_for_ucs_value : int32 -> int + val decode_header_byte : int -> int * int + val decode_continuation_byte : int -> int + val decode_character : string -> int -> int32 * int val encode_header_byte : int -> int32 -> int32 + val encode_continuation_byte : int32 -> int32 * int32 + val encode_character : int32 -> string end @@ -163,16 +184,14 @@ end (** Provides functionality for validating and processing * strings according to a particular character encoding. *) module type STRING_VALIDATOR = sig - - (** Returns true if and only if the given string is validly-encoded. *) val is_valid : string -> bool + (** Returns true if and only if the given string is validly-encoded. *) + val validate : string -> unit (** Raises an encoding error if the given string is not validly-encoded. *) - val validate: string -> unit - (** Returns the longest validly-encoded prefix of the given string. *) val longest_valid_prefix : string -> string - + (** Returns the longest validly-encoded prefix of the given string. *) end module String_validator (Decoder : CHARACTER_DECODER) : STRING_VALIDATOR diff --git a/lib/xapi-stdext-encodings/test.ml b/lib/xapi-stdext-encodings/test.ml index 56bcaaac..b1d0f0cc 100644 --- a/lib/xapi-stdext-encodings/test.ml +++ b/lib/xapi-stdext-encodings/test.ml @@ -12,8 +12,9 @@ * GNU Lesser General Public License for more details. *) module E = Xapi_stdext_encodings.Encodings + (* Pull in the infix operators from Encodings used in this test *) -let (---), (+++), (<<<) = E.( (---), (+++), (<<<) ) +let ( --- ), ( +++ ), ( <<< ) = E.(( --- ), ( +++ ), ( <<< )) (* === Mock exceptions ==================================================== *) @@ -23,7 +24,9 @@ exception Decode_error (* === Mock types ===========================================================*) (** Generates mock character widths, in bytes. *) -module type WIDTH_GENERATOR = sig val next : unit -> int end +module type WIDTH_GENERATOR = sig + val next : unit -> int +end (* === Mock UCS validators ================================================= *) @@ -36,7 +39,6 @@ end (** A character decoder that logs every index it is called with. *) module Logged_character_decoder (W : WIDTH_GENERATOR) = struct - (** The indices already supplied to the decoder. *) let indices = ref ([] : int list) @@ -47,19 +49,25 @@ module Logged_character_decoder (W : WIDTH_GENERATOR) = struct let decode_character string index = let width = W.next () in for index = index to index + width - 1 do - ignore (string.[index]) - done; - indices := (index :: !indices); - 0l, width - + ignore string.[index] + done ; + indices := index :: !indices ; + (0l, width) end -module Logged_1_byte_character_decoder = Logged_character_decoder - (struct let next () = 1 end) -module Logged_2_byte_character_decoder = Logged_character_decoder - (struct let next () = 2 end) -module Logged_n_byte_character_decoder = Logged_character_decoder - (struct let last = ref 0 let next () = incr last; !last end) +module Logged_1_byte_character_decoder = Logged_character_decoder (struct + let next () = 1 +end) + +module Logged_2_byte_character_decoder = Logged_character_decoder (struct + let next () = 2 +end) + +module Logged_n_byte_character_decoder = Logged_character_decoder (struct + let last = ref 0 + + let next () = incr last ; !last +end) (** A decoder that succeeds for all characters. *) module Universal_character_decoder = struct @@ -83,244 +91,307 @@ module Lenient_UTF8_codec = E.UTF8_CODEC (Lenient_UCS_validator) (* === Mock string validators ============================================== *) -module Logged_1_byte_character_string_validator = E.String_validator - (Logged_1_byte_character_decoder) -module Logged_2_byte_character_string_validator = E.String_validator - (Logged_2_byte_character_decoder) -module Logged_n_byte_character_string_validator = E.String_validator - (Logged_n_byte_character_decoder) +module Logged_1_byte_character_string_validator = + E.String_validator (Logged_1_byte_character_decoder) +module Logged_2_byte_character_string_validator = + E.String_validator (Logged_2_byte_character_decoder) +module Logged_n_byte_character_string_validator = + E.String_validator (Logged_n_byte_character_decoder) (** A validator that accepts all strings. *) -module Universal_string_validator = E.String_validator - (Universal_character_decoder) +module Universal_string_validator = + E.String_validator (Universal_character_decoder) (** A validator that rejects all strings. *) -module Failing_string_validator = E.String_validator - (Failing_character_decoder) +module Failing_string_validator = E.String_validator (Failing_character_decoder) (** A validator that rejects strings containing the character 'F'. *) -module Selective_string_validator = E.String_validator - (Selective_character_decoder) +module Selective_string_validator = + E.String_validator (Selective_character_decoder) (* === Test helpers ======================================================== *) let assert_true = Alcotest.(check bool) "true" true + let assert_false = Alcotest.(check bool) "false" false + let check_indices = Alcotest.(check (list int)) "indices" let assert_raises_match exception_match fn = try - fn (); + fn () ; Alcotest.fail "assert_raises_match: failure expected" with failure -> - if not (exception_match failure) - then raise failure - else () + if not (exception_match failure) then + raise failure + else + () (* === Tests =============================================================== *) module String_validator = struct - let test_is_valid () = - assert_true (Universal_string_validator.is_valid "" ); - assert_true (Universal_string_validator.is_valid "123456789"); - assert_true (Selective_string_validator.is_valid "" ); - assert_true (Selective_string_validator.is_valid "123456789"); - assert_false (Selective_string_validator.is_valid "F23456789"); - assert_false (Selective_string_validator.is_valid "1234F6789"); - assert_false (Selective_string_validator.is_valid "12345678F"); + assert_true (Universal_string_validator.is_valid "") ; + assert_true (Universal_string_validator.is_valid "123456789") ; + assert_true (Selective_string_validator.is_valid "") ; + assert_true (Selective_string_validator.is_valid "123456789") ; + assert_false (Selective_string_validator.is_valid "F23456789") ; + assert_false (Selective_string_validator.is_valid "1234F6789") ; + assert_false (Selective_string_validator.is_valid "12345678F") ; assert_false (Selective_string_validator.is_valid "FFFFFFFFF") let test_longest_valid_prefix () = - Alcotest.(check string) "prefix" (Universal_string_validator.longest_valid_prefix "" ) "" ; - Alcotest.(check string) "prefix" (Universal_string_validator.longest_valid_prefix "123456789") "123456789"; - Alcotest.(check string) "prefix" (Selective_string_validator.longest_valid_prefix "" ) "" ; - Alcotest.(check string) "prefix" (Selective_string_validator.longest_valid_prefix "123456789") "123456789"; - Alcotest.(check string) "prefix" (Selective_string_validator.longest_valid_prefix "F23456789") "" ; - Alcotest.(check string) "prefix" (Selective_string_validator.longest_valid_prefix "1234F6789") "1234" ; - Alcotest.(check string) "prefix" (Selective_string_validator.longest_valid_prefix "12345678F") "12345678" ; - Alcotest.(check string) "prefix" (Selective_string_validator.longest_valid_prefix "FFFFFFFFF") "" + Alcotest.(check string) + "prefix" + (Universal_string_validator.longest_valid_prefix "") + "" ; + Alcotest.(check string) + "prefix" + (Universal_string_validator.longest_valid_prefix "123456789") + "123456789" ; + Alcotest.(check string) + "prefix" + (Selective_string_validator.longest_valid_prefix "") + "" ; + Alcotest.(check string) + "prefix" + (Selective_string_validator.longest_valid_prefix "123456789") + "123456789" ; + Alcotest.(check string) + "prefix" + (Selective_string_validator.longest_valid_prefix "F23456789") + "" ; + Alcotest.(check string) + "prefix" + (Selective_string_validator.longest_valid_prefix "1234F6789") + "1234" ; + Alcotest.(check string) + "prefix" + (Selective_string_validator.longest_valid_prefix "12345678F") + "12345678" ; + Alcotest.(check string) + "prefix" + (Selective_string_validator.longest_valid_prefix "FFFFFFFFF") + "" let test_validate_with_1_byte_characters () = - Logged_1_byte_character_decoder.reset (); - Logged_1_byte_character_string_validator.validate "0123456789"; - Alcotest.(check (list int)) "indices" !Logged_1_byte_character_decoder.indices [9;8;7;6;5;4;3;2;1;0] + Logged_1_byte_character_decoder.reset () ; + Logged_1_byte_character_string_validator.validate "0123456789" ; + Alcotest.(check (list int)) + "indices" + !Logged_1_byte_character_decoder.indices + [9; 8; 7; 6; 5; 4; 3; 2; 1; 0] let test_validate_with_2_byte_characters () = - Logged_2_byte_character_decoder.reset (); - Logged_2_byte_character_string_validator.validate "0123456789"; - Alcotest.(check (list int)) "indices" !Logged_2_byte_character_decoder.indices [8;6;4;2;0] + Logged_2_byte_character_decoder.reset () ; + Logged_2_byte_character_string_validator.validate "0123456789" ; + Alcotest.(check (list int)) + "indices" + !Logged_2_byte_character_decoder.indices + [8; 6; 4; 2; 0] let test_validate_with_n_byte_characters () = - Logged_n_byte_character_decoder.reset (); - Logged_n_byte_character_string_validator.validate "0123456789"; - check_indices !Logged_n_byte_character_decoder.indices [6;3;1;0] + Logged_n_byte_character_decoder.reset () ; + Logged_n_byte_character_string_validator.validate "0123456789" ; + check_indices !Logged_n_byte_character_decoder.indices [6; 3; 1; 0] (** Tests that validation does not fail for an empty string. *) let test_validate_with_empty_string () = - Logged_1_byte_character_decoder.reset (); - Logged_1_byte_character_string_validator.validate ""; - check_indices !Logged_1_byte_character_decoder.indices [] + Logged_1_byte_character_decoder.reset () ; + Logged_1_byte_character_string_validator.validate "" ; + check_indices !Logged_1_byte_character_decoder.indices [] let test_validate_with_incomplete_string () = - Logged_2_byte_character_decoder.reset (); - Alcotest.check_raises - "Validation fails correctly for an incomplete string" - E.String_incomplete - (fun () -> Logged_2_byte_character_string_validator.validate "0") + Logged_2_byte_character_decoder.reset () ; + Alcotest.check_raises "Validation fails correctly for an incomplete string" + E.String_incomplete (fun () -> + Logged_2_byte_character_string_validator.validate "0" + ) let test_validate_with_failing_decoders () = - Failing_string_validator.validate ""; - assert_raises_match - (function E.Validation_error (0, Decode_error) -> true | _ -> false) - (fun () -> Selective_string_validator.validate "F"); - assert_raises_match - (function E.Validation_error (0, Decode_error) -> true | _ -> false) - (fun () -> Selective_string_validator.validate "F12345678"); - assert_raises_match - (function E.Validation_error (4, Decode_error) -> true | _ -> false) - (fun () -> Selective_string_validator.validate "0123F5678"); - assert_raises_match - (function E.Validation_error (8, Decode_error) -> true | _ -> false) - (fun () -> Selective_string_validator.validate "01234567F"); - assert_raises_match - (function E.Validation_error (0, Decode_error) -> true | _ -> false) - (fun () -> Selective_string_validator.validate "FFFFFFFFF") + Failing_string_validator.validate "" ; + assert_raises_match + (function E.Validation_error (0, Decode_error) -> true | _ -> false) + (fun () -> Selective_string_validator.validate "F") ; + assert_raises_match + (function E.Validation_error (0, Decode_error) -> true | _ -> false) + (fun () -> Selective_string_validator.validate "F12345678") ; + assert_raises_match + (function E.Validation_error (4, Decode_error) -> true | _ -> false) + (fun () -> Selective_string_validator.validate "0123F5678") ; + assert_raises_match + (function E.Validation_error (8, Decode_error) -> true | _ -> false) + (fun () -> Selective_string_validator.validate "01234567F") ; + assert_raises_match + (function E.Validation_error (0, Decode_error) -> true | _ -> false) + (fun () -> Selective_string_validator.validate "FFFFFFFFF") let tests = - [ "test_is_valid", `Quick, test_is_valid - ; "test_longest_valid_prefix", `Quick, test_longest_valid_prefix - ; "test_validate_with_1_byte_characters", `Quick, test_validate_with_1_byte_characters - ; "test_validate_with_2_byte_characters", `Quick, test_validate_with_2_byte_characters - ; "test_validate_with_n_byte_characters", `Quick, test_validate_with_n_byte_characters - ; "test_validate_with_empty_string", `Quick, test_validate_with_empty_string - ; "test_validate_with_incomplete_string", `Quick, test_validate_with_incomplete_string - ; "test_validate_with_failing_decoders", `Quick, test_validate_with_failing_decoders + [ + ("test_is_valid", `Quick, test_is_valid) + ; ("test_longest_valid_prefix", `Quick, test_longest_valid_prefix) + ; ( "test_validate_with_1_byte_characters" + , `Quick + , test_validate_with_1_byte_characters + ) + ; ( "test_validate_with_2_byte_characters" + , `Quick + , test_validate_with_2_byte_characters + ) + ; ( "test_validate_with_n_byte_characters" + , `Quick + , test_validate_with_n_byte_characters + ) + ; ( "test_validate_with_empty_string" + , `Quick + , test_validate_with_empty_string + ) + ; ( "test_validate_with_incomplete_string" + , `Quick + , test_validate_with_incomplete_string + ) + ; ( "test_validate_with_failing_decoders" + , `Quick + , test_validate_with_failing_decoders + ) ] - end -module UCS = struct include E.UCS +module UCS = struct + include E.UCS (** A list of UCS non-characters values, including: a. non-characters within the basic multilingual plane; b. non-characters at the end of the basic multilingual plane; c. non-characters at the end of the private use area. *) - let non_characters = [ - 0x00fdd0l; 0x00fdefl; (* case a. *) - 0x00fffel; 0x00ffffl; (* case b. *) - 0x1ffffel; 0x1fffffl; (* case c. *) - ] + let non_characters = + [ + 0x00fdd0l + ; 0x00fdefl + ; (* case a. *) + 0x00fffel + ; 0x00ffffl + ; (* case b. *) + 0x1ffffel + ; 0x1fffffl + (* case c. *) + ] (** A list of UCS character values located immediately before or after UCS non-character values, including: a. non-characters within the basic multilingual plane; b. non-characters at the end of the basic multilingual plane; c. non-characters at the end of the private use area. *) - let valid_characters_next_to_non_characters = [ - 0x00fdcfl; 0x00fdf0l; (* case a. *) - 0x00fffdl; 0x010000l; (* case b. *) - 0x1ffffdl; 0x200000l; (* case c. *) - ] + let valid_characters_next_to_non_characters = + [ + 0x00fdcfl + ; 0x00fdf0l + ; (* case a. *) + 0x00fffdl + ; 0x010000l + ; (* case b. *) + 0x1ffffdl + ; 0x200000l + (* case c. *) + ] let test_is_non_character () = - List.iter (fun value -> assert_true (is_non_character (value))) - non_characters; - List.iter (fun value -> assert_false (is_non_character (value))) - valid_characters_next_to_non_characters + List.iter (fun value -> assert_true (is_non_character value)) non_characters ; + List.iter + (fun value -> assert_false (is_non_character value)) + valid_characters_next_to_non_characters let test_is_out_of_range () = - assert_true (is_out_of_range (min_value --- 1l)); - assert_false (is_out_of_range (min_value)); - assert_false (is_out_of_range (max_value)); - assert_true (is_out_of_range (max_value +++ 1l)) + assert_true (is_out_of_range (min_value --- 1l)) ; + assert_false (is_out_of_range min_value) ; + assert_false (is_out_of_range max_value) ; + assert_true (is_out_of_range (max_value +++ 1l)) let test_is_surrogate () = - assert_false (is_surrogate (0xd7ffl)); - assert_true (is_surrogate (0xd800l)); - assert_true (is_surrogate (0xdfffl)); - assert_false (is_surrogate (0xe000l)) + assert_false (is_surrogate 0xd7ffl) ; + assert_true (is_surrogate 0xd800l) ; + assert_true (is_surrogate 0xdfffl) ; + assert_false (is_surrogate 0xe000l) let tests = - [ "test_is_non_character", `Quick, test_is_non_character - ; "test_is_out_of_range", `Quick, test_is_out_of_range - ; "test_is_surrogate", `Quick, test_is_surrogate + [ + ("test_is_non_character", `Quick, test_is_non_character) + ; ("test_is_out_of_range", `Quick, test_is_out_of_range) + ; ("test_is_surrogate", `Quick, test_is_surrogate) ] - end -module XML = struct include E.XML +module XML = struct + include E.XML let test_is_forbidden_control_character () = - assert_true (is_forbidden_control_character (0x00l)); - assert_true (is_forbidden_control_character (0x19l)); - assert_false (is_forbidden_control_character (0x09l)); - assert_false (is_forbidden_control_character (0x0al)); - assert_false (is_forbidden_control_character (0x0dl)); - assert_false (is_forbidden_control_character (0x20l)) + assert_true (is_forbidden_control_character 0x00l) ; + assert_true (is_forbidden_control_character 0x19l) ; + assert_false (is_forbidden_control_character 0x09l) ; + assert_false (is_forbidden_control_character 0x0al) ; + assert_false (is_forbidden_control_character 0x0dl) ; + assert_false (is_forbidden_control_character 0x20l) let tests = - [ "test_is_forbidden_control_character", `Quick, test_is_forbidden_control_character - ] - + [ + ( "test_is_forbidden_control_character" + , `Quick + , test_is_forbidden_control_character + ) + ] end -module UTF8_UCS_validator = struct include E.UTF8_UCS_validator +module UTF8_UCS_validator = struct + include E.UTF8_UCS_validator let test_validate () = - let value = ref (UCS.min_value --- 1l) in - while !value <= (UCS.max_value +++ 1l) do - if UCS.is_out_of_range !value - then Alcotest.check_raises "should fail" - E.UCS_value_out_of_range - (fun () -> validate !value) - else - if UCS.is_non_character !value - || UCS.is_surrogate !value - then Alcotest.check_raises "should fail" - E.UCS_value_prohibited_in_UTF8 - (fun () -> validate !value) - else - validate !value; - value := !value +++ 1l - done - - let tests = - [ "test_vaidate", `Quick, test_validate - ] - + let value = ref (UCS.min_value --- 1l) in + while !value <= UCS.max_value +++ 1l do + if UCS.is_out_of_range !value then + Alcotest.check_raises "should fail" E.UCS_value_out_of_range (fun () -> + validate !value + ) + else if UCS.is_non_character !value || UCS.is_surrogate !value then + Alcotest.check_raises "should fail" E.UCS_value_prohibited_in_UTF8 + (fun () -> validate !value + ) + else + validate !value ; + value := !value +++ 1l + done + + let tests = [("test_vaidate", `Quick, test_validate)] end (** Tests the XML-specific UTF-8 UCS validation function. *) -module XML_UTF8_UCS_validator = struct include E.XML_UTF8_UCS_validator +module XML_UTF8_UCS_validator = struct + include E.XML_UTF8_UCS_validator let test_validate () = - let value = ref (UCS.min_value --- 1l) in - while !value <= (UCS.max_value +++ 1l) do - if UCS.is_out_of_range !value - then Alcotest.check_raises "should fail" E.UCS_value_out_of_range - (fun () -> validate !value) - else - if UCS.is_non_character !value - || UCS.is_surrogate !value - then Alcotest.check_raises "should fail" E.UCS_value_prohibited_in_UTF8 - (fun () -> validate !value) - else - if XML.is_forbidden_control_character !value - then Alcotest.check_raises "should fail" E.UCS_value_prohibited_in_XML - (fun () -> validate !value) - else - validate !value; - value := !value +++ 1l - done - - let tests = - [ "test_validate", `Quick, test_validate - ] - + let value = ref (UCS.min_value --- 1l) in + while !value <= UCS.max_value +++ 1l do + if UCS.is_out_of_range !value then + Alcotest.check_raises "should fail" E.UCS_value_out_of_range (fun () -> + validate !value + ) + else if UCS.is_non_character !value || UCS.is_surrogate !value then + Alcotest.check_raises "should fail" E.UCS_value_prohibited_in_UTF8 + (fun () -> validate !value + ) + else if XML.is_forbidden_control_character !value then + Alcotest.check_raises "should fail" E.UCS_value_prohibited_in_XML + (fun () -> validate !value + ) + else + validate !value ; + value := !value +++ 1l + done + + let tests = [("test_validate", `Quick, test_validate)] end -module UTF8_codec = struct include E.UTF8_codec +module UTF8_codec = struct + include E.UTF8_codec (** A list of canonical encoding widths of UCS values, represented by tuples of the form (v, w), where: @@ -328,17 +399,25 @@ module UTF8_codec = struct include E.UTF8_codec w = the width of the encoded character, in bytes. *) let valid_ucs_value_widths = [ - (1l , 1); ((1l <<< 7) --- 1l, 1); - (1l <<< 7, 2); ((1l <<< 11) --- 1l, 2); - (1l <<< 11, 3); ((1l <<< 16) --- 1l, 3); - (1l <<< 16, 4); ((1l <<< 21) --- 1l, 4); + (1l, 1) + ; ((1l <<< 7) --- 1l, 1) + ; (1l <<< 7, 2) + ; ((1l <<< 11) --- 1l, 2) + ; (1l <<< 11, 3) + ; ((1l <<< 16) --- 1l, 3) + ; (1l <<< 16, 4) + ; ((1l <<< 21) --- 1l, 4) ] let test_width_required_for_ucs_value () = - List.iter - (fun (value, width) -> - Alcotest.(check int) "same ints" (width_required_for_ucs_value value) width) - valid_ucs_value_widths + List.iter + (fun (value, width) -> + Alcotest.(check int) + "same ints" + (width_required_for_ucs_value value) + width + ) + valid_ucs_value_widths (** A list of valid header byte decodings, represented by tuples of the form (b, (v, w)), where: @@ -347,41 +426,48 @@ module UTF8_codec = struct include E.UTF8_codec w = the total width of the encoded character, in bytes. *) let valid_header_byte_decodings = [ - (0b00000000, (0b00000000, 1)); - (0b00000001, (0b00000001, 1)); - (0b01111111, (0b01111111, 1)); - (0b11000000, (0b00000000, 2)); - (0b11000001, (0b00000001, 2)); - (0b11011111, (0b00011111, 2)); - (0b11100000, (0b00000000, 3)); - (0b11100001, (0b00000001, 3)); - (0b11101111, (0b00001111, 3)); - (0b11110000, (0b00000000, 4)); - (0b11110001, (0b00000001, 4)); - (0b11110111, (0b00000111, 4)); + (0b00000000, (0b00000000, 1)) + ; (0b00000001, (0b00000001, 1)) + ; (0b01111111, (0b01111111, 1)) + ; (0b11000000, (0b00000000, 2)) + ; (0b11000001, (0b00000001, 2)) + ; (0b11011111, (0b00011111, 2)) + ; (0b11100000, (0b00000000, 3)) + ; (0b11100001, (0b00000001, 3)) + ; (0b11101111, (0b00001111, 3)) + ; (0b11110000, (0b00000000, 4)) + ; (0b11110001, (0b00000001, 4)) + ; (0b11110111, (0b00000111, 4)) ] (** A list of invalid header bytes that should not be decodable. *) let invalid_header_bytes = [ - 0b10000000; 0b10111111; - 0b11111000; 0b11111011; - 0b11111100; 0b11111101; - 0b11111110; 0b11111111; + 0b10000000 + ; 0b10111111 + ; 0b11111000 + ; 0b11111011 + ; 0b11111100 + ; 0b11111101 + ; 0b11111110 + ; 0b11111111 ] let test_decode_header_byte_when_valid () = - List.iter - (fun (b, (v, w)) -> - Alcotest.(check (pair int int)) "same ints" (decode_header_byte b) (v, w)) - valid_header_byte_decodings + List.iter + (fun (b, (v, w)) -> + Alcotest.(check (pair int int)) "same ints" (decode_header_byte b) (v, w) + ) + valid_header_byte_decodings let test_decode_header_byte_when_invalid () = - List.iter - (fun b -> - Alcotest.check_raises "should fail" E.UTF8_header_byte_invalid - (fun () -> decode_header_byte b |> ignore)) - invalid_header_bytes + List.iter + (fun b -> + Alcotest.check_raises "should fail" E.UTF8_header_byte_invalid + (fun () -> decode_header_byte b |> ignore + ) + ) + invalid_header_bytes (** A list of valid continuation byte decodings, represented by tuples of the form (b, v), where: @@ -389,36 +475,46 @@ module UTF8_codec = struct include E.UTF8_codec v = the partial value contained within the byte. *) let valid_continuation_byte_decodings = [ - (0b10000000, 0b00000000); - (0b10000001, 0b00000001); - (0b10111110, 0b00111110); - (0b10111111, 0b00111111); + (0b10000000, 0b00000000) + ; (0b10000001, 0b00000001) + ; (0b10111110, 0b00111110) + ; (0b10111111, 0b00111111) ] (** A list of invalid continuation bytes that should not be decodable. *) let invalid_continuation_bytes = [ - 0b00000000; 0b01111111; - 0b11000000; 0b11011111; - 0b11100000; 0b11101111; - 0b11110000; 0b11110111; - 0b11111000; 0b11111011; - 0b11111100; 0b11111101; - 0b11111111; 0b11111110; + 0b00000000 + ; 0b01111111 + ; 0b11000000 + ; 0b11011111 + ; 0b11100000 + ; 0b11101111 + ; 0b11110000 + ; 0b11110111 + ; 0b11111000 + ; 0b11111011 + ; 0b11111100 + ; 0b11111101 + ; 0b11111111 + ; 0b11111110 ] let test_decode_continuation_byte_when_valid () = - List.iter - (fun (byte, value) -> - Alcotest.(check int) "same ints" (decode_continuation_byte byte) value) - valid_continuation_byte_decodings + List.iter + (fun (byte, value) -> + Alcotest.(check int) "same ints" (decode_continuation_byte byte) value + ) + valid_continuation_byte_decodings let test_decode_continuation_byte_when_invalid () = - List.iter - (fun byte -> - Alcotest.check_raises "should fail" E.UTF8_continuation_byte_invalid - (fun () -> decode_continuation_byte byte |> ignore)) - invalid_continuation_bytes + List.iter + (fun byte -> + Alcotest.check_raises "should fail" E.UTF8_continuation_byte_invalid + (fun () -> decode_continuation_byte byte |> ignore + ) + ) + invalid_continuation_bytes (** A list of valid character decodings represented by tuples of the form (s, (v, w)), where: @@ -433,32 +529,55 @@ module UTF8_codec = struct include E.UTF8_codec v_min = the smallest UCS value encodable in b bytes. v_max = the greatest UCS value encodable in b bytes. *) - let valid_character_decodings = [ - (* 7654321 *) - (* 0b0xxxxxxx *) (* 00000000000000xxxxxxx *) - "\x00" (* 0b00000000 *), (0b000000000000000000000l, 1); - "\x7f" (* 0b01111111 *), (0b000000000000001111111l, 1); - (* 10987654321 *) - (* 0b110xxxsx 0b10xxxxxx *) (* 0000000000xxxsxxxxxxx *) - "\xc2\x80" (* 0b11000010 0b10000000 *), (0b000000000000010000000l, 2); - "\xdf\xbf" (* 0b11011111 0b10111111 *), (0b000000000011111111111l, 2); - (* 6543210987654321 *) - (* 0b1110xxxx 0b10sxxxxx 0b10xxxxxx *) (* xxxxsxxxxxxxxxxx *) - "\xe0\xa0\x80" (* 0b11100000 0b10100000 0b10000000 *), (0b000000000100000000000l, 3); - "\xef\xbf\xbf" (* 0b11101111 0b10111111 0b10111111 *), (0b000001111111111111111l, 3); - (* 109876543210987654321 *) - (* 0b11110xxx 0b10xsxxxx 0b10xxxxxx 0b10xxxxxx *) (* xxxxsxxxxxxxxxxxxxxxx *) - "\xf0\x90\x80\x80" (* 0b11110000 0b10010000 0b10000000 0b10000000 *), (0b000010000000000000000l, 4); - "\xf7\xbf\xbf\xbf" (* 0b11110111 0b10111111 0b10111111 0b10111111 *), (0b111111111111111111111l, 4); - ] + let valid_character_decodings = + [ + (* 7654321 *) + (* 0b0xxxxxxx *) + (* 00000000000000xxxxxxx *) + ( "\x00" (* 0b00000000 *) + , (0b000000000000000000000l, 1) + ) + ; ( "\x7f" (* 0b01111111 *) + , (0b000000000000001111111l, 1) + ) + ; (* 10987654321 *) + (* 0b110xxxsx 0b10xxxxxx *) + (* 0000000000xxxsxxxxxxx *) + ( "\xc2\x80" (* 0b11000010 0b10000000 *) + , (0b000000000000010000000l, 2) + ) + ; ( "\xdf\xbf" (* 0b11011111 0b10111111 *) + , (0b000000000011111111111l, 2) + ) + ; (* 6543210987654321 *) + (* 0b1110xxxx 0b10sxxxxx 0b10xxxxxx *) + (* xxxxsxxxxxxxxxxx *) + ( "\xe0\xa0\x80" (* 0b11100000 0b10100000 0b10000000 *) + , (0b000000000100000000000l, 3) + ) + ; ( "\xef\xbf\xbf" (* 0b11101111 0b10111111 0b10111111 *) + , (0b000001111111111111111l, 3) + ) + ; (* 109876543210987654321 *) + (* 0b11110xxx 0b10xsxxxx 0b10xxxxxx 0b10xxxxxx *) + (* xxxxsxxxxxxxxxxxxxxxx *) + ( "\xf0\x90\x80\x80" (* 0b11110000 0b10010000 0b10000000 0b10000000 *) + , (0b000010000000000000000l, 4) + ) + ; ( "\xf7\xbf\xbf\xbf" (* 0b11110111 0b10111111 0b10111111 0b10111111 *) + , (0b111111111111111111111l, 4) + ) + ] let test_decode_character_when_valid () = - List.iter - (fun (string, (value, width)) -> - Alcotest.(check (pair int32 int)) "same pair" - (Lenient_UTF8_codec.decode_character string 0) - (value, width)) - valid_character_decodings + List.iter + (fun (string, (value, width)) -> + Alcotest.(check (pair int32 int)) + "same pair" + (Lenient_UTF8_codec.decode_character string 0) + (value, width) + ) + valid_character_decodings (** A list of strings containing overlong character encodings. For each byte length b in [2...4], this list contains the @@ -466,17 +585,19 @@ module UTF8_codec = struct include E.UTF8_codec than the smallest UCS value validly-encodable in b bytes. *) let overlong_character_encodings = [ - "\xc1\xbf" (* 0b11000001 0b10111111 *); - "\xe0\x9f\xbf" (* 0b11100000 0b10011111 0b10111111 *); - "\xf0\x8f\xbf\xbf" (* 0b11110000 0b10001111 0b10111111 0b10111111 *); + "\xc1\xbf" (* 0b11000001 0b10111111 *) + ; "\xe0\x9f\xbf" (* 0b11100000 0b10011111 0b10111111 *) + ; "\xf0\x8f\xbf\xbf" (* 0b11110000 0b10001111 0b10111111 0b10111111 *) ] let test_decode_character_when_overlong () = - List.iter - (fun string -> - Alcotest.check_raises "should fail" E.UTF8_encoding_not_canonical - (fun () -> Lenient_UTF8_codec.decode_character string 0 |> ignore)) - overlong_character_encodings + List.iter + (fun string -> + Alcotest.check_raises "should fail" E.UTF8_encoding_not_canonical + (fun () -> Lenient_UTF8_codec.decode_character string 0 |> ignore + ) + ) + overlong_character_encodings (** Encodes a valid UCS value and then decodes it again, testing: a. that the encoded width is canonical for the given value. @@ -484,45 +605,68 @@ module UTF8_codec = struct include E.UTF8_codec let test_encode_decode_cycle_for_value value = let string = Lenient_UTF8_codec.encode_character value in let decoded_value, decoded_width = - Lenient_UTF8_codec.decode_character string 0 in + Lenient_UTF8_codec.decode_character string 0 + in let width = E.UTF8_codec.width_required_for_ucs_value value in - if (value <> decoded_value) then Alcotest.fail - (Printf.sprintf - "expected value %06lx but decoded value %06lx\n" - value decoded_value); - if (width <> decoded_width) then Alcotest.fail - (Printf.sprintf - "expected width %i but decoded width %i\n" - width decoded_width) + if value <> decoded_value then + Alcotest.fail + (Printf.sprintf "expected value %06lx but decoded value %06lx\n" value + decoded_value + ) ; + if width <> decoded_width then + Alcotest.fail + (Printf.sprintf "expected width %i but decoded width %i\n" width + decoded_width + ) let test_encode_decode_cycle () = - let value = ref UCS.min_value in - while !value <= UCS.max_value do - test_encode_decode_cycle_for_value !value; - value := Int32.add !value 1l; - done + let value = ref UCS.min_value in + while !value <= UCS.max_value do + test_encode_decode_cycle_for_value !value ; + value := Int32.add !value 1l + done let tests = - [ "test_width_required_for_ucs_value", `Quick, test_width_required_for_ucs_value - ; "test_decode_header_byte_when_valid", `Quick, test_decode_header_byte_when_valid - ; "test_decode_header_byte_when_invalid", `Quick, test_decode_header_byte_when_invalid - ; "test_decode_continuation_byte_when_valid", `Quick, test_decode_continuation_byte_when_valid - ; "test_decode_continuation_byte_when_invalid", `Quick, test_decode_continuation_byte_when_invalid - ; "test_decode_character_when_valid", `Quick, test_decode_character_when_valid - ; "test_decode_character_when_overlong", `Quick, test_decode_character_when_overlong - ; "test_encode_decode_cycle", `Quick, test_encode_decode_cycle + [ + ( "test_width_required_for_ucs_value" + , `Quick + , test_width_required_for_ucs_value + ) + ; ( "test_decode_header_byte_when_valid" + , `Quick + , test_decode_header_byte_when_valid + ) + ; ( "test_decode_header_byte_when_invalid" + , `Quick + , test_decode_header_byte_when_invalid + ) + ; ( "test_decode_continuation_byte_when_valid" + , `Quick + , test_decode_continuation_byte_when_valid + ) + ; ( "test_decode_continuation_byte_when_invalid" + , `Quick + , test_decode_continuation_byte_when_invalid + ) + ; ( "test_decode_character_when_valid" + , `Quick + , test_decode_character_when_valid + ) + ; ( "test_decode_character_when_overlong" + , `Quick + , test_decode_character_when_overlong + ) + ; ("test_encode_decode_cycle", `Quick, test_encode_decode_cycle) ] - end let () = - Alcotest.run - "Encodings" + Alcotest.run "Encodings" [ - "UCS", UCS.tests - ; "XML", XML.tests - ; "String_validator", String_validator.tests - ; "UTF8_UCS_validator", UTF8_UCS_validator.tests - ; "XML_UTF8_UCS_validator", XML_UTF8_UCS_validator.tests - ; "UTF8_codec", UTF8_codec.tests + ("UCS", UCS.tests) + ; ("XML", XML.tests) + ; ("String_validator", String_validator.tests) + ; ("UTF8_UCS_validator", UTF8_UCS_validator.tests) + ; ("XML_UTF8_UCS_validator", XML_UTF8_UCS_validator.tests) + ; ("UTF8_codec", UTF8_codec.tests) ] diff --git a/lib/xapi-stdext-pervasives/pervasiveext.ml b/lib/xapi-stdext-pervasives/pervasiveext.ml index 4840c11b..44d51b64 100644 --- a/lib/xapi-stdext-pervasives/pervasiveext.ml +++ b/lib/xapi-stdext-pervasives/pervasiveext.ml @@ -33,7 +33,8 @@ let finally fct clean_f = m "finally: Error while running cleanup after failure of main \ function: %s" - (Printexc.to_string cleanup_exn)) + (Printexc.to_string cleanup_exn) + ) ) ; raise exn in diff --git a/lib/xapi-stdext-pervasives/pervasiveext.mli b/lib/xapi-stdext-pervasives/pervasiveext.mli index d0e7fdc5..0c60fa58 100644 --- a/lib/xapi-stdext-pervasives/pervasiveext.mli +++ b/lib/xapi-stdext-pervasives/pervasiveext.mli @@ -53,5 +53,6 @@ val ignore_bool : bool -> unit val ( ++ ) : ('b -> 'c) -> ('a -> 'b) -> 'a -> 'c [@@ocaml.deprecated "Not a standard idiom. Define it locally if needed."] + val ( $ ) : ('a -> 'b) -> 'a -> 'b [@@ocaml.deprecated "Not right-associative. Replace with @@"] diff --git a/lib/xapi-stdext-std/listext.mli b/lib/xapi-stdext-std/listext.mli index e7b6092a..70e80660 100644 --- a/lib/xapi-stdext-std/listext.mli +++ b/lib/xapi-stdext-std/listext.mli @@ -148,14 +148,17 @@ module List : sig val between_tr : 'a -> 'a list -> 'a list (** Tail-recursive {!between}. *) - val randomize : 'a list -> 'a list [@@deprecated "Not used"] + val randomize : 'a list -> 'a list + [@@deprecated "Not used"] (** Generate a random permutation of the given list. *) - val distribute : 'a -> 'a list -> 'a list list [@@deprecated "Not used"] + val distribute : 'a -> 'a list -> 'a list list + [@@deprecated "Not used"] (** Distribute the given element over the given list, returning a list of lists with the new element in each position. *) - val permute : 'a list -> 'a list list [@@deprecated "Not used"] + val permute : 'a list -> 'a list list + [@@deprecated "Not used"] (** Generate all permutations of the given list. *) val inner : @@ -187,5 +190,4 @@ module List : sig val intersect : 'a list -> 'a list -> 'a list (** Returns the intersection of two lists. *) - end diff --git a/lib/xapi-stdext-std/listext_test.ml b/lib/xapi-stdext-std/listext_test.ml index 8fcedeb9..dc141f25 100644 --- a/lib/xapi-stdext-std/listext_test.ml +++ b/lib/xapi-stdext-std/listext_test.ml @@ -9,7 +9,7 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. - *) +*) module Listext = Xapi_stdext_std.Listext.List diff --git a/lib/xapi-stdext-std/xstringext.ml b/lib/xapi-stdext-std/xstringext.ml index 4f85ba94..8f5b7130 100644 --- a/lib/xapi-stdext-std/xstringext.ml +++ b/lib/xapi-stdext-std/xstringext.ml @@ -170,7 +170,8 @@ module String = struct Bytes.blit_string s !orig_offset new_b !dest_offset len ; Bytes.blit_string t 0 new_b (!dest_offset + len) len_t ; orig_offset := !orig_offset + len + len_f ; - dest_offset := !dest_offset + len + len_t) + dest_offset := !dest_offset + len + len_t + ) indexes ; Bytes.blit_string s !orig_offset new_b !dest_offset (String.length s - !orig_offset) ; diff --git a/lib/xapi-stdext-std/xstringext.mli b/lib/xapi-stdext-std/xstringext.mli index 4c4c489d..e2587929 100644 --- a/lib/xapi-stdext-std/xstringext.mli +++ b/lib/xapi-stdext-std/xstringext.mli @@ -16,9 +16,9 @@ module String : sig val of_char : char -> string + val rev_map : (char -> char) -> string -> string (** Map a string to a string, applying the given function in reverse order. *) - val rev_map : (char -> char) -> string -> string val rev_iter : (char -> unit) -> string -> unit (** Iterate over the characters in a string in reverse order. *) diff --git a/lib/xapi-stdext-std/xstringext_test.ml b/lib/xapi-stdext-std/xstringext_test.ml index 096ed58a..7d2766cb 100644 --- a/lib/xapi-stdext-std/xstringext_test.ml +++ b/lib/xapi-stdext-std/xstringext_test.ml @@ -9,7 +9,7 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. - *) +*) module XString = Xapi_stdext_std.Xstringext.String @@ -77,7 +77,8 @@ let test_split = , [ ('.', "...", [""; ""; "."]) ; ('.', "foo.bar.baz", ["foo"; "bar"; "baz"]) - ] ) + ] + ) ; (4, [('.', "...", [""; ""; ""; ""])]) ] in diff --git a/lib/xapi-stdext-threads/dune b/lib/xapi-stdext-threads/dune index ecf854e3..e9111bd6 100644 --- a/lib/xapi-stdext-threads/dune +++ b/lib/xapi-stdext-threads/dune @@ -2,6 +2,7 @@ (public_name xapi-stdext-threads) (name xapi_stdext_threads) (libraries + polly threads unix xapi-stdext-pervasives) diff --git a/lib/xapi-stdext-threads/semaphore.ml b/lib/xapi-stdext-threads/semaphore.ml index b1dc6707..06621049 100644 --- a/lib/xapi-stdext-threads/semaphore.ml +++ b/lib/xapi-stdext-threads/semaphore.ml @@ -12,51 +12,46 @@ * GNU Lesser General Public License for more details. *) -type t = { - mutable n : int; - m : Mutex.t; - c : Condition.t; -} +type t = {mutable n: int; m: Mutex.t; c: Condition.t} let create n = if n <= 0 then - invalid_arg (Printf.sprintf - "Semaphore value must be positive, got %d" n); - let m = Mutex.create () - and c = Condition.create () in - { n; m; c; } + invalid_arg (Printf.sprintf "Semaphore value must be positive, got %d" n) ; + let m = Mutex.create () and c = Condition.create () in + {n; m; c} exception Inconsistent_state of string -let inconsistent_state fmt = Printf.ksprintf (fun msg -> - raise (Inconsistent_state msg)) fmt + +let inconsistent_state fmt = + Printf.ksprintf (fun msg -> raise (Inconsistent_state msg)) fmt let acquire s k = if k <= 0 then - invalid_arg (Printf.sprintf - "Semaphore acquisition requires a positive value, got %d" k); - Mutex.lock s.m; + invalid_arg + (Printf.sprintf "Semaphore acquisition requires a positive value, got %d" + k + ) ; + Mutex.lock s.m ; while s.n < k do - Condition.wait s.c s.m; - done; + Condition.wait s.c s.m + done ; if not (s.n >= k) then - inconsistent_state "Semaphore value cannot be smaller than %d, got %d" k s.n; - s.n <- s.n - k; - Condition.signal s.c; + inconsistent_state "Semaphore value cannot be smaller than %d, got %d" k s.n ; + s.n <- s.n - k ; + Condition.signal s.c ; Mutex.unlock s.m let release s k = if k <= 0 then - invalid_arg (Printf.sprintf - "Semaphore release requires a positive value, got %d" k); - Mutex.lock s.m; - s.n <- s.n + k; - Condition.signal s.c; + invalid_arg + (Printf.sprintf "Semaphore release requires a positive value, got %d" k) ; + Mutex.lock s.m ; + s.n <- s.n + k ; + Condition.signal s.c ; Mutex.unlock s.m let execute_with_weight s k f = - acquire s k; - Xapi_stdext_pervasives.Pervasiveext.finally f - (fun () -> release s k) + acquire s k ; + Xapi_stdext_pervasives.Pervasiveext.finally f (fun () -> release s k) -let execute s f = - execute_with_weight s 1 f +let execute s f = execute_with_weight s 1 f diff --git a/lib/xapi-stdext-threads/semaphore.mli b/lib/xapi-stdext-threads/semaphore.mli index 0db704ce..207e6120 100644 --- a/lib/xapi-stdext-threads/semaphore.mli +++ b/lib/xapi-stdext-threads/semaphore.mli @@ -12,29 +12,29 @@ * GNU Lesser General Public License for more details. *) - type t + exception Inconsistent_state of string +val create : int -> t (** [create n] create a semaphore with initial value [n] (a positive integer). Raise {!Invalid_argument} if [n] <= 0 *) -val create : int -> t +val acquire : t -> int -> unit (** [acquire k s] block until the semaphore value is >= [k] (a positive integer), then atomically decrement the semaphore value by [k]. Raise {!Invalid_argument} if [k] <= 0 *) -val acquire : t -> int -> unit +val release : t -> int -> unit (** [release k s] atomically increment the semaphore value by [k] (a positive integer). Raise {!Invalid_argument} if [k] <= 0 *) -val release : t -> int -> unit +val execute_with_weight : t -> int -> (unit -> 'a) -> 'a (** [execute_with_weight s k f] {!acquire} the semaphore with [k], then run [f ()], and finally {!release} the semaphore with the same value [k] (even in case of failure in the execution of [f]). Return the value of [f ()] or re-raise the exception if any. *) -val execute_with_weight : t -> int -> (unit -> 'a) -> 'a -(** [execute s f] same as [{execute_with_weight} s 1 f] *) val execute : t -> (unit -> 'a) -> 'a +(** [execute s f] same as [{execute_with_weight} s 1 f] *) diff --git a/lib/xapi-stdext-threads/threadext.ml b/lib/xapi-stdext-threads/threadext.ml index a58b34c7..56025d51 100644 --- a/lib/xapi-stdext-threads/threadext.ml +++ b/lib/xapi-stdext-threads/threadext.ml @@ -17,7 +17,7 @@ module M = Mutex module Mutex = struct (** execute the function f with the mutex hold *) let execute lock f = - Mutex.lock lock; + Mutex.lock lock ; Xapi_stdext_pervasives.Pervasiveext.finally f (fun () -> Mutex.unlock lock) end @@ -26,87 +26,88 @@ end let thread_iter_all_exns f xs = let exns = ref [] in let m = M.create () in - List.iter - Thread.join + List.iter Thread.join (List.map (fun x -> - Thread.create - (fun () -> - try - f x - with e -> Mutex.execute m (fun () -> exns := (x, e) :: !exns) - ) - () - ) xs); + Thread.create + (fun () -> + try f x + with e -> Mutex.execute m (fun () -> exns := (x, e) :: !exns) + ) + () + ) + xs + ) ; !exns (** Parallel List.iter. Remembers one exception (at random) and throws it in the error case. *) -let thread_iter f xs = match thread_iter_all_exns f xs with - | [] -> () - | (_, e) :: _ -> raise e +let thread_iter f xs = + match thread_iter_all_exns f xs with [] -> () | (_, e) :: _ -> raise e module Delay = struct (* Concrete type is the ends of a pipe *) type t = { - (* A pipe is used to wake up a thread blocked in wait: *) - mutable pipe_out: Unix.file_descr option; - mutable pipe_in: Unix.file_descr option; - (* Indicates that a signal arrived before a wait: *) - mutable signalled: bool; - m: M.t + (* A pipe is used to wake up a thread blocked in wait: *) + mutable pipe_out: Unix.file_descr option + ; mutable pipe_in: Unix.file_descr option + ; (* Indicates that a signal arrived before a wait: *) + mutable signalled: bool + ; m: M.t } let make () = - { pipe_out = None; - pipe_in = None; - signalled = false; - m = M.create () } + {pipe_out= None; pipe_in= None; signalled= false; m= M.create ()} exception Pre_signalled - let wait (x: t) (seconds: float) = + let wait (x : t) (seconds : float) = let finally = Xapi_stdext_pervasives.Pervasiveext.finally in - let to_close = ref [ ] in + let to_close = ref [] in let close' fd = - if List.mem fd !to_close then Unix.close fd; - to_close := List.filter (fun x -> fd <> x) !to_close in + if List.mem fd !to_close then Unix.close fd ; + to_close := List.filter (fun x -> fd <> x) !to_close + in finally (fun () -> - try - let pipe_out = Mutex.execute x.m - (fun () -> - if x.signalled then begin - x.signalled <- false; - raise Pre_signalled; - end; - let pipe_out, pipe_in = Unix.pipe () in - (* these will be unconditionally closed on exit *) - to_close := [ pipe_out; pipe_in ]; - x.pipe_out <- Some pipe_out; - x.pipe_in <- Some pipe_in; - x.signalled <- false; - pipe_out) in - let r, _, _ = Unix.select [ pipe_out ] [] [] seconds in - (* flush the single byte from the pipe *) - if r <> [] then ignore(Unix.read pipe_out (Bytes.create 1) 0 1); - (* return true if we waited the full length of time, false if we were woken *) - r = [] - with Pre_signalled -> false + try + let pipe_out = + Mutex.execute x.m (fun () -> + if x.signalled then ( + x.signalled <- false ; + raise Pre_signalled + ) ; + let pipe_out, pipe_in = Unix.pipe () in + (* these will be unconditionally closed on exit *) + to_close := [pipe_out; pipe_in] ; + x.pipe_out <- Some pipe_out ; + x.pipe_in <- Some pipe_in ; + x.signalled <- false ; + pipe_out + ) + in + let r, _, _ = Unix.select [pipe_out] [] [] seconds in + (* flush the single byte from the pipe *) + if r <> [] then ignore (Unix.read pipe_out (Bytes.create 1) 0 1) ; + (* return true if we waited the full length of time, false if we were woken *) + r = [] + with Pre_signalled -> false ) (fun () -> - Mutex.execute x.m - (fun () -> - x.pipe_out <- None; - x.pipe_in <- None; - List.iter close' !to_close) + Mutex.execute x.m (fun () -> + x.pipe_out <- None ; + x.pipe_in <- None ; + List.iter close' !to_close + ) ) - let signal (x: t) = - Mutex.execute x.m - (fun () -> - match x.pipe_in with - | Some fd -> ignore(Unix.write fd (Bytes.of_string "X") 0 1) - | None -> x.signalled <- true (* If the wait hasn't happened yet then store up the signal *) - ) + let signal (x : t) = + Mutex.execute x.m (fun () -> + match x.pipe_in with + | Some fd -> + ignore (Unix.write fd (Bytes.of_string "X") 0 1) + | None -> + x.signalled <- true + (* If the wait hasn't happened yet then store up the signal *) + ) end diff --git a/lib/xapi-stdext-threads/threadext.mli b/lib/xapi-stdext-threads/threadext.mli index 7c154688..62bb50d8 100644 --- a/lib/xapi-stdext-threads/threadext.mli +++ b/lib/xapi-stdext-threads/threadext.mli @@ -11,17 +11,17 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. *) -module Mutex : -sig +module Mutex : sig val execute : Mutex.t -> (unit -> 'a) -> 'a end -val thread_iter_all_exns: ('a -> unit) -> 'a list -> ('a * exn) list -val thread_iter: ('a -> unit) -> 'a list -> unit +val thread_iter_all_exns : ('a -> unit) -> 'a list -> ('a * exn) list -module Delay : -sig +val thread_iter : ('a -> unit) -> 'a list -> unit + +module Delay : sig type t + val make : unit -> t (** Blocks the calling thread for a given period of time with the option of returning early if someone calls 'signal'. Returns true if the full time diff --git a/lib/xapi-stdext-unix/dune b/lib/xapi-stdext-unix/dune index 9cfcbb96..2aff2d32 100644 --- a/lib/xapi-stdext-unix/dune +++ b/lib/xapi-stdext-unix/dune @@ -2,6 +2,7 @@ (name xapi_stdext_unix) (public_name xapi-stdext-unix) (libraries + polly fd-send-recv unix xapi-stdext-pervasives) diff --git a/lib/xapi-stdext-unix/unixext.ml b/lib/xapi-stdext-unix/unixext.ml index 60160822..4680b6b4 100644 --- a/lib/xapi-stdext-unix/unixext.ml +++ b/lib/xapi-stdext-unix/unixext.ml @@ -29,38 +29,40 @@ let mkdir_safe dir perm = let mkdir_rec dir perm = let rec p_mkdir dir = let p_name = Filename.dirname dir in - if p_name <> "/" && p_name <> "." - then p_mkdir p_name; - mkdir_safe dir perm in + if p_name <> "/" && p_name <> "." then + p_mkdir p_name ; + mkdir_safe dir perm + in p_mkdir dir (** write a pidfile file *) let pidfile_write filename = - let fd = Unix.openfile filename - [ Unix.O_WRONLY; Unix.O_CREAT; Unix.O_TRUNC; ] - 0o640 in + let fd = + Unix.openfile filename [Unix.O_WRONLY; Unix.O_CREAT; Unix.O_TRUNC] 0o640 + in finally (fun () -> - let pid = Unix.getpid () in - let buf = string_of_int pid ^ "\n" in - let len = String.length buf in - if Unix.write fd (Bytes.unsafe_of_string buf) 0 len <> len - then failwith "pidfile_write failed"; + let pid = Unix.getpid () in + let buf = string_of_int pid ^ "\n" in + let len = String.length buf in + if Unix.write fd (Bytes.unsafe_of_string buf) 0 len <> len then + failwith "pidfile_write failed" ) (fun () -> Unix.close fd) (** read a pidfile file, return either Some pid or None *) let pidfile_read filename = - let fd = Unix.openfile filename [ Unix.O_RDONLY ] 0o640 in + let fd = Unix.openfile filename [Unix.O_RDONLY] 0o640 in finally (fun () -> - try - let buf = Bytes.create 80 in - let rd = Unix.read fd buf 0 (Bytes.length buf) in - if rd = 0 then - failwith "pidfile_read failed"; - Scanf.sscanf (Bytes.sub_string buf 0 rd) "%d" (fun i -> Some i) - with _ -> None) + try + let buf = Bytes.create 80 in + let rd = Unix.read fd buf 0 (Bytes.length buf) in + if rd = 0 then + failwith "pidfile_read failed" ; + Scanf.sscanf (Bytes.sub_string buf 0 rd) "%d" (fun i -> Some i) + with _ -> None + ) (fun () -> Unix.close fd) (** open a file, and make sure the close is always done *) @@ -70,24 +72,26 @@ let with_file file mode perms f = (fun () -> f fd) (fun () -> Unix.close fd) -(** daemonize a process *) (* !! Must call this before spawning any threads !! *) + +(** daemonize a process *) let daemonize () = match Unix.fork () with - | 0 -> - if Unix.setsid () == -1 then - failwith "Unix.setsid failed"; - - begin match Unix.fork () with + | 0 -> ( + if Unix.setsid () == -1 then + failwith "Unix.setsid failed" ; + match Unix.fork () with | 0 -> - with_file "/dev/null" [ Unix.O_WRONLY ] 0 - (fun nullfd -> - Unix.close Unix.stdin; - Unix.dup2 nullfd Unix.stdout; - Unix.dup2 nullfd Unix.stderr) - | _ -> exit 0 - end - | _ -> exit 0 + with_file "/dev/null" [Unix.O_WRONLY] 0 (fun nullfd -> + Unix.close Unix.stdin ; + Unix.dup2 nullfd Unix.stdout ; + Unix.dup2 nullfd Unix.stderr + ) + | _ -> + exit 0 + ) + | _ -> + exit 0 exception Break @@ -95,41 +99,33 @@ let lines_fold f start input = let accumulator = ref start in let running = ref true in while !running do - let line = - try Some (input_line input) - with End_of_file -> None - in + let line = try Some (input_line input) with End_of_file -> None in match line with - | Some line -> - begin - try accumulator := (f !accumulator line) - with Break -> running := false - end + | Some line -> ( + try accumulator := f !accumulator line with Break -> running := false + ) | None -> - running := false - done; + running := false + done ; !accumulator -let lines_iter f = lines_fold (fun () line -> ignore(f line)) () +let lines_iter f = lines_fold (fun () line -> ignore (f line)) () (** open a file, and make sure the close is always done *) let with_input_channel file f = let input = open_in file in - finally - (fun () -> f input) - (fun () -> close_in input) - + finally (fun () -> f input) (fun () -> close_in input) -let file_lines_fold f start file_path = with_input_channel file_path (lines_fold f start) +let file_lines_fold f start file_path = + with_input_channel file_path (lines_fold f start) let read_lines ~(path : string) : string list = - List.rev (file_lines_fold (fun acc line -> line::acc) [] path) + List.rev (file_lines_fold (fun acc line -> line :: acc) [] path) -let file_lines_iter f = file_lines_fold (fun () line -> ignore(f line)) () +let file_lines_iter f = file_lines_fold (fun () line -> ignore (f line)) () let readfile_line = file_lines_iter - (** [fd_blocks_fold block_size f start fd] folds [f] over blocks (strings) from the fd [fd] with initial value [start] *) let fd_blocks_fold block_size f start fd = @@ -138,7 +134,8 @@ let fd_blocks_fold block_size f start fd = let n = Unix.read fd block 0 block_size in (* Consider making the interface explicitly use Substrings *) let b = if n = block_size then block else Bytes.sub block 0 n in - if n = 0 then acc else fold (f acc b) in + if n = 0 then acc else fold (f acc b) + in fold start let with_directory dir f = @@ -148,60 +145,67 @@ let with_directory dir f = (fun () -> Unix.closedir dh) let buffer_of_fd fd = - fd_blocks_fold 1024 (fun b s -> Buffer.add_bytes b s; b) (Buffer.create 1024) fd + fd_blocks_fold 1024 + (fun b s -> Buffer.add_bytes b s ; b) + (Buffer.create 1024) fd let string_of_fd fd = Buffer.contents (buffer_of_fd fd) -let buffer_of_file file_path = with_file file_path [ Unix.O_RDONLY ] 0 buffer_of_fd +let buffer_of_file file_path = + with_file file_path [Unix.O_RDONLY] 0 buffer_of_fd let string_of_file file_path = Buffer.contents (buffer_of_file file_path) (** Write a file, ensures atomicity and durability. *) let atomic_write_to_file fname perms f = let dir_path = Filename.dirname fname in - let tmp_path, tmp_chan = Filename.open_temp_file ~temp_dir:dir_path "" ".tmp" in + let tmp_path, tmp_chan = + Filename.open_temp_file ~temp_dir:dir_path "" ".tmp" + in let tmp_fd = Unix.descr_of_out_channel tmp_chan in - let write_tmp_file () = let result = f tmp_fd in - Unix.fchmod tmp_fd perms; - Unix.fsync tmp_fd; - result + Unix.fchmod tmp_fd perms ; Unix.fsync tmp_fd ; result in let write_and_persist () = let result = finally write_tmp_file (fun () -> Stdlib.close_out tmp_chan) in - Unix.rename tmp_path fname; + Unix.rename tmp_path fname ; (* sync parent directory to make sure the file is persisted *) let dir_fd = Unix.openfile dir_path [O_RDONLY] 0 in - finally (fun () -> Unix.fsync dir_fd) (fun () -> Unix.close dir_fd); + finally (fun () -> Unix.fsync dir_fd) (fun () -> Unix.close dir_fd) ; result in finally write_and_persist (fun () -> unlink_safe tmp_path) - (** Atomically write a string to a file *) -let write_bytes_to_file ?(perms=0o644) fname b = +let write_bytes_to_file ?(perms = 0o644) fname b = atomic_write_to_file fname perms (fun fd -> let len = Bytes.length b in let written = Unix.write fd b 0 len in - if written <> len then (failwith "Short write occured!")) + if written <> len then failwith "Short write occured!" + ) -let write_string_to_file ?(perms=0o644) fname s = +let write_string_to_file ?(perms = 0o644) fname s = write_bytes_to_file fname ~perms (Bytes.unsafe_of_string s) let execv_get_output cmd args = - let (pipe_exit, pipe_entrance) = Unix.pipe () in - let r = try Unix.set_close_on_exec pipe_exit; true with _ -> false in + let pipe_exit, pipe_entrance = Unix.pipe () in + let r = + try + Unix.set_close_on_exec pipe_exit ; + true + with _ -> false + in match Unix.fork () with - | 0 -> - Unix.dup2 pipe_entrance Unix.stdout; - Unix.close pipe_entrance; - if not r then - Unix.close pipe_exit; - begin try Unix.execv cmd args with _ -> exit 127 end + | 0 -> ( + Unix.dup2 pipe_entrance Unix.stdout ; + Unix.close pipe_entrance ; + if not r then + Unix.close pipe_exit ; + try Unix.execv cmd args with _ -> exit 127 + ) | pid -> - Unix.close pipe_entrance; - pid, pipe_exit + Unix.close pipe_entrance ; (pid, pipe_exit) let copy_file_internal ?limit reader writer = let buffer = Bytes.make 65536 '\000' in @@ -209,226 +213,244 @@ let copy_file_internal ?limit reader writer = let finished = ref false in let total_bytes = ref 0L in let limit = ref limit in - while not(!finished) do + while not !finished do let requested = min (Option.value ~default:buffer_len !limit) buffer_len in let num = reader buffer 0 (Int64.to_int requested) in let num64 = Int64.of_int num in - - limit := Option.map (fun x -> Int64.sub x num64) !limit; - ignore_int (writer buffer 0 num); - total_bytes := Int64.add !total_bytes num64; - finished := num = 0 || !limit = Some 0L; - done; + limit := Option.map (fun x -> Int64.sub x num64) !limit ; + ignore_int (writer buffer 0 num) ; + total_bytes := Int64.add !total_bytes num64 ; + finished := num = 0 || !limit = Some 0L + done ; !total_bytes -let copy_file ?limit ifd ofd = copy_file_internal ?limit (Unix.read ifd) (Unix.write ofd) +let copy_file ?limit ifd ofd = + copy_file_internal ?limit (Unix.read ifd) (Unix.write ofd) let file_exists file_path = - try Unix.access file_path [Unix.F_OK]; true + try + Unix.access file_path [Unix.F_OK] ; + true with _ -> false let touch_file file_path = - let fd = Unix.openfile file_path - [Unix.O_WRONLY; Unix.O_CREAT; Unix.O_NOCTTY; Unix.O_NONBLOCK] 0o666 in - Unix.close fd; + let fd = + Unix.openfile file_path + [Unix.O_WRONLY; Unix.O_CREAT; Unix.O_NOCTTY; Unix.O_NONBLOCK] + 0o666 + in + Unix.close fd ; Unix.utimes file_path 0.0 0.0 let is_empty_file file_path = try let stats = Unix.stat file_path in stats.Unix.st_size = 0 - with Unix.Unix_error (Unix.ENOENT, _, _) -> - false + with Unix.Unix_error (Unix.ENOENT, _, _) -> false let delete_empty_file file_path = - if is_empty_file file_path - then (Sys.remove file_path; true) - else (false) + if is_empty_file file_path then ( + Sys.remove file_path ; true + ) else + false (** Create a new file descriptor, connect it to host:port and return it *) exception Host_not_found of string + let open_connection_fd host port = let open Unix in - let addrinfo = getaddrinfo host (string_of_int port) [AI_SOCKTYPE SOCK_STREAM] in + let addrinfo = + getaddrinfo host (string_of_int port) [AI_SOCKTYPE SOCK_STREAM] + in match addrinfo with | [] -> - failwith (Printf.sprintf "Couldn't resolve hostname: %s" host) - | ai :: _ -> - let s = socket ai.ai_family ai.ai_socktype 0 in - try - connect s ai.ai_addr; - s - with e -> - Backtrace.is_important e; - close s; - raise e + failwith (Printf.sprintf "Couldn't resolve hostname: %s" host) + | ai :: _ -> ( + let s = socket ai.ai_family ai.ai_socktype 0 in + try connect s ai.ai_addr ; s + with e -> Backtrace.is_important e ; close s ; raise e + ) let open_connection_unix_fd filename = let s = Unix.socket Unix.PF_UNIX Unix.SOCK_STREAM 0 in try - let addr = Unix.ADDR_UNIX(filename) in - Unix.connect s addr; - s - with e -> - Backtrace.is_important e; - Unix.close s; - raise e + let addr = Unix.ADDR_UNIX filename in + Unix.connect s addr ; s + with e -> Backtrace.is_important e ; Unix.close s ; raise e module CBuf = struct (** A circular buffer constructed from a string *) type t = { - mutable buffer: bytes; - mutable len: int; (** bytes of valid data in [buffer] *) - mutable start: int; (** index of first valid byte in [buffer] *) - mutable r_closed: bool; (** true if no more data can be read due to EOF *) - mutable w_closed: bool; (** true if no more data can be written due to EOF *) + mutable buffer: bytes + ; mutable len: int (** bytes of valid data in [buffer] *) + ; mutable start: int (** index of first valid byte in [buffer] *) + ; mutable r_closed: bool (** true if no more data can be read due to EOF *) + ; mutable w_closed: bool + (** true if no more data can be written due to EOF *) } - let empty length = { - buffer = Bytes.create length; - len = 0; - start = 0; - r_closed = false; - w_closed = false; - } - - let drop (x: t) n = - if n > x.len then failwith (Printf.sprintf "drop %d > %d" n x.len); - x.start <- (x.start + n) mod (Bytes.length x.buffer); + let empty length = + { + buffer= Bytes.create length + ; len= 0 + ; start= 0 + ; r_closed= false + ; w_closed= false + } + + let drop (x : t) n = + if n > x.len then failwith (Printf.sprintf "drop %d > %d" n x.len) ; + x.start <- (x.start + n) mod Bytes.length x.buffer ; x.len <- x.len - n - let should_read (x: t) = - not x.r_closed && (x.len < (Bytes.length x.buffer - 1)) - let should_write (x: t) = - not x.w_closed && (x.len > 0) + let should_read (x : t) = + (not x.r_closed) && x.len < Bytes.length x.buffer - 1 + + let should_write (x : t) = (not x.w_closed) && x.len > 0 + + let end_of_reads (x : t) = x.r_closed && x.len = 0 - let end_of_reads (x: t) = x.r_closed && x.len = 0 - let end_of_writes (x: t) = x.w_closed + let end_of_writes (x : t) = x.w_closed - let write (x: t) fd = + let write (x : t) fd = (* Offset of the character after the substring *) let next = min (Bytes.length x.buffer) (x.start + x.len) in let len = next - x.start in - let written = try Unix.single_write fd x.buffer x.start len with _ -> x.w_closed <- true; len in + let written = + try Unix.single_write fd x.buffer x.start len + with _ -> + x.w_closed <- true ; + len + in drop x written - let read (x: t) fd = + let read (x : t) fd = (* Offset of the next empty character *) - let next = (x.start + x.len) mod (Bytes.length x.buffer) in - let len = min (Bytes.length x.buffer - next) (Bytes.length x.buffer - x.len) in + let next = (x.start + x.len) mod Bytes.length x.buffer in + let len = + min (Bytes.length x.buffer - next) (Bytes.length x.buffer - x.len) + in let read = Unix.read fd x.buffer next len in - if read = 0 then x.r_closed <- true; + if read = 0 then x.r_closed <- true ; x.len <- x.len + read - end exception Process_still_alive -let kill_and_wait ?(signal = Sys.sigterm) ?(timeout=10.) pid = +let kill_and_wait ?(signal = Sys.sigterm) ?(timeout = 10.) pid = let proc_entry_exists pid = - try Unix.access (Printf.sprintf "/proc/%d" pid) [ Unix.F_OK ]; true + try + Unix.access (Printf.sprintf "/proc/%d" pid) [Unix.F_OK] ; + true with _ -> false in if pid > 0 && proc_entry_exists pid then ( let loop_time_waiting = 0.03 in let left = ref timeout in let readcmdline pid = - try string_of_file (Printf.sprintf "/proc/%d/cmdline" pid) - with _ -> "" + try string_of_file (Printf.sprintf "/proc/%d/cmdline" pid) with _ -> "" in let reference = readcmdline pid and quit = ref false in - Unix.kill pid signal; - + Unix.kill pid signal ; (* We cannot do a waitpid here, since we might not be parent of the process, so instead we are waiting for the /proc/%d to go away. Also we verify that the cmdline stay the same if it's still here to prevent the very very unlikely event that the pid get reused before we notice it's gone *) - while proc_entry_exists pid && not !quit && !left > 0. - do + while proc_entry_exists pid && (not !quit) && !left > 0. do let cmdline = readcmdline pid in if cmdline = reference then ( (* still up, let's sleep a bit *) - ignore (Unix.select [] [] [] loop_time_waiting); + ignore (Unix.select [] [] [] loop_time_waiting) ; left := !left -. loop_time_waiting - ) else ( - (* not the same, it's gone ! *) + ) else (* not the same, it's gone ! *) quit := true - ) - done; + done ; if !left <= 0. then - raise Process_still_alive; + raise Process_still_alive ) let string_of_signal x = - let table = [ - Sys.sigabrt, "SIGABRT"; - Sys.sigalrm, "SIGALRM"; - Sys.sigfpe, "SIGFPE"; - Sys.sighup, "SIGHUP"; - Sys.sigill, "SIGILL"; - Sys.sigint, "SIGINT"; - Sys.sigkill, "SIGKILL"; - Sys.sigpipe, "SIGPIPE"; - Sys.sigquit, "SIGQUIT"; - Sys.sigsegv, "SIGSEGV"; - Sys.sigterm, "SIGTERM"; - Sys.sigusr1, "SIGUSR1"; - Sys.sigusr2, "SIGUSR2"; - Sys.sigchld, "SIGCHLD"; - Sys.sigcont, "SIGCONT"; - Sys.sigstop, "SIGSTOP"; - Sys.sigttin, "SIGTTIN"; - Sys.sigttou, "SIGTTOU"; - Sys.sigvtalrm, "SIGVTALRM"; - Sys.sigprof, "SIGPROF"; - ] in - if List.mem_assoc x table - then List.assoc x table - else (Printf.sprintf "(ocaml signal %d with an unknown name)" x) - -let proxy (a: Unix.file_descr) (b: Unix.file_descr) = + let table = + [ + (Sys.sigabrt, "SIGABRT") + ; (Sys.sigalrm, "SIGALRM") + ; (Sys.sigfpe, "SIGFPE") + ; (Sys.sighup, "SIGHUP") + ; (Sys.sigill, "SIGILL") + ; (Sys.sigint, "SIGINT") + ; (Sys.sigkill, "SIGKILL") + ; (Sys.sigpipe, "SIGPIPE") + ; (Sys.sigquit, "SIGQUIT") + ; (Sys.sigsegv, "SIGSEGV") + ; (Sys.sigterm, "SIGTERM") + ; (Sys.sigusr1, "SIGUSR1") + ; (Sys.sigusr2, "SIGUSR2") + ; (Sys.sigchld, "SIGCHLD") + ; (Sys.sigcont, "SIGCONT") + ; (Sys.sigstop, "SIGSTOP") + ; (Sys.sigttin, "SIGTTIN") + ; (Sys.sigttou, "SIGTTOU") + ; (Sys.sigvtalrm, "SIGVTALRM") + ; (Sys.sigprof, "SIGPROF") + ] + in + if List.mem_assoc x table then + List.assoc x table + else + Printf.sprintf "(ocaml signal %d with an unknown name)" x + +let proxy (a : Unix.file_descr) (b : Unix.file_descr) = let size = 64 * 1024 in (* [a'] is read from [a] and will be written to [b] *) (* [b'] is read from [b] and will be written to [a] *) let a' = CBuf.empty size and b' = CBuf.empty size in - Unix.set_nonblock a; - Unix.set_nonblock b; - + Unix.set_nonblock a ; + Unix.set_nonblock b ; try while true do - let r = (if CBuf.should_read a' then [ a ] else []) @ (if CBuf.should_read b' then [ b ] else []) in - let w = (if CBuf.should_write a' then [ b ] else []) @ (if CBuf.should_write b' then [ a ] else []) in - + let r = + (if CBuf.should_read a' then [a] else []) + @ if CBuf.should_read b' then [b] else [] + in + let w = + (if CBuf.should_write a' then [b] else []) + @ if CBuf.should_write b' then [a] else [] + in (* If we can't make any progress (because fds have been closed), then stop *) - if r = [] && w = [] then raise End_of_file; - + if r = [] && w = [] then raise End_of_file ; let r, w, _ = Unix.select r w [] (-1.0) in (* Do the writing before the reading *) - List.iter (fun fd -> if a = fd then CBuf.write b' a else CBuf.write a' b) w; - List.iter (fun fd -> if a = fd then CBuf.read a' a else CBuf.read b' b) r; + List.iter + (fun fd -> if a = fd then CBuf.write b' a else CBuf.write a' b) + w ; + List.iter (fun fd -> if a = fd then CBuf.read a' a else CBuf.read b' b) r ; (* If there's nothing else to read or write then signal the other end *) List.iter (fun (buf, fd) -> - if CBuf.end_of_reads buf then Unix.shutdown fd Unix.SHUTDOWN_SEND; - if CBuf.end_of_writes buf then Unix.shutdown fd Unix.SHUTDOWN_RECEIVE - ) [ a', b; b', a ] + if CBuf.end_of_reads buf then Unix.shutdown fd Unix.SHUTDOWN_SEND ; + if CBuf.end_of_writes buf then Unix.shutdown fd Unix.SHUTDOWN_RECEIVE + ) + [(a', b); (b', a)] done - with _ -> - (try Unix.clear_nonblock a with _ -> ()); - (try Unix.clear_nonblock b with _ -> ()); - (try Unix.close a with _ -> ()); - (try Unix.close b with _ -> ()) + with _ -> ( + (try Unix.clear_nonblock a with _ -> ()) ; + (try Unix.clear_nonblock b with _ -> ()) ; + (try Unix.close a with _ -> ()) ; + try Unix.close b with _ -> () + ) let rec really_read fd string off n = - if n=0 then () else + if n = 0 then + () + else let m = Unix.read fd string off n in - if m = 0 then raise End_of_file; - really_read fd string (off+m) (n-m) + if m = 0 then raise End_of_file ; + really_read fd string (off + m) (n - m) let really_read_string fd length = let buf = Bytes.make length '\000' in - really_read fd buf 0 length; + really_read fd buf 0 length ; Bytes.unsafe_to_string buf let try_read_string ?limit fd = @@ -437,38 +459,43 @@ let try_read_string ?limit fd = let cache = Bytes.make chunk '\000' in let finished = ref false in while not !finished do - let to_read = match limit with - | Some x -> min (x - (Buffer.length buf)) chunk - | None -> chunk in + let to_read = + match limit with + | Some x -> + min (x - Buffer.length buf) chunk + | None -> + chunk + in let read_bytes = Unix.read fd cache 0 to_read in - Buffer.add_subbytes buf cache 0 read_bytes; + Buffer.add_subbytes buf cache 0 read_bytes ; if read_bytes = 0 then finished := true - done; + done ; Buffer.contents buf (* From https://ocaml.github.io/ocamlunix/ocamlunix.html#sec118 -The function write of the Unix module iterates the system call write until -all the requested bytes are effectively written. -val write : file_descr -> string -> int -> int -> int -However, when the descriptor is a pipe (or a socket, see chapter 6), writes -may block and the system call write may be interrupted by a signal. In this -case the OCaml call to Unix.write is interrupted and the error EINTR is raised. -The problem is that some of the data may already have been written by a -previous system call to write but the actual size that was transferred is -unknown and lost. This renders the function write of the Unix module useless -in the presence of signals. - -To address this problem, the Unix module also provides the “raw” system call -write under the name single_write. - -We can use multiple single_write calls to write exactly the requested -amount of data (but not atomically!). + The function write of the Unix module iterates the system call write until + all the requested bytes are effectively written. + val write : file_descr -> string -> int -> int -> int + However, when the descriptor is a pipe (or a socket, see chapter 6), writes + may block and the system call write may be interrupted by a signal. In this + case the OCaml call to Unix.write is interrupted and the error EINTR is raised. + The problem is that some of the data may already have been written by a + previous system call to write but the actual size that was transferred is + unknown and lost. This renders the function write of the Unix module useless + in the presence of signals. + + To address this problem, the Unix module also provides the “raw” system call + write under the name single_write. + + We can use multiple single_write calls to write exactly the requested + amount of data (but not atomically!). *) let rec restart_on_EINTR f x = try f x with Unix.Unix_error (Unix.EINTR, _, _) -> restart_on_EINTR f x + and really_write fd buffer offset len = let n = restart_on_EINTR (Unix.single_write_substring fd buffer offset) len in - if n < len then really_write fd buffer (offset + n) (len - n);; + if n < len then really_write fd buffer (offset + n) (len - n) (* Ideally, really_write would be implemented with optional arguments ?(off=0) ?(len=String.length string) *) let really_write_string fd string = @@ -482,28 +509,43 @@ exception Timeout (* Write as many bytes to a file descriptor as possible from data before a given clock time. *) (* Raises Timeout exception if the number of bytes written is less than the specified length. *) (* Writes into the file descriptor at the current cursor position. *) -let time_limited_write_internal (write : Unix.file_descr -> 'a -> int -> int -> int) filedesc length data target_response_time = +let time_limited_write_internal + (write : Unix.file_descr -> 'a -> int -> int -> int) filedesc length data + target_response_time = let total_bytes_to_write = length in let bytes_written = ref 0 in - let now = ref (Unix.gettimeofday()) in + let now = ref (Unix.gettimeofday ()) in while !bytes_written < total_bytes_to_write && !now < target_response_time do let remaining_time = target_response_time -. !now in - let (_, ready_to_write, _) = Unix.select [] [filedesc] [] remaining_time in (* Note: there is a possibility that the storage could go away after the select and before the write, so the write would block. *) - if List.mem filedesc ready_to_write then begin - let bytes_to_write = total_bytes_to_write - !bytes_written in - let bytes = (try write filedesc data !bytes_written bytes_to_write with Unix.Unix_error(Unix.EAGAIN,_,_) | Unix.Unix_error(Unix.EWOULDBLOCK,_,_) -> 0) in (* write from buffer=data from offset=bytes_written, length=bytes_to_write *) - bytes_written := bytes + !bytes_written; - end; - now := Unix.gettimeofday() - done; - if !bytes_written = total_bytes_to_write then () else (* we ran out of time *) raise Timeout + let _, ready_to_write, _ = Unix.select [] [filedesc] [] remaining_time in + (* Note: there is a possibility that the storage could go away after the select and before the write, so the write would block. *) + ( if List.mem filedesc ready_to_write then + let bytes_to_write = total_bytes_to_write - !bytes_written in + let bytes = + try write filedesc data !bytes_written bytes_to_write + with + | Unix.Unix_error (Unix.EAGAIN, _, _) + | Unix.Unix_error (Unix.EWOULDBLOCK, _, _) + -> + 0 + in + (* write from buffer=data from offset=bytes_written, length=bytes_to_write *) + bytes_written := bytes + !bytes_written + ) ; + now := Unix.gettimeofday () + done ; + if !bytes_written = total_bytes_to_write then + () + else (* we ran out of time *) + raise Timeout let time_limited_write filedesc length data target_response_time = - time_limited_write_internal Unix.write filedesc length data target_response_time + time_limited_write_internal Unix.write filedesc length data + target_response_time let time_limited_write_substring filedesc length data target_response_time = - time_limited_write_internal Unix.write_substring filedesc length data target_response_time - + time_limited_write_internal Unix.write_substring filedesc length data + target_response_time (* Read as many bytes to a file descriptor as possible before a given clock time. *) (* Raises Timeout exception if the number of bytes read is less than the desired number. *) @@ -512,130 +554,173 @@ let time_limited_read filedesc length target_response_time = let total_bytes_to_read = length in let bytes_read = ref 0 in let buf = Bytes.make total_bytes_to_read '\000' in - let now = ref (Unix.gettimeofday()) in + let now = ref (Unix.gettimeofday ()) in while !bytes_read < total_bytes_to_read && !now < target_response_time do let remaining_time = target_response_time -. !now in - let (ready_to_read, _, _) = Unix.select [filedesc] [] [] remaining_time in - if List.mem filedesc ready_to_read then begin - let bytes_to_read = total_bytes_to_read - !bytes_read in - let bytes = (try Unix.read filedesc buf !bytes_read bytes_to_read with Unix.Unix_error(Unix.EAGAIN,_,_) | Unix.Unix_error(Unix.EWOULDBLOCK,_,_) -> 0) in (* read into buffer=buf from offset=bytes_read, length=bytes_to_read *) - if bytes = 0 then raise End_of_file (* End of file has been reached *) - else bytes_read := bytes + !bytes_read - end; - now := Unix.gettimeofday() - done; - if !bytes_read = total_bytes_to_read then (Bytes.unsafe_to_string buf) else (* we ran out of time *) raise Timeout + let ready_to_read, _, _ = Unix.select [filedesc] [] [] remaining_time in + ( if List.mem filedesc ready_to_read then + let bytes_to_read = total_bytes_to_read - !bytes_read in + let bytes = + try Unix.read filedesc buf !bytes_read bytes_to_read + with + | Unix.Unix_error (Unix.EAGAIN, _, _) + | Unix.Unix_error (Unix.EWOULDBLOCK, _, _) + -> + 0 + in + (* read into buffer=buf from offset=bytes_read, length=bytes_to_read *) + if bytes = 0 then + raise End_of_file (* End of file has been reached *) + else + bytes_read := bytes + !bytes_read + ) ; + now := Unix.gettimeofday () + done ; + if !bytes_read = total_bytes_to_read then + Bytes.unsafe_to_string buf + else (* we ran out of time *) + raise Timeout (* --------------------------------------------------------------------------------------- *) (* Read a given number of bytes of data from the fd, or stop at EOF, whichever comes first. *) (* A negative ~max_bytes indicates that all the data should be read from the fd until EOF. This is the default. *) -let read_data_in_chunks_internal (sub : bytes -> int -> int -> 'a) (f : 'a -> int -> unit) ?(block_size = 1024) ?(max_bytes = -1) from_fd = +let read_data_in_chunks_internal (sub : bytes -> int -> int -> 'a) + (f : 'a -> int -> unit) ?(block_size = 1024) ?(max_bytes = -1) from_fd = let buf = Bytes.make block_size '\000' in let rec do_read acc = let remaining_bytes = max_bytes - acc in - if remaining_bytes = 0 then acc (* we've read the amount requested *) - else begin - let bytes_to_read = (if max_bytes < 0 || remaining_bytes > block_size then block_size else remaining_bytes) in + if remaining_bytes = 0 then + acc (* we've read the amount requested *) + else + let bytes_to_read = + if max_bytes < 0 || remaining_bytes > block_size then + block_size + else + remaining_bytes + in let bytes_read = Unix.read from_fd buf 0 bytes_to_read in - if bytes_read = 0 then acc (* we reached EOF *) - else begin - f (sub buf 0 bytes_read) bytes_read; + if bytes_read = 0 then + acc (* we reached EOF *) + else ( + f (sub buf 0 bytes_read) bytes_read ; do_read (acc + bytes_read) - end - end in + ) + in do_read 0 -let read_data_in_string_chunks (f : string -> int -> unit) ?(block_size = 1024) ?(max_bytes = -1) from_fd = +let read_data_in_string_chunks (f : string -> int -> unit) ?(block_size = 1024) + ?(max_bytes = -1) from_fd = read_data_in_chunks_internal Bytes.sub_string f ~block_size ~max_bytes from_fd -let read_data_in_chunks (f : bytes -> int -> unit) ?(block_size = 1024) ?(max_bytes = -1) from_fd = +let read_data_in_chunks (f : bytes -> int -> unit) ?(block_size = 1024) + ?(max_bytes = -1) from_fd = read_data_in_chunks_internal Bytes.sub f ~block_size ~max_bytes from_fd -let spawnvp ?(pid_callback=(fun _ -> ())) cmd args = +let spawnvp ?(pid_callback = fun _ -> ()) cmd args = match Unix.fork () with | 0 -> - Unix.execvp cmd args + Unix.execvp cmd args | pid -> - begin try pid_callback pid with _ -> () end; - snd (Unix.waitpid [] pid) + (try pid_callback pid with _ -> ()) ; + snd (Unix.waitpid [] pid) let double_fork f = match Unix.fork () with - | 0 -> - begin match Unix.fork () with - (* NB: use _exit (calls C lib _exit directly) to avoid - calling at_exit handlers and flushing output channels - which wouild cause intermittent deadlocks if we - forked from a threaded program *) - | 0 -> (try f () with _ -> ()); _exit 0 - | _ -> _exit 0 - end - | pid -> ignore(Unix.waitpid [] pid) - -external set_tcp_nodelay : Unix.file_descr -> bool -> unit = "stub_unixext_set_tcp_nodelay" -external set_sock_keepalives : Unix.file_descr -> int -> int -> int -> unit = "stub_unixext_set_sock_keepalives" + | 0 -> ( + match Unix.fork () with + (* NB: use _exit (calls C lib _exit directly) to avoid + calling at_exit handlers and flushing output channels + which wouild cause intermittent deadlocks if we + forked from a threaded program *) + | 0 -> + (try f () with _ -> ()) ; + _exit 0 + | _ -> + _exit 0 + ) + | pid -> + ignore (Unix.waitpid [] pid) + +external set_tcp_nodelay : Unix.file_descr -> bool -> unit + = "stub_unixext_set_tcp_nodelay" + +external set_sock_keepalives : Unix.file_descr -> int -> int -> int -> unit + = "stub_unixext_set_sock_keepalives" + external fsync : Unix.file_descr -> unit = "stub_unixext_fsync" + external blkgetsize64 : Unix.file_descr -> int64 = "stub_unixext_blkgetsize64" external get_max_fd : unit -> int = "stub_unixext_get_max_fd" -let int_of_file_descr (x: Unix.file_descr) : int = Obj.magic x -let file_descr_of_int (x: int) : Unix.file_descr = Obj.magic x +let int_of_file_descr (x : Unix.file_descr) : int = Obj.magic x + +let file_descr_of_int (x : int) : Unix.file_descr = Obj.magic x (** Forcibly closes all open file descriptors except those explicitly passed in as arguments. Useful to avoid accidentally passing a file descriptor opened in another thread to a process being concurrently fork()ed (there's a race between open/set_close_on_exec). NB this assumes that 'type Unix.file_descr = int' *) -let close_all_fds_except (fds: Unix.file_descr list) = +let close_all_fds_except (fds : Unix.file_descr list) = (* get at the file descriptor within *) let fds' = List.map int_of_file_descr fds in - let close' (x: int) = - try Unix.close(file_descr_of_int x) with _ -> () in - + let close' (x : int) = try Unix.close (file_descr_of_int x) with _ -> () in let highest_to_keep = List.fold_left max (-1) fds' in (* close all the fds higher than the one we want to keep *) - for i = highest_to_keep + 1 to get_max_fd () do close' i done; + for i = highest_to_keep + 1 to get_max_fd () do + close' i + done ; (* close all the rest *) for i = 0 to highest_to_keep - 1 do - if not(List.mem i fds') then close' i + if not (List.mem i fds') then close' i done - (** Remove "." and ".." from paths (NB doesn't attempt to resolve symlinks) *) -let resolve_dot_and_dotdot (path: string) : string = - let of_string (x: string): string list = +let resolve_dot_and_dotdot (path : string) : string = + let of_string (x : string) : string list = let rec rev_split path = let basename = Filename.basename path and dirname = Filename.dirname path in - let rest = if Filename.dirname dirname = dirname then [] else rev_split dirname in - basename :: rest in + let rest = + if Filename.dirname dirname = dirname then [] else rev_split dirname + in + basename :: rest + in let abs_path path = - if Filename.is_relative path - then Filename.concat "/" path (* no notion of a cwd *) - else path in - rev_split (abs_path x) in - - let to_string (x: string list) = List.fold_left Filename.concat "/" (List.rev x) in - + if Filename.is_relative path then + Filename.concat "/" path (* no notion of a cwd *) + else + path + in + rev_split (abs_path x) + in + let to_string (x : string list) = + List.fold_left Filename.concat "/" (List.rev x) + in (* Process all "." and ".." references *) - let rec remove_dots (n: int) (x: string list) = - match x, n with - | [], _ -> [] - | "." :: rest, _ -> remove_dots n rest (* throw away ".", don't count as parent for ".." *) - | ".." :: rest, _ -> remove_dots (n + 1) rest (* note the number of ".." *) - | x :: rest, 0 -> x :: (remove_dots 0 rest) - | _ :: rest, n -> remove_dots (n - 1) rest (* munch *) in + let rec remove_dots (n : int) (x : string list) = + match (x, n) with + | [], _ -> + [] + | "." :: rest, _ -> + remove_dots n rest (* throw away ".", don't count as parent for ".." *) + | ".." :: rest, _ -> + remove_dots (n + 1) rest (* note the number of ".." *) + | x :: rest, 0 -> + x :: remove_dots 0 rest + | _ :: rest, n -> + remove_dots (n - 1) rest + (* munch *) + in to_string (remove_dots 0 (of_string path)) (** Seek to an absolute offset within a file descriptor *) -let seek_to fd pos = - Unix.lseek fd pos Unix.SEEK_SET +let seek_to fd pos = Unix.lseek fd pos Unix.SEEK_SET (** Seek to an offset within a file descriptor, relative to the current cursor position *) -let seek_rel fd diff = - Unix.lseek fd diff Unix.SEEK_CUR +let seek_rel fd diff = Unix.lseek fd diff Unix.SEEK_CUR (** Return the current cursor position within a file descriptor *) let current_cursor_pos fd = @@ -644,34 +729,34 @@ let current_cursor_pos fd = let wait_for_path path delay timeout = let rec inner ttl = - if ttl=0 then failwith "No path!"; - try - ignore(Unix.stat path) + if ttl = 0 then failwith "No path!" ; + try ignore (Unix.stat path) with _ -> - delay 0.5; + delay 0.5 ; inner (ttl - 1) in inner (timeout * 2) - -let _ = Callback.register_exception "unixext.unix_error" (Unix_error (0)) +let _ = Callback.register_exception "unixext.unix_error" (Unix_error 0) let send_fd = Fd_send_recv.send_fd + let send_fd_substring = Fd_send_recv.send_fd_substring + let recv_fd = Fd_send_recv.recv_fd type statvfs_t = { - f_bsize : int64; - f_frsize : int64; - f_blocks : int64; - f_bfree : int64; - f_bavail : int64; - f_files : int64; - f_ffree : int64; - f_favail : int64; - f_fsid : int64; - f_flag : int64; - f_namemax : int64; + f_bsize: int64 + ; f_frsize: int64 + ; f_blocks: int64 + ; f_bfree: int64 + ; f_bavail: int64 + ; f_files: int64 + ; f_ffree: int64 + ; f_favail: int64 + ; f_fsid: int64 + ; f_flag: int64 + ; f_namemax: int64 } external statvfs : string -> statvfs_t = "stub_statvfs" @@ -686,7 +771,8 @@ let domain_of_addr str = module Direct = struct type t = Unix.file_descr - external openfile : string -> Unix.open_flag list -> Unix.file_perm -> t = "stub_stdext_unix_open_direct" + external openfile : string -> Unix.open_flag list -> Unix.file_perm -> t + = "stub_stdext_unix_open_direct" let close = Unix.close @@ -694,14 +780,17 @@ module Direct = struct let t = openfile path flags perms in finally (fun () -> f t) (fun () -> close t) - external unsafe_write : t -> bytes -> int -> int -> int = "stub_stdext_unix_write" + external unsafe_write : t -> bytes -> int -> int -> int + = "stub_stdext_unix_write" let write fd buf ofs len = - if ofs < 0 || len < 0 || ofs > Bytes.length buf - len - then invalid_arg "Unixext.write" - else unsafe_write fd buf ofs len + if ofs < 0 || len < 0 || ofs > Bytes.length buf - len then + invalid_arg "Unixext.write" + else + unsafe_write fd buf ofs len - let copy_from_fd ?limit socket fd = copy_file_internal ?limit (Unix.read socket) (write fd) + let copy_from_fd ?limit socket fd = + copy_file_internal ?limit (Unix.read socket) (write fd) let fsync x = fsync x diff --git a/lib/xapi-stdext-unix/unixext.mli b/lib/xapi-stdext-unix/unixext.mli index ce9c7750..cde55184 100644 --- a/lib/xapi-stdext-unix/unixext.mli +++ b/lib/xapi-stdext-unix/unixext.mli @@ -14,163 +14,237 @@ (** A collection of extensions to the [Unix] module. *) val _exit : int -> unit + val unlink_safe : string -> unit + val mkdir_safe : string -> Unix.file_perm -> unit + val mkdir_rec : string -> Unix.file_perm -> unit + val pidfile_write : string -> unit + val pidfile_read : string -> int option + val daemonize : unit -> unit -val with_file : string -> Unix.open_flag list -> Unix.file_perm -> (Unix.file_descr -> 'a) -> 'a + +val with_file : + string + -> Unix.open_flag list + -> Unix.file_perm + -> (Unix.file_descr -> 'a) + -> 'a + val with_input_channel : string -> (in_channel -> 'a) -> 'a + val with_directory : string -> (Unix.dir_handle -> 'a) -> 'a (** Exception to be raised in function to break out of [file_lines_fold]. *) exception Break -(** Folds function [f] over every line in the input channel *) val lines_fold : ('a -> string -> 'a) -> 'a -> in_channel -> 'a +(** Folds function [f] over every line in the input channel *) -(** Applies function [f] to every line in the input channel *) val lines_iter : (string -> unit) -> in_channel -> unit +(** Applies function [f] to every line in the input channel *) +val file_lines_fold : ('a -> string -> 'a) -> 'a -> string -> 'a (** Folds function [f] over every line in the file at [file_path] using the starting value [start]. *) -val file_lines_fold : ('a -> string -> 'a) -> 'a -> string -> 'a -(** [read_lines path] returns a list of lines in the file at [path]. *) val read_lines : path:string -> string list +(** [read_lines path] returns a list of lines in the file at [path]. *) -(** Applies function [f] to every line in the file at [file_path]. *) val file_lines_iter : (string -> unit) -> string -> unit +(** Applies function [f] to every line in the file at [file_path]. *) +val fd_blocks_fold : int -> ('a -> bytes -> 'a) -> 'a -> Unix.file_descr -> 'a (** [fd_blocks_fold block_size f start fd] folds [f] over blocks (strings) from the fd [fd] with initial value [start] *) -val fd_blocks_fold: int -> ('a -> bytes -> 'a) -> 'a -> Unix.file_descr -> 'a -(** Alias for function [file_lines_iter]. *) val readfile_line : (string -> 'a) -> string -> unit +(** Alias for function [file_lines_iter]. *) -(** [buffer_of_fd fd] returns a Buffer.t containing all data read from [fd] up to EOF *) val buffer_of_fd : Unix.file_descr -> Buffer.t +(** [buffer_of_fd fd] returns a Buffer.t containing all data read from [fd] up to EOF *) -(** [string_of_fd fd] returns a string containing all data read from [fd] up to EOF *) val string_of_fd : Unix.file_descr -> string +(** [string_of_fd fd] returns a string containing all data read from [fd] up to EOF *) -(** [buffer_of_file file] returns a Buffer.t containing the contents of [file] *) val buffer_of_file : string -> Buffer.t +(** [buffer_of_file file] returns a Buffer.t containing the contents of [file] *) -(** [string_of_file file] returns a string containing the contents of [file] *) val string_of_file : string -> string +(** [string_of_file file] returns a string containing the contents of [file] *) +val atomic_write_to_file : + string -> Unix.file_perm -> (Unix.file_descr -> 'a) -> 'a (** [atomic_write_to_file fname perms f] writes a file to path [fname] using the function [f] with permissions [perms]. In case of error during the operation the file with the path [fname] is not modified at all. *) -val atomic_write_to_file : string -> Unix.file_perm -> (Unix.file_descr -> 'a) -> 'a +val write_string_to_file : ?perms:Unix.file_perm -> string -> string -> unit (** [write_string_to_file fname contents] creates a file with path [fname] with the string [contents] as its contents, atomically *) -val write_string_to_file : ?perms:Unix.file_perm -> string -> string -> unit +val write_bytes_to_file : ?perms:Unix.file_perm -> string -> bytes -> unit (** [write_string_to_file fname contents] creates a file with path [fname] with the buffer [contents] as its contents, atomically *) -val write_bytes_to_file : ?perms:Unix.file_perm -> string -> bytes -> unit + val execv_get_output : string -> string array -> int * Unix.file_descr + val copy_file : ?limit:int64 -> Unix.file_descr -> Unix.file_descr -> int64 -(** Returns true if and only if a file exists at the given path. *) val file_exists : string -> bool +(** Returns true if and only if a file exists at the given path. *) +val touch_file : string -> unit (** Sets both the access and modification times of the file * at the given path to the current time. Creates an empty * file at the given path if no such file already exists. *) -val touch_file : string -> unit -(** Returns true if and only if an empty file exists at the given path. *) val is_empty_file : string -> bool +(** Returns true if and only if an empty file exists at the given path. *) +val delete_empty_file : string -> bool (** Safely deletes a file at the given path if (and only if) the * file exists and is empty. Returns true if a file was deleted. *) -val delete_empty_file : string -> bool exception Host_not_found of string + val open_connection_fd : string -> int -> Unix.file_descr -val open_connection_unix_fd : string -> Unix.file_descr +val open_connection_unix_fd : string -> Unix.file_descr exception Process_still_alive + val kill_and_wait : ?signal:int -> ?timeout:float -> int -> unit +val string_of_signal : int -> string (** [string_of_signal x] translates an ocaml signal number into * a string suitable for logging. *) -val string_of_signal : int -> string val proxy : Unix.file_descr -> Unix.file_descr -> unit + val really_read : Unix.file_descr -> bytes -> int -> int -> unit + val really_read_string : Unix.file_descr -> int -> string +val really_write : Unix.file_descr -> string -> int -> int -> unit (** [really_write] keeps repeating the write operation until all bytes * have been written or an error occurs. This is not atomic but is * robust against EINTR errors. * See: https://ocaml.github.io/ocamlunix/ocamlunix.html#sec118 *) -val really_write : Unix.file_descr -> string -> int -> int -> unit + val really_write_string : Unix.file_descr -> string -> unit -val try_read_string : ?limit: int -> Unix.file_descr -> string + +val try_read_string : ?limit:int -> Unix.file_descr -> string + exception Timeout + val time_limited_write : Unix.file_descr -> int -> bytes -> float -> unit -val time_limited_write_substring : Unix.file_descr -> int -> string -> float -> unit + +val time_limited_write_substring : + Unix.file_descr -> int -> string -> float -> unit + val time_limited_read : Unix.file_descr -> int -> float -> string -val read_data_in_string_chunks : (string -> int -> unit) -> ?block_size:int -> ?max_bytes:int -> Unix.file_descr -> int -val read_data_in_chunks : (bytes -> int -> unit) -> ?block_size:int -> ?max_bytes:int -> Unix.file_descr -> int + +val read_data_in_string_chunks : + (string -> int -> unit) + -> ?block_size:int + -> ?max_bytes:int + -> Unix.file_descr + -> int + +val read_data_in_chunks : + (bytes -> int -> unit) + -> ?block_size:int + -> ?max_bytes:int + -> Unix.file_descr + -> int + val spawnvp : - ?pid_callback:(int -> unit) -> - string -> string array -> Unix.process_status + ?pid_callback:(int -> unit) -> string -> string array -> Unix.process_status + val double_fork : (unit -> unit) -> unit + external set_tcp_nodelay : Unix.file_descr -> bool -> unit = "stub_unixext_set_tcp_nodelay" -external set_sock_keepalives : Unix.file_descr -> int -> int -> int -> unit = "stub_unixext_set_sock_keepalives" + +external set_sock_keepalives : Unix.file_descr -> int -> int -> int -> unit + = "stub_unixext_set_sock_keepalives" + external fsync : Unix.file_descr -> unit = "stub_unixext_fsync" + external get_max_fd : unit -> int = "stub_unixext_get_max_fd" + external blkgetsize64 : Unix.file_descr -> int64 = "stub_unixext_blkgetsize64" val int_of_file_descr : Unix.file_descr -> int + val file_descr_of_int : int -> Unix.file_descr + val close_all_fds_except : Unix.file_descr list -> unit + val resolve_dot_and_dotdot : string -> string val seek_to : Unix.file_descr -> int -> int + val seek_rel : Unix.file_descr -> int -> int + val current_cursor_pos : Unix.file_descr -> int val wait_for_path : string -> (float -> unit) -> int -> unit -val send_fd : Unix.file_descr -> bytes -> int -> int -> Unix.msg_flag list -> Unix.file_descr -> int -val send_fd_substring : Unix.file_descr -> string -> int -> int -> Unix.msg_flag list -> Unix.file_descr -> int -val recv_fd : Unix.file_descr -> bytes -> int -> int -> Unix.msg_flag list -> int * Unix.sockaddr * Unix.file_descr +val send_fd : + Unix.file_descr + -> bytes + -> int + -> int + -> Unix.msg_flag list + -> Unix.file_descr + -> int + +val send_fd_substring : + Unix.file_descr + -> string + -> int + -> int + -> Unix.msg_flag list + -> Unix.file_descr + -> int + +val recv_fd : + Unix.file_descr + -> bytes + -> int + -> int + -> Unix.msg_flag list + -> int * Unix.sockaddr * Unix.file_descr type statvfs_t = { - f_bsize : int64; - f_frsize : int64; - f_blocks : int64; - f_bfree : int64; - f_bavail : int64; - f_files : int64; - f_ffree : int64; - f_favail : int64; - f_fsid : int64; - f_flag : int64; - f_namemax : int64; + f_bsize: int64 + ; f_frsize: int64 + ; f_blocks: int64 + ; f_bfree: int64 + ; f_bavail: int64 + ; f_files: int64 + ; f_ffree: int64 + ; f_favail: int64 + ; f_fsid: int64 + ; f_flag: int64 + ; f_namemax: int64 } val statvfs : string -> statvfs_t -(** Returns Some Unix.PF_INET or Some Unix.PF_INET6 if passed a valid IP address, otherwise returns None. *) val domain_of_addr : string -> Unix.socket_domain option +(** Returns Some Unix.PF_INET or Some Unix.PF_INET6 if passed a valid IP address, otherwise returns None. *) module Direct : sig (** Perform I/O in O_DIRECT mode using 4KiB page-aligned buffers *) - type t (** represents a file open in O_DIRECT mode *) + type t val openfile : string -> Unix.open_flag list -> Unix.file_perm -> t (** [openfile name flags perm] behaves the same as [Unix.openfile] but includes the O_DIRECT flag *) @@ -178,7 +252,8 @@ module Direct : sig val close : t -> unit (** [close t] closes [t], a file open in O_DIRECT mode *) - val with_openfile : string -> Unix.open_flag list -> Unix.file_perm -> (t -> 'a) -> 'a + val with_openfile : + string -> Unix.open_flag list -> Unix.file_perm -> (t -> 'a) -> 'a (** [with_openfile name flags perm f] opens [name], applies the result to [f] and closes *) val write : t -> bytes -> int -> int -> int diff --git a/lib/xapi-stdext-zerocheck/zerocheck.mli b/lib/xapi-stdext-zerocheck/zerocheck.mli index 84489e63..08eb9b73 100644 --- a/lib/xapi-stdext-zerocheck/zerocheck.mli +++ b/lib/xapi-stdext-zerocheck/zerocheck.mli @@ -12,5 +12,5 @@ * GNU Lesser General Public License for more details. *) -(** [is_all_zeroes x len] returns true if the substring is all zeroes *) external is_all_zeros : string -> int -> bool = "is_all_zeros" +(** [is_all_zeroes x len] returns true if the substring is all zeroes *) diff --git a/xapi-stdext.opam b/xapi-stdext.opam index 40429802..c79ab1f7 100644 --- a/xapi-stdext.opam +++ b/xapi-stdext.opam @@ -9,6 +9,7 @@ homepage: "https://github.com/xapi-project/stdext" bug-reports: "https://github.com/xapi-project/stdext/issues" depends: [ "dune" {>= "2.7"} + "polly" {= version} "xapi-stdext-date" {= version} "xapi-stdext-encodings" {= version} "xapi-stdext-pervasives" {= version} From a922278d658fe85fc9b950f7cfd7f498b5df302f Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Mon, 13 Feb 2023 17:22:22 +0000 Subject: [PATCH 2/3] CP-32622: Remove select using polly and thread.delay Signed-off-by: Steven Woods --- lib/xapi-stdext-threads/threadext.ml | 19 ++++- lib/xapi-stdext-unix/unixext.ml | 119 +++++++++++++++++---------- 2 files changed, 91 insertions(+), 47 deletions(-) diff --git a/lib/xapi-stdext-threads/threadext.ml b/lib/xapi-stdext-threads/threadext.ml index 56025d51..fde371a9 100644 --- a/lib/xapi-stdext-threads/threadext.ml +++ b/lib/xapi-stdext-threads/threadext.ml @@ -86,11 +86,22 @@ module Delay = struct pipe_out ) in - let r, _, _ = Unix.select [pipe_out] [] [] seconds in - (* flush the single byte from the pipe *) - if r <> [] then ignore (Unix.read pipe_out (Bytes.create 1) 0 1) ; + let epoll = Polly.create () in + Polly.add epoll pipe_out Polly.Events.inp ; + let num_fds = + Fun.protect + ~finally:(fun () -> Polly.close epoll) + (fun () -> + Polly.wait epoll 1 + (int_of_float (seconds *. 1000.)) + (fun _ fd _ -> + (* flush the single byte from the pipe *) + ignore (Unix.read fd (Bytes.create 1) 0 1) + ) + ) + in (* return true if we waited the full length of time, false if we were woken *) - r = [] + num_fds = 0 with Pre_signalled -> false ) (fun () -> diff --git a/lib/xapi-stdext-unix/unixext.ml b/lib/xapi-stdext-unix/unixext.ml index 4680b6b4..1d3775a6 100644 --- a/lib/xapi-stdext-unix/unixext.ml +++ b/lib/xapi-stdext-unix/unixext.ml @@ -361,7 +361,7 @@ let kill_and_wait ?(signal = Sys.sigterm) ?(timeout = 10.) pid = let cmdline = readcmdline pid in if cmdline = reference then ( (* still up, let's sleep a bit *) - ignore (Unix.select [] [] [] loop_time_waiting) ; + Thread.delay loop_time_waiting ; left := !left -. loop_time_waiting ) else (* not the same, it's gone ! *) quit := true @@ -419,19 +419,30 @@ let proxy (a : Unix.file_descr) (b : Unix.file_descr) = in (* If we can't make any progress (because fds have been closed), then stop *) if r = [] && w = [] then raise End_of_file ; - let r, w, _ = Unix.select r w [] (-1.0) in - (* Do the writing before the reading *) - List.iter - (fun fd -> if a = fd then CBuf.write b' a else CBuf.write a' b) - w ; - List.iter (fun fd -> if a = fd then CBuf.read a' a else CBuf.read b' b) r ; - (* If there's nothing else to read or write then signal the other end *) - List.iter - (fun (buf, fd) -> - if CBuf.end_of_reads buf then Unix.shutdown fd Unix.SHUTDOWN_SEND ; - if CBuf.end_of_writes buf then Unix.shutdown fd Unix.SHUTDOWN_RECEIVE + let epoll = Polly.create () in + List.iter (fun fd -> Polly.add epoll fd Polly.Events.inp) r ; + List.iter (fun fd -> Polly.add epoll fd Polly.Events.out) w ; + Fun.protect + ~finally:(fun () -> Polly.close epoll) + (fun () -> + ignore + @@ Polly.wait epoll 4 (-1) (fun _ file_desc event -> + (* Do the writing before the reading *) + if event = Polly.Events.out then + if a = file_desc then CBuf.write b' a else CBuf.write a' b ; + if event = Polly.Events.inp then + if a = file_desc then CBuf.read a' a else CBuf.read b' b ; + (* If there's nothing else to read or write then signal the other end *) + List.iter + (fun (buf, fd) -> + if CBuf.end_of_reads buf then + Unix.shutdown fd Unix.SHUTDOWN_SEND ; + if CBuf.end_of_writes buf then + Unix.shutdown fd Unix.SHUTDOWN_RECEIVE + ) + [(a', b); (b', a)] + ) ) - [(a', b); (b', a)] done with _ -> ( (try Unix.clear_nonblock a with _ -> ()) ; @@ -517,21 +528,32 @@ let time_limited_write_internal let now = ref (Unix.gettimeofday ()) in while !bytes_written < total_bytes_to_write && !now < target_response_time do let remaining_time = target_response_time -. !now in - let _, ready_to_write, _ = Unix.select [] [filedesc] [] remaining_time in - (* Note: there is a possibility that the storage could go away after the select and before the write, so the write would block. *) - ( if List.mem filedesc ready_to_write then - let bytes_to_write = total_bytes_to_write - !bytes_written in - let bytes = - try write filedesc data !bytes_written bytes_to_write - with - | Unix.Unix_error (Unix.EAGAIN, _, _) - | Unix.Unix_error (Unix.EWOULDBLOCK, _, _) - -> - 0 + let epoll = Polly.create () in + Polly.add epoll filedesc Polly.Events.out ; + Fun.protect + ~finally:(fun () -> Polly.close epoll) + (fun () -> + let (_ : int) = + Polly.wait epoll 1 + (int_of_float (remaining_time *. 1000.)) + (fun _ fd _ -> + (* Note: there is a possibility that the storage could go away after the epoll and before the write, so the write would block. *) + if fd = filedesc then + let bytes_to_write = total_bytes_to_write - !bytes_written in + let bytes = + try write filedesc data !bytes_written bytes_to_write + with + | Unix.Unix_error (Unix.EAGAIN, _, _) + | Unix.Unix_error (Unix.EWOULDBLOCK, _, _) + -> + 0 + in + (* write from buffer=data from offset=bytes_written, length=bytes_to_write *) + bytes_written := bytes + !bytes_written + ) in - (* write from buffer=data from offset=bytes_written, length=bytes_to_write *) - bytes_written := bytes + !bytes_written - ) ; + () + ) ; now := Unix.gettimeofday () done ; if !bytes_written = total_bytes_to_write then @@ -557,23 +579,34 @@ let time_limited_read filedesc length target_response_time = let now = ref (Unix.gettimeofday ()) in while !bytes_read < total_bytes_to_read && !now < target_response_time do let remaining_time = target_response_time -. !now in - let ready_to_read, _, _ = Unix.select [filedesc] [] [] remaining_time in - ( if List.mem filedesc ready_to_read then - let bytes_to_read = total_bytes_to_read - !bytes_read in - let bytes = - try Unix.read filedesc buf !bytes_read bytes_to_read - with - | Unix.Unix_error (Unix.EAGAIN, _, _) - | Unix.Unix_error (Unix.EWOULDBLOCK, _, _) - -> - 0 + let epoll = Polly.create () in + Polly.add epoll filedesc Polly.Events.inp ; + Fun.protect + ~finally:(fun () -> Polly.close epoll) + (fun () -> + let (_ : int) = + Polly.wait epoll 1 + (int_of_float (remaining_time *. 1000.)) + (fun _ fd _ -> + if fd = filedesc then + let bytes_to_read = total_bytes_to_read - !bytes_read in + let bytes = + try Unix.read filedesc buf !bytes_read bytes_to_read + with + | Unix.Unix_error (Unix.EAGAIN, _, _) + | Unix.Unix_error (Unix.EWOULDBLOCK, _, _) + -> + 0 + in + (* read into buffer=buf from offset=bytes_read, length=bytes_to_read *) + if bytes = 0 then + raise End_of_file (* End of file has been reached *) + else + bytes_read := bytes + !bytes_read + ) in - (* read into buffer=buf from offset=bytes_read, length=bytes_to_read *) - if bytes = 0 then - raise End_of_file (* End of file has been reached *) - else - bytes_read := bytes + !bytes_read - ) ; + () + ) ; now := Unix.gettimeofday () done ; if !bytes_read = total_bytes_to_read then From d2019b565786d1b81b22f9be723bf3bdcd8ff344 Mon Sep 17 00:00:00 2001 From: Steven Woods Date: Tue, 14 Feb 2023 13:38:31 +0000 Subject: [PATCH 3/3] Add unit tests for threadext Signed-off-by: Steven Woods --- lib/xapi-stdext-threads/dune | 6 +++++ lib/xapi-stdext-threads/threadext.mli | 6 ++--- lib/xapi-stdext-threads/threadext_test.ml | 30 +++++++++++++++++++++++ xapi-stdext-threads.opam | 1 + xapi-stdext-unix.opam | 1 + 5 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 lib/xapi-stdext-threads/threadext_test.ml diff --git a/lib/xapi-stdext-threads/dune b/lib/xapi-stdext-threads/dune index e9111bd6..9751cb51 100644 --- a/lib/xapi-stdext-threads/dune +++ b/lib/xapi-stdext-threads/dune @@ -1,9 +1,15 @@ (library (public_name xapi-stdext-threads) (name xapi_stdext_threads) + (modules :standard \ threadext_test) (libraries polly threads unix xapi-stdext-pervasives) ) +(test + (name threadext_test) + (modules threadext_test) + (libraries xapi_stdext_threads alcotest mtime.clock.os) +) \ No newline at end of file diff --git a/lib/xapi-stdext-threads/threadext.mli b/lib/xapi-stdext-threads/threadext.mli index 62bb50d8..8349ab71 100644 --- a/lib/xapi-stdext-threads/threadext.mli +++ b/lib/xapi-stdext-threads/threadext.mli @@ -23,13 +23,13 @@ module Delay : sig type t val make : unit -> t + + val wait : t -> float -> bool (** Blocks the calling thread for a given period of time with the option of returning early if someone calls 'signal'. Returns true if the full time period elapsed and false if signalled. Note that multple 'signals' are coalesced; 'signals' sent before 'wait' is called are not lost. *) - val wait : t -> float -> bool - (** Sends a signal to a waiting thread. See 'wait' *) - val signal : t -> unit + (** Sends a signal to a waiting thread. See 'wait' *) end diff --git a/lib/xapi-stdext-threads/threadext_test.ml b/lib/xapi-stdext-threads/threadext_test.ml new file mode 100644 index 00000000..5f181f2d --- /dev/null +++ b/lib/xapi-stdext-threads/threadext_test.ml @@ -0,0 +1,30 @@ +(* + * Copyright (C) 2006-2009 Citrix Systems Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module Delay = Xapi_stdext_threads.Threadext.Delay + +let test_wait () = + let m = Delay.make () in + let c = Mtime_clock.counter () in + let expected = Mtime.Span.(5 * s) in + let max_error = Mtime.Span.(10 * ms) in + let _ = Delay.wait m 5.0 in + let wait_time = Mtime_clock.count c in + let diff = Mtime.Span.abs_diff expected wait_time in + let cmp = Mtime.Span.compare diff max_error in + Alcotest.(check bool) "diff is smaller than max error" true (cmp < 0) + + +let () = +Alcotest.run "Threadext" [("wait", [("wait", `Quick, test_wait)])] \ No newline at end of file diff --git a/xapi-stdext-threads.opam b/xapi-stdext-threads.opam index 04cd8fea..c5f5d9ec 100644 --- a/xapi-stdext-threads.opam +++ b/xapi-stdext-threads.opam @@ -8,6 +8,7 @@ homepage: "https://github.com/xapi-project/stdext" bug-reports: "https://github.com/xapi-project/stdext/issues" depends: [ "dune" {>= "2.7"} + "polly" "ocaml" "base-threads" "base-unix" diff --git a/xapi-stdext-unix.opam b/xapi-stdext-unix.opam index e7e2b807..205d1724 100644 --- a/xapi-stdext-unix.opam +++ b/xapi-stdext-unix.opam @@ -8,6 +8,7 @@ homepage: "https://github.com/xapi-project/stdext" bug-reports: "https://github.com/xapi-project/stdext/issues" depends: [ "dune" {>= "2.7"} + "polly" "ocaml" "base-unix" "fd-send-recv" {>= "2.0.0"}