diff --git a/CHANGES.md b/CHANGES.md index 4a97c9c..741ef72 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,7 @@ ## 8.1.2 (February 2022) * Add the `noargs` constructor for declaring interfaces that do not take any parameters. +* Allow Xmlrpc callers to override the base64 decoding function. ## 8.1.1 (November 2021) * Ignore error about using f-strings in python bindings (@psafont) diff --git a/README.md b/README.md index d57f95e..48be573 100644 --- a/README.md +++ b/README.md @@ -283,6 +283,46 @@ used to generate something from an interface defined following the above pattern. For example, it is possible to write an `RPC` implementation that generates a GUI for a given interface. +## Base64 Decoding + +The treatment of line feeds (and other characters) in +[XML-RPC](http://xmlrpc.com) base64-encoded data is underspecified. + +By default, this library decodes values using the `Base64.decode_exn` +function of [ocaml-base64](https://github.com/mirage/ocaml-base64). This +function implements [RFC4648](https://datatracker.ietf.org/doc/html/rfc4648) +which requires the rejection of non-alphabet characters for security reasons +(see [section 3.3](https://datatracker.ietf.org/doc/html/rfc4648#section-3.3) +and also [section 3.1](https://datatracker.ietf.org/doc/html/rfc4648#section-3.1)). + +This is problematic when communicating with servers that are less strict. +For instance, the +[encode](https://docs.python.org/3/library/xmlrpc.client.html#xmlrpc.client.Binary.encode) +function of the Python `xmlrpc.client` refers to [section 6.8 of +RFC2045](https://datatracker.ietf.org/doc/html/rfc2045.html#section-6.8) to +justify inserting a newline character every 76 characters. +For this reasons, the functions in `Xmlrpc` allow the caller to override the +`base64_decoder`. The following declaration gives a rough-and-ready +“dangerous” implementation based on the `Base64.rfc2045` package. +A better implementation would only accept a `\n` every 76 characters. + +``` +let base64_2045_decoder s = + let open Base64_rfc2045 in + let buf = Buffer.create 1024 in + let d = decoder (`String s) in + let rec go () = + match decode d with + | `Flush s -> (Buffer.add_string buf s; go ()) + | `End -> Buffer.contents buf + (* best-effort *) + | `Malformed _ (* ignore missing '\r' before '\n', etc. *) + | `Wrong_padding (* ignore *) + | `Await -> go () + in + go () +``` + ## Building To build, first install the dependencies: diff --git a/src/lib/xmlrpc.ml b/src/lib/xmlrpc.ml index 6167d73..ab8c470 100644 --- a/src/lib/xmlrpc.ml +++ b/src/lib/xmlrpc.ml @@ -313,13 +313,14 @@ module Parser = struct let make_float = make (fun data -> Float (float_of_string data)) let make_string = make (fun data -> String data) let make_dateTime = make (fun data -> DateTime data) - let make_base64 = make (fun data -> Base64 (Base64.decode_exn data)) + let make_base64 ?(base64_decoder=fun s -> Base64.decode_exn s) = + make (fun data -> Base64 (base64_decoder data)) let make_enum = make (fun data -> Enum data) let make_dict = make (fun data -> Dict data) (* General parser functions *) - let rec of_xml ?callback accu input = - try value (map_tags (basic_types ?callback accu)) input with + let rec of_xml ?callback ?base64_decoder accu input = + try value (map_tags (basic_types ?callback ?base64_decoder accu)) input with | Xmlm.Error ((a, b), e) as exn -> eprintf "Characters %i--%i: %s\n%!" a b (Xmlm.error_message e); raise exn @@ -328,13 +329,13 @@ module Parser = struct raise e - and basic_types ?callback accu input = function + and basic_types ?callback ?base64_decoder accu input = function | "int" | "i8" | "i4" -> make_int ?callback accu (get_data input) | "boolean" -> make_bool ?callback accu (get_data input) | "double" -> make_float ?callback accu (get_data input) | "string" -> make_string ?callback accu (get_data input) | "dateTime.iso8601" -> make_dateTime ?callback accu (get_data input) - | "base64" -> make_base64 ?callback accu (get_data input) + | "base64" -> make_base64 ?callback ?base64_decoder accu (get_data input) | "array" -> make_enum ?callback accu (data (of_xmls ?callback accu) input) | "struct" -> make_dict @@ -359,25 +360,25 @@ module Parser = struct List.rev !r end -let of_string ?callback str = +let of_string ?callback ?base64_decoder str = let input = Xmlm.make_input (`String (0, str)) in (match Xmlm.peek input with | `Dtd _ -> ignore (Xmlm.input input) | _ -> ()); - Parser.of_xml ?callback [] input + Parser.of_xml ?callback ?base64_decoder [] input -let of_a ?callback ~next_char b = +let of_a ?callback ?base64_decoder ~next_char b = let aux () = match next_char b with | Some c -> int_of_char c | None -> raise End_of_file in let input = Xmlm.make_input (`Fun aux) in - Parser.of_xml ?callback [] input + Parser.of_xml ?callback ?base64_decoder [] input -let call_of_string ?callback str = +let call_of_string ?callback ?base64_decoder str = let input = Xmlm.make_input (`String (0, str)) in (match Xmlm.peek input with | `Dtd _ -> ignore (Xmlm.input input) @@ -395,7 +396,8 @@ let call_of_string ?callback str = while Xmlm.peek input <> `El_end do Parser.map_tag "param" - (fun input -> params := Parser.of_xml ?callback [] input :: !params) + (fun input -> params := + Parser.of_xml ?callback ?base64_decoder [] input :: !params) input; Parser.skip_empty input done) @@ -404,11 +406,11 @@ let call_of_string ?callback str = call !name (List.rev !params) -let response_of_fault ?callback input = +let response_of_fault ?callback ?base64_decoder input = Parser.map_tag "fault" (fun input -> - match Parser.of_xml ?callback [] input with + match Parser.of_xml ?callback ?base64_decoder [] input with | Dict d -> let fault_code = List.assoc "faultCode" d in let fault_string = List.assoc "faultString" d in @@ -417,14 +419,14 @@ let response_of_fault ?callback input = input -let response_of_success ?callback input = +let response_of_success ?callback ?base64_decoder input = Parser.map_tag "params" (fun input -> Parser.map_tag "param" (fun input -> - match Parser.of_xml ?callback [] input with + match Parser.of_xml ?callback ?base64_decoder [] input with | Dict d -> if List.mem_assoc "Status" d && List.assoc "Status" d = String "Success" @@ -440,7 +442,7 @@ let response_of_success ?callback input = input -let response_of_input ?callback input = +let response_of_input ?callback ?base64_decoder input = (match Xmlm.peek input with | `Dtd _ -> ignore (Xmlm.input input) | _ -> ()); @@ -449,8 +451,10 @@ let response_of_input ?callback input = (fun input -> Parser.skip_empty input; match Xmlm.peek input with - | `El_start ((_, "params"), _) -> response_of_success ?callback input - | `El_start ((_, "fault"), _) -> response_of_fault ?callback input + | `El_start ((_, "params"), _) -> + response_of_success ?callback ?base64_decoder input + | `El_start ((_, "fault"), _) -> + response_of_fault ?callback ?base64_decoder input | `El_start ((_, tag), _) -> parse_error (sprintf "open_tag(%s)" tag) "open_tag(fault/params)" input | `Data d -> parse_error (String.escaped d) "open_tag(fault/params)" input @@ -459,11 +463,11 @@ let response_of_input ?callback input = input -let response_of_string ?callback str = +let response_of_string ?callback ?base64_decoder str = let input = Xmlm.make_input (`String (0, str)) in - response_of_input ?callback input + response_of_input ?callback ?base64_decoder input -let response_of_in_channel ?callback chan = +let response_of_in_channel ?callback ?base64_decoder chan = let input = Xmlm.make_input (`Channel chan) in - response_of_input ?callback input + response_of_input ?callback ?base64_decoder input diff --git a/src/lib/xmlrpc.mli b/src/lib/xmlrpc.mli index 09c91b5..ca78f22 100644 --- a/src/lib/xmlrpc.mli +++ b/src/lib/xmlrpc.mli @@ -22,40 +22,58 @@ val a_of_response exception Parse_error of string * string * Xmlm.input +(** The parsing functions make it possible to specify the routine used to + decode base64 values. The default is to use [Base64.decode_exn] which + strictly interprets the standard. A different function will be required if + the XMLRPC server inserts line breaks into the base64 encoding (as per + RFC2045). *) + val pretty_string_of_error : string -> string -> Xmlm.input -> string val parse_error : string -> string -> Xmlm.input -> unit -val of_string : ?callback:(string list -> Rpc.t -> unit) -> string -> Rpc.t +val of_string + : ?callback:(string list -> Rpc.t -> unit) + -> ?base64_decoder:(string -> string) + -> string -> Rpc.t val of_a : ?callback:(string list -> Rpc.t -> unit) + -> ?base64_decoder:(string -> string) -> next_char:('b -> char option) -> 'b -> Rpc.t [@@ocaml.deprecated] -val call_of_string : ?callback:(string list -> Rpc.t -> unit) -> string -> Rpc.call +val call_of_string + : ?callback:(string list -> Rpc.t -> unit) + -> ?base64_decoder:(string -> string) + -> string -> Rpc.call val response_of_fault : ?callback:(string list -> Rpc.t -> unit) + -> ?base64_decoder:(string -> string) -> Xmlm.input -> Rpc.response val response_of_success : ?callback:(string list -> Rpc.t -> unit) + -> ?base64_decoder:(string -> string) -> Xmlm.input -> Rpc.response val response_of_input : ?callback:(string list -> Rpc.t -> unit) + -> ?base64_decoder:(string -> string) -> Xmlm.input -> Rpc.response val response_of_string : ?callback:(string list -> Rpc.t -> unit) + -> ?base64_decoder:(string -> string) -> string -> Rpc.response val response_of_in_channel : ?callback:(string list -> Rpc.t -> unit) + -> ?base64_decoder:(string -> string) -> in_channel -> Rpc.response