From: Stephane Glondu Date: Wed, 10 Jul 2019 12:49:53 +0000 (+0200) Subject: New upstream version 4.07.0 X-Git-Tag: archive/raspbian/4.08.1-4+rpi1~2^2~17^2~5^2~3 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=27938ef6584aa9182ca144cdc7747c5e0ab61190;p=ocaml.git New upstream version 4.07.0 --- diff --git a/.depend b/.depend index 8efabcb0..891a9c9b 100644 --- a/.depend +++ b/.depend @@ -1,6 +1,9 @@ utils/arg_helper.cmo : utils/arg_helper.cmi utils/arg_helper.cmx : utils/arg_helper.cmi utils/arg_helper.cmi : +utils/build_path_prefix_map.cmo : utils/build_path_prefix_map.cmi +utils/build_path_prefix_map.cmx : utils/build_path_prefix_map.cmi +utils/build_path_prefix_map.cmi : utils/ccomp.cmo : utils/misc.cmi utils/config.cmi utils/clflags.cmi \ utils/ccomp.cmi utils/ccomp.cmx : utils/misc.cmx utils/config.cmx utils/clflags.cmx \ @@ -105,9 +108,9 @@ parsing/lexer.cmx : utils/warnings.cmx parsing/parser.cmx utils/misc.cmx \ parsing/location.cmx parsing/docstrings.cmx parsing/lexer.cmi parsing/lexer.cmi : parsing/parser.cmi parsing/location.cmi parsing/location.cmo : utils/warnings.cmi utils/terminfo.cmi utils/misc.cmi \ - utils/clflags.cmi parsing/location.cmi + utils/clflags.cmi utils/build_path_prefix_map.cmi parsing/location.cmi parsing/location.cmx : utils/warnings.cmx utils/terminfo.cmx utils/misc.cmx \ - utils/clflags.cmx parsing/location.cmi + utils/clflags.cmx utils/build_path_prefix_map.cmx parsing/location.cmi parsing/location.cmi : utils/warnings.cmi parsing/longident.cmo : utils/misc.cmi parsing/longident.cmi parsing/longident.cmx : utils/misc.cmx parsing/longident.cmi @@ -199,16 +202,16 @@ typing/env.cmx : utils/warnings.cmx typing/types.cmx utils/tbl.cmx \ typing/cmi_format.cmx utils/clflags.cmx parsing/builtin_attributes.cmx \ typing/btype.cmx parsing/asttypes.cmi typing/env.cmi typing/env.cmi : utils/warnings.cmi typing/types.cmi typing/subst.cmi \ - typing/path.cmi parsing/longident.cmi parsing/location.cmi \ + typing/path.cmi utils/misc.cmi 
parsing/longident.cmi parsing/location.cmi \ typing/ident.cmi utils/consistbl.cmi typing/cmi_format.cmi \ parsing/asttypes.cmi typing/envaux.cmo : typing/subst.cmi typing/printtyp.cmi typing/path.cmi \ - typing/ident.cmi typing/env.cmi parsing/asttypes.cmi typing/envaux.cmi + typing/ident.cmi typing/env.cmi typing/envaux.cmi typing/envaux.cmx : typing/subst.cmx typing/printtyp.cmx typing/path.cmx \ - typing/ident.cmx typing/env.cmx parsing/asttypes.cmi typing/envaux.cmi + typing/ident.cmx typing/env.cmx typing/envaux.cmi typing/envaux.cmi : typing/subst.cmi typing/path.cmi typing/env.cmi -typing/ident.cmo : utils/identifiable.cmi typing/ident.cmi -typing/ident.cmx : utils/identifiable.cmx typing/ident.cmi +typing/ident.cmo : utils/identifiable.cmi utils/clflags.cmi typing/ident.cmi +typing/ident.cmx : utils/identifiable.cmx utils/clflags.cmx typing/ident.cmi typing/ident.cmi : utils/identifiable.cmi typing/includeclass.cmo : typing/types.cmi typing/printtyp.cmi \ typing/path.cmi typing/ctype.cmi parsing/builtin_attributes.cmi \ @@ -261,21 +264,21 @@ typing/oprint.cmi : typing/outcometree.cmi typing/outcometree.cmi : parsing/asttypes.cmi typing/parmatch.cmo : utils/warnings.cmi typing/untypeast.cmi \ typing/types.cmi typing/typedtreeIter.cmi typing/typedtree.cmi \ - typing/subst.cmi typing/predef.cmi typing/path.cmi parsing/parsetree.cmi \ - utils/misc.cmi parsing/longident.cmi parsing/location.cmi \ - typing/ident.cmi typing/env.cmi typing/ctype.cmi utils/config.cmi \ - typing/btype.cmi parsing/asttypes.cmi parsing/ast_helper.cmi \ - typing/parmatch.cmi + typing/subst.cmi typing/printpat.cmi typing/predef.cmi typing/path.cmi \ + parsing/parsetree.cmi utils/misc.cmi parsing/longident.cmi \ + parsing/location.cmi typing/ident.cmi typing/env.cmi typing/ctype.cmi \ + utils/config.cmi typing/btype.cmi parsing/asttypes.cmi \ + parsing/ast_helper.cmi typing/parmatch.cmi typing/parmatch.cmx : utils/warnings.cmx typing/untypeast.cmx \ typing/types.cmx 
typing/typedtreeIter.cmx typing/typedtree.cmx \ - typing/subst.cmx typing/predef.cmx typing/path.cmx parsing/parsetree.cmi \ - utils/misc.cmx parsing/longident.cmx parsing/location.cmx \ - typing/ident.cmx typing/env.cmx typing/ctype.cmx utils/config.cmx \ - typing/btype.cmx parsing/asttypes.cmi parsing/ast_helper.cmx \ - typing/parmatch.cmi + typing/subst.cmx typing/printpat.cmx typing/predef.cmx typing/path.cmx \ + parsing/parsetree.cmi utils/misc.cmx parsing/longident.cmx \ + parsing/location.cmx typing/ident.cmx typing/env.cmx typing/ctype.cmx \ + utils/config.cmx typing/btype.cmx parsing/asttypes.cmi \ + parsing/ast_helper.cmx typing/parmatch.cmi typing/parmatch.cmi : typing/types.cmi typing/typedtree.cmi \ - parsing/parsetree.cmi parsing/longident.cmi parsing/location.cmi \ - typing/env.cmi parsing/asttypes.cmi + parsing/parsetree.cmi parsing/location.cmi typing/env.cmi \ + parsing/asttypes.cmi typing/path.cmo : typing/ident.cmi typing/path.cmi typing/path.cmx : typing/ident.cmx typing/path.cmi typing/path.cmi : typing/ident.cmi @@ -294,6 +297,11 @@ typing/primitive.cmx : parsing/parsetree.cmi typing/outcometree.cmi \ typing/primitive.cmi typing/primitive.cmi : parsing/parsetree.cmi typing/outcometree.cmi \ parsing/location.cmi +typing/printpat.cmo : typing/types.cmi typing/typedtree.cmi typing/ident.cmi \ + parsing/asttypes.cmi typing/printpat.cmi +typing/printpat.cmx : typing/types.cmx typing/typedtree.cmx typing/ident.cmx \ + parsing/asttypes.cmi typing/printpat.cmi +typing/printpat.cmi : typing/typedtree.cmi parsing/asttypes.cmi typing/printtyp.cmo : typing/types.cmi typing/primitive.cmi \ typing/predef.cmi typing/path.cmi parsing/parsetree.cmi \ typing/outcometree.cmi typing/oprint.cmi utils/misc.cmi \ @@ -363,23 +371,25 @@ typing/typeclass.cmi : typing/types.cmi typing/typedtree.cmi \ typing/typecore.cmo : utils/warnings.cmi typing/typetexp.cmi \ typing/types.cmi typing/typeopt.cmi typing/typedtree.cmi \ typing/typedecl.cmi typing/subst.cmi 
typing/stypes.cmi \ - typing/printtyp.cmi typing/primitive.cmi typing/predef.cmi \ - typing/path.cmi parsing/parsetree.cmi typing/parmatch.cmi \ - typing/oprint.cmi utils/misc.cmi parsing/longident.cmi \ - parsing/location.cmi typing/ident.cmi typing/env.cmi typing/ctype.cmi \ - typing/cmt_format.cmi utils/clflags.cmi parsing/builtin_attributes.cmi \ - typing/btype.cmi parsing/asttypes.cmi parsing/ast_helper.cmi \ - typing/annot.cmi typing/typecore.cmi + typing/printtyp.cmi typing/printpat.cmi typing/primitive.cmi \ + typing/predef.cmi typing/path.cmi parsing/parsetree.cmi \ + typing/parmatch.cmi typing/oprint.cmi typing/mtype.cmi utils/misc.cmi \ + parsing/longident.cmi parsing/location.cmi typing/ident.cmi \ + typing/env.cmi typing/ctype.cmi utils/config.cmi typing/cmt_format.cmi \ + utils/clflags.cmi parsing/builtin_attributes.cmi typing/btype.cmi \ + parsing/asttypes.cmi parsing/ast_helper.cmi typing/annot.cmi \ + typing/typecore.cmi typing/typecore.cmx : utils/warnings.cmx typing/typetexp.cmx \ typing/types.cmx typing/typeopt.cmx typing/typedtree.cmx \ typing/typedecl.cmx typing/subst.cmx typing/stypes.cmx \ - typing/printtyp.cmx typing/primitive.cmx typing/predef.cmx \ - typing/path.cmx parsing/parsetree.cmi typing/parmatch.cmx \ - typing/oprint.cmx utils/misc.cmx parsing/longident.cmx \ - parsing/location.cmx typing/ident.cmx typing/env.cmx typing/ctype.cmx \ - typing/cmt_format.cmx utils/clflags.cmx parsing/builtin_attributes.cmx \ - typing/btype.cmx parsing/asttypes.cmi parsing/ast_helper.cmx \ - typing/annot.cmi typing/typecore.cmi + typing/printtyp.cmx typing/printpat.cmx typing/primitive.cmx \ + typing/predef.cmx typing/path.cmx parsing/parsetree.cmi \ + typing/parmatch.cmx typing/oprint.cmx typing/mtype.cmx utils/misc.cmx \ + parsing/longident.cmx parsing/location.cmx typing/ident.cmx \ + typing/env.cmx typing/ctype.cmx utils/config.cmx typing/cmt_format.cmx \ + utils/clflags.cmx parsing/builtin_attributes.cmx typing/btype.cmx \ + parsing/asttypes.cmi 
parsing/ast_helper.cmx typing/annot.cmi \ + typing/typecore.cmi typing/typecore.cmi : typing/types.cmi typing/typedtree.cmi typing/path.cmi \ parsing/parsetree.cmi parsing/longident.cmi parsing/location.cmi \ typing/ident.cmi typing/env.cmi parsing/asttypes.cmi typing/annot.cmi @@ -434,7 +444,8 @@ typing/typemod.cmo : utils/warnings.cmi typing/typetexp.cmi typing/types.cmi \ parsing/location.cmi typing/includemod.cmi typing/ident.cmi \ typing/env.cmi typing/ctype.cmi utils/config.cmi typing/cmt_format.cmi \ typing/cmi_format.cmi utils/clflags.cmi parsing/builtin_attributes.cmi \ - typing/btype.cmi parsing/asttypes.cmi typing/annot.cmi typing/typemod.cmi + typing/btype.cmi parsing/attr_helper.cmi parsing/asttypes.cmi \ + typing/annot.cmi typing/typemod.cmi typing/typemod.cmx : utils/warnings.cmx typing/typetexp.cmx typing/types.cmx \ typing/typedtree.cmx typing/typedecl.cmx typing/typecore.cmx \ typing/typeclass.cmx typing/subst.cmx typing/stypes.cmx \ @@ -443,7 +454,8 @@ typing/typemod.cmx : utils/warnings.cmx typing/typetexp.cmx typing/types.cmx \ parsing/location.cmx typing/includemod.cmx typing/ident.cmx \ typing/env.cmx typing/ctype.cmx utils/config.cmx typing/cmt_format.cmx \ typing/cmi_format.cmx utils/clflags.cmx parsing/builtin_attributes.cmx \ - typing/btype.cmx parsing/asttypes.cmi typing/annot.cmi typing/typemod.cmi + typing/btype.cmx parsing/attr_helper.cmx parsing/asttypes.cmi \ + typing/annot.cmi typing/typemod.cmi typing/typemod.cmi : typing/types.cmi typing/typedtree.cmi typing/path.cmi \ parsing/parsetree.cmi utils/misc.cmi parsing/longident.cmi \ parsing/location.cmi typing/includemod.cmi typing/ident.cmi \ @@ -470,18 +482,20 @@ typing/types.cmi : typing/primitive.cmi typing/path.cmi \ typing/typetexp.cmo : typing/types.cmi typing/typedtree.cmi utils/tbl.cmi \ typing/printtyp.cmi typing/predef.cmi typing/path.cmi \ parsing/parsetree.cmi utils/misc.cmi parsing/longident.cmi \ - parsing/location.cmi typing/env.cmi typing/ctype.cmi utils/clflags.cmi 
\ - parsing/builtin_attributes.cmi typing/btype.cmi parsing/asttypes.cmi \ - parsing/ast_helper.cmi typing/typetexp.cmi + parsing/location.cmi typing/includemod.cmi typing/env.cmi \ + typing/ctype.cmi utils/clflags.cmi parsing/builtin_attributes.cmi \ + typing/btype.cmi parsing/asttypes.cmi parsing/ast_helper.cmi \ + typing/typetexp.cmi typing/typetexp.cmx : typing/types.cmx typing/typedtree.cmx utils/tbl.cmx \ typing/printtyp.cmx typing/predef.cmx typing/path.cmx \ parsing/parsetree.cmi utils/misc.cmx parsing/longident.cmx \ - parsing/location.cmx typing/env.cmx typing/ctype.cmx utils/clflags.cmx \ - parsing/builtin_attributes.cmx typing/btype.cmx parsing/asttypes.cmi \ - parsing/ast_helper.cmx typing/typetexp.cmi + parsing/location.cmx typing/includemod.cmx typing/env.cmx \ + typing/ctype.cmx utils/clflags.cmx parsing/builtin_attributes.cmx \ + typing/btype.cmx parsing/asttypes.cmi parsing/ast_helper.cmx \ + typing/typetexp.cmi typing/typetexp.cmi : typing/types.cmi typing/typedtree.cmi typing/path.cmi \ parsing/parsetree.cmi parsing/longident.cmi parsing/location.cmi \ - typing/env.cmi parsing/asttypes.cmi + typing/includemod.cmi typing/env.cmi parsing/asttypes.cmi typing/untypeast.cmo : typing/typedtree.cmi typing/path.cmi \ parsing/parsetree.cmi utils/misc.cmi parsing/longident.cmi \ parsing/location.cmi typing/ident.cmi typing/env.cmi parsing/asttypes.cmi \ @@ -511,16 +525,16 @@ bytecomp/bytelibrarian.cmx : utils/misc.cmx parsing/location.cmx \ bytecomp/bytelibrarian.cmi : bytecomp/bytelink.cmo : utils/warnings.cmi bytecomp/symtable.cmi \ bytecomp/opcodes.cmo utils/misc.cmi parsing/location.cmi \ - bytecomp/lambda.cmi bytecomp/instruct.cmi typing/ident.cmi \ - bytecomp/emitcode.cmi bytecomp/dll.cmi utils/consistbl.cmi \ - utils/config.cmi bytecomp/cmo_format.cmi utils/clflags.cmi \ - utils/ccomp.cmi bytecomp/bytesections.cmi bytecomp/bytelink.cmi + bytecomp/instruct.cmi typing/ident.cmi bytecomp/emitcode.cmi \ + bytecomp/dll.cmi utils/consistbl.cmi 
utils/config.cmi \ + bytecomp/cmo_format.cmi utils/clflags.cmi utils/ccomp.cmi \ + bytecomp/bytesections.cmi bytecomp/bytelink.cmi bytecomp/bytelink.cmx : utils/warnings.cmx bytecomp/symtable.cmx \ bytecomp/opcodes.cmx utils/misc.cmx parsing/location.cmx \ - bytecomp/lambda.cmx bytecomp/instruct.cmx typing/ident.cmx \ - bytecomp/emitcode.cmx bytecomp/dll.cmx utils/consistbl.cmx \ - utils/config.cmx bytecomp/cmo_format.cmi utils/clflags.cmx \ - utils/ccomp.cmx bytecomp/bytesections.cmx bytecomp/bytelink.cmi + bytecomp/instruct.cmx typing/ident.cmx bytecomp/emitcode.cmx \ + bytecomp/dll.cmx utils/consistbl.cmx utils/config.cmx \ + bytecomp/cmo_format.cmi utils/clflags.cmx utils/ccomp.cmx \ + bytecomp/bytesections.cmx bytecomp/bytelink.cmi bytecomp/bytelink.cmi : bytecomp/symtable.cmi bytecomp/cmo_format.cmi bytecomp/bytepackager.cmo : typing/typemod.cmi bytecomp/translmod.cmi \ typing/subst.cmi bytecomp/printlambda.cmi typing/path.cmi utils/misc.cmi \ @@ -546,14 +560,14 @@ bytecomp/emitcode.cmo : bytecomp/translmod.cmi typing/primitive.cmi \ bytecomp/opcodes.cmo utils/misc.cmi bytecomp/meta.cmi \ parsing/location.cmi bytecomp/lambda.cmi bytecomp/instruct.cmi \ typing/ident.cmi typing/env.cmi utils/config.cmi bytecomp/cmo_format.cmi \ - utils/clflags.cmi typing/btype.cmi parsing/asttypes.cmi \ - bytecomp/emitcode.cmi + utils/clflags.cmi bytecomp/bytegen.cmi typing/btype.cmi \ + parsing/asttypes.cmi bytecomp/emitcode.cmi bytecomp/emitcode.cmx : bytecomp/translmod.cmx typing/primitive.cmx \ bytecomp/opcodes.cmx utils/misc.cmx bytecomp/meta.cmx \ parsing/location.cmx bytecomp/lambda.cmx bytecomp/instruct.cmx \ typing/ident.cmx typing/env.cmx utils/config.cmx bytecomp/cmo_format.cmi \ - utils/clflags.cmx typing/btype.cmx parsing/asttypes.cmi \ - bytecomp/emitcode.cmi + utils/clflags.cmx bytecomp/bytegen.cmx typing/btype.cmx \ + parsing/asttypes.cmi bytecomp/emitcode.cmi bytecomp/emitcode.cmi : bytecomp/instruct.cmi typing/ident.cmi \ bytecomp/cmo_format.cmi 
bytecomp/instruct.cmo : typing/types.cmi typing/subst.cmi \ @@ -573,16 +587,16 @@ bytecomp/lambda.cmx : typing/types.cmx typing/primitive.cmx typing/path.cmx \ bytecomp/lambda.cmi : typing/types.cmi typing/primitive.cmi typing/path.cmi \ parsing/location.cmi typing/ident.cmi typing/env.cmi parsing/asttypes.cmi bytecomp/matching.cmo : typing/types.cmi typing/typeopt.cmi \ - typing/typedtree.cmi bytecomp/switch.cmi bytecomp/printlambda.cmi \ - typing/primitive.cmi typing/predef.cmi typing/path.cmi \ - typing/parmatch.cmi utils/misc.cmi parsing/longident.cmi \ + typing/typedtree.cmi bytecomp/switch.cmi typing/printpat.cmi \ + bytecomp/printlambda.cmi typing/primitive.cmi typing/predef.cmi \ + typing/path.cmi typing/parmatch.cmi utils/misc.cmi parsing/longident.cmi \ parsing/location.cmi bytecomp/lambda.cmi typing/ident.cmi typing/env.cmi \ utils/clflags.cmi typing/btype.cmi parsing/asttypes.cmi \ bytecomp/matching.cmi bytecomp/matching.cmx : typing/types.cmx typing/typeopt.cmx \ - typing/typedtree.cmx bytecomp/switch.cmx bytecomp/printlambda.cmx \ - typing/primitive.cmx typing/predef.cmx typing/path.cmx \ - typing/parmatch.cmx utils/misc.cmx parsing/longident.cmx \ + typing/typedtree.cmx bytecomp/switch.cmx typing/printpat.cmx \ + bytecomp/printlambda.cmx typing/primitive.cmx typing/predef.cmx \ + typing/path.cmx typing/parmatch.cmx utils/misc.cmx parsing/longident.cmx \ parsing/location.cmx bytecomp/lambda.cmx typing/ident.cmx typing/env.cmx \ utils/clflags.cmx typing/btype.cmx parsing/asttypes.cmi \ bytecomp/matching.cmi @@ -663,36 +677,38 @@ bytecomp/translclass.cmx : typing/types.cmx typing/typeopt.cmx \ bytecomp/translclass.cmi : typing/typedtree.cmi parsing/location.cmi \ bytecomp/lambda.cmi typing/ident.cmi parsing/asttypes.cmi bytecomp/translcore.cmo : typing/types.cmi typing/typeopt.cmi \ - typing/typedtree.cmi typing/typecore.cmi bytecomp/translobj.cmi \ - bytecomp/translattribute.cmi typing/primitive.cmi typing/predef.cmi \ - typing/path.cmi 
typing/parmatch.cmi utils/misc.cmi bytecomp/matching.cmi \ + typing/typedtree.cmi typing/typecore.cmi bytecomp/translprim.cmi \ + bytecomp/translobj.cmi bytecomp/translattribute.cmi typing/printtyp.cmi \ + typing/primitive.cmi typing/predef.cmi typing/path.cmi \ + typing/parmatch.cmi utils/misc.cmi bytecomp/matching.cmi \ parsing/longident.cmi parsing/location.cmi bytecomp/lambda.cmi \ typing/ident.cmi typing/env.cmi utils/config.cmi utils/clflags.cmi \ typing/btype.cmi parsing/asttypes.cmi bytecomp/translcore.cmi bytecomp/translcore.cmx : typing/types.cmx typing/typeopt.cmx \ - typing/typedtree.cmx typing/typecore.cmx bytecomp/translobj.cmx \ - bytecomp/translattribute.cmx typing/primitive.cmx typing/predef.cmx \ - typing/path.cmx typing/parmatch.cmx utils/misc.cmx bytecomp/matching.cmx \ + typing/typedtree.cmx typing/typecore.cmx bytecomp/translprim.cmx \ + bytecomp/translobj.cmx bytecomp/translattribute.cmx typing/printtyp.cmx \ + typing/primitive.cmx typing/predef.cmx typing/path.cmx \ + typing/parmatch.cmx utils/misc.cmx bytecomp/matching.cmx \ parsing/longident.cmx parsing/location.cmx bytecomp/lambda.cmx \ typing/ident.cmx typing/env.cmx utils/config.cmx utils/clflags.cmx \ typing/btype.cmx parsing/asttypes.cmi bytecomp/translcore.cmi -bytecomp/translcore.cmi : typing/types.cmi typing/typedtree.cmi \ - typing/primitive.cmi typing/path.cmi parsing/location.cmi \ - bytecomp/lambda.cmi typing/ident.cmi typing/env.cmi parsing/asttypes.cmi +bytecomp/translcore.cmi : typing/typedtree.cmi typing/path.cmi \ + parsing/location.cmi bytecomp/lambda.cmi typing/ident.cmi typing/env.cmi \ + parsing/asttypes.cmi bytecomp/translmod.cmo : typing/types.cmi typing/typedtree.cmi \ - bytecomp/translobj.cmi bytecomp/translcore.cmi bytecomp/translclass.cmi \ - bytecomp/translattribute.cmi typing/printtyp.cmi typing/primitive.cmi \ - typing/predef.cmi typing/path.cmi typing/mtype.cmi utils/misc.cmi \ - parsing/longident.cmi parsing/location.cmi bytecomp/lambda.cmi \ - 
typing/ident.cmi typing/env.cmi typing/ctype.cmi utils/clflags.cmi \ - parsing/asttypes.cmi bytecomp/translmod.cmi + bytecomp/translprim.cmi bytecomp/translobj.cmi bytecomp/translcore.cmi \ + bytecomp/translclass.cmi bytecomp/translattribute.cmi typing/printtyp.cmi \ + typing/primitive.cmi typing/predef.cmi typing/path.cmi typing/mtype.cmi \ + utils/misc.cmi parsing/longident.cmi parsing/location.cmi \ + bytecomp/lambda.cmi typing/ident.cmi typing/env.cmi typing/ctype.cmi \ + utils/clflags.cmi parsing/asttypes.cmi bytecomp/translmod.cmi bytecomp/translmod.cmx : typing/types.cmx typing/typedtree.cmx \ - bytecomp/translobj.cmx bytecomp/translcore.cmx bytecomp/translclass.cmx \ - bytecomp/translattribute.cmx typing/printtyp.cmx typing/primitive.cmx \ - typing/predef.cmx typing/path.cmx typing/mtype.cmx utils/misc.cmx \ - parsing/longident.cmx parsing/location.cmx bytecomp/lambda.cmx \ - typing/ident.cmx typing/env.cmx typing/ctype.cmx utils/clflags.cmx \ - parsing/asttypes.cmi bytecomp/translmod.cmi + bytecomp/translprim.cmx bytecomp/translobj.cmx bytecomp/translcore.cmx \ + bytecomp/translclass.cmx bytecomp/translattribute.cmx typing/printtyp.cmx \ + typing/primitive.cmx typing/predef.cmx typing/path.cmx typing/mtype.cmx \ + utils/misc.cmx parsing/longident.cmx parsing/location.cmx \ + bytecomp/lambda.cmx typing/ident.cmx typing/env.cmx typing/ctype.cmx \ + utils/clflags.cmx parsing/asttypes.cmi bytecomp/translmod.cmi bytecomp/translmod.cmi : typing/typedtree.cmi typing/primitive.cmi \ parsing/location.cmi bytecomp/lambda.cmi typing/ident.cmi bytecomp/translobj.cmo : typing/primitive.cmi utils/misc.cmi \ @@ -704,6 +720,19 @@ bytecomp/translobj.cmx : typing/primitive.cmx utils/misc.cmx \ typing/ident.cmx typing/env.cmx utils/config.cmx utils/clflags.cmx \ typing/btype.cmx parsing/asttypes.cmi bytecomp/translobj.cmi bytecomp/translobj.cmi : bytecomp/lambda.cmi typing/ident.cmi typing/env.cmi +bytecomp/translprim.cmo : typing/types.cmi typing/typeopt.cmi \ + 
typing/typedtree.cmi typing/primitive.cmi typing/predef.cmi \ + typing/path.cmi utils/misc.cmi bytecomp/matching.cmi parsing/location.cmi \ + bytecomp/lambda.cmi typing/ident.cmi typing/env.cmi utils/config.cmi \ + utils/clflags.cmi parsing/asttypes.cmi bytecomp/translprim.cmi +bytecomp/translprim.cmx : typing/types.cmx typing/typeopt.cmx \ + typing/typedtree.cmx typing/primitive.cmx typing/predef.cmx \ + typing/path.cmx utils/misc.cmx bytecomp/matching.cmx parsing/location.cmx \ + bytecomp/lambda.cmx typing/ident.cmx typing/env.cmx utils/config.cmx \ + utils/clflags.cmx parsing/asttypes.cmi bytecomp/translprim.cmi +bytecomp/translprim.cmi : typing/types.cmi typing/typedtree.cmi \ + typing/primitive.cmi typing/path.cmi parsing/location.cmi \ + bytecomp/lambda.cmi typing/ident.cmi typing/env.cmi asmcomp/CSE.cmo : asmcomp/mach.cmi asmcomp/CSEgen.cmi asmcomp/arch.cmo asmcomp/CSE.cmx : asmcomp/mach.cmx asmcomp/CSEgen.cmx asmcomp/arch.cmx asmcomp/CSEgen.cmo : asmcomp/reg.cmi asmcomp/proc.cmi asmcomp/mach.cmi \ @@ -766,13 +795,13 @@ asmcomp/asmlibrarian.cmi : asmcomp/asmlink.cmo : bytecomp/runtimedef.cmi utils/profile.cmi \ utils/misc.cmi parsing/location.cmi asmcomp/emitaux.cmi asmcomp/emit.cmi \ utils/consistbl.cmi utils/config.cmi asmcomp/compilenv.cmi \ - asmcomp/cmx_format.cmi asmcomp/cmmgen.cmi utils/clflags.cmi \ - utils/ccomp.cmi asmcomp/asmgen.cmi asmcomp/asmlink.cmi + asmcomp/cmx_format.cmi asmcomp/cmmgen.cmi asmcomp/cmm.cmi \ + utils/clflags.cmi utils/ccomp.cmi asmcomp/asmgen.cmi asmcomp/asmlink.cmi asmcomp/asmlink.cmx : bytecomp/runtimedef.cmx utils/profile.cmx \ utils/misc.cmx parsing/location.cmx asmcomp/emitaux.cmx asmcomp/emit.cmx \ utils/consistbl.cmx utils/config.cmx asmcomp/compilenv.cmx \ - asmcomp/cmx_format.cmi asmcomp/cmmgen.cmx utils/clflags.cmx \ - utils/ccomp.cmx asmcomp/asmgen.cmx asmcomp/asmlink.cmi + asmcomp/cmx_format.cmi asmcomp/cmmgen.cmx asmcomp/cmm.cmx \ + utils/clflags.cmx utils/ccomp.cmx asmcomp/asmgen.cmx asmcomp/asmlink.cmi 
asmcomp/asmlink.cmi : asmcomp/cmx_format.cmi asmcomp/asmpackager.cmo : typing/typemod.cmi bytecomp/translmod.cmi \ utils/profile.cmi utils/misc.cmi middle_end/middle_end.cmi \ @@ -805,21 +834,27 @@ asmcomp/branch_relaxation_intf.cmx : asmcomp/linearize.cmx asmcomp/cmm.cmx \ asmcomp/arch.cmx asmcomp/build_export_info.cmo : middle_end/base_types/variable.cmi \ middle_end/base_types/var_within_closure.cmi \ - middle_end/base_types/tag.cmi middle_end/base_types/symbol.cmi \ + asmcomp/traverse_for_exported_symbols.cmi middle_end/base_types/tag.cmi \ + middle_end/base_types/symbol.cmi middle_end/simple_value_approx.cmi \ middle_end/base_types/set_of_closures_id.cmi utils/misc.cmi \ - middle_end/invariant_params.cmi middle_end/flambda_utils.cmi \ - middle_end/flambda.cmi asmcomp/export_info.cmi \ + middle_end/invariant_params.cmi middle_end/inline_and_simplify_aux.cmi \ + middle_end/flambda_utils.cmi middle_end/flambda.cmi \ + middle_end/find_recursive_functions.cmi asmcomp/export_info.cmi \ middle_end/base_types/export_id.cmi asmcomp/compilenv.cmi \ + middle_end/base_types/compilation_unit.cmi \ middle_end/base_types/closure_id.cmi utils/clflags.cmi \ middle_end/backend_intf.cmi middle_end/allocated_const.cmi \ asmcomp/build_export_info.cmi asmcomp/build_export_info.cmx : middle_end/base_types/variable.cmx \ middle_end/base_types/var_within_closure.cmx \ - middle_end/base_types/tag.cmx middle_end/base_types/symbol.cmx \ + asmcomp/traverse_for_exported_symbols.cmx middle_end/base_types/tag.cmx \ + middle_end/base_types/symbol.cmx middle_end/simple_value_approx.cmx \ middle_end/base_types/set_of_closures_id.cmx utils/misc.cmx \ - middle_end/invariant_params.cmx middle_end/flambda_utils.cmx \ - middle_end/flambda.cmx asmcomp/export_info.cmx \ + middle_end/invariant_params.cmx middle_end/inline_and_simplify_aux.cmx \ + middle_end/flambda_utils.cmx middle_end/flambda.cmx \ + middle_end/find_recursive_functions.cmx asmcomp/export_info.cmx \ middle_end/base_types/export_id.cmx 
asmcomp/compilenv.cmx \ + middle_end/base_types/compilation_unit.cmx \ middle_end/base_types/closure_id.cmx utils/clflags.cmx \ middle_end/backend_intf.cmi middle_end/allocated_const.cmx \ asmcomp/build_export_info.cmi @@ -848,14 +883,12 @@ asmcomp/closure.cmx : utils/warnings.cmx utils/tbl.cmx bytecomp/switch.cmx \ asmcomp/closure.cmi : bytecomp/lambda.cmi asmcomp/clambda.cmi asmcomp/closure_offsets.cmo : middle_end/base_types/variable.cmi \ middle_end/base_types/var_within_closure.cmi utils/misc.cmi \ - middle_end/flambda_utils.cmi middle_end/flambda_iterators.cmi \ - middle_end/flambda.cmi middle_end/base_types/closure_id.cmi \ - asmcomp/closure_offsets.cmi + middle_end/flambda_utils.cmi middle_end/flambda.cmi \ + middle_end/base_types/closure_id.cmi asmcomp/closure_offsets.cmi asmcomp/closure_offsets.cmx : middle_end/base_types/variable.cmx \ middle_end/base_types/var_within_closure.cmx utils/misc.cmx \ - middle_end/flambda_utils.cmx middle_end/flambda_iterators.cmx \ - middle_end/flambda.cmx middle_end/base_types/closure_id.cmx \ - asmcomp/closure_offsets.cmi + middle_end/flambda_utils.cmx middle_end/flambda.cmx \ + middle_end/base_types/closure_id.cmx asmcomp/closure_offsets.cmi asmcomp/closure_offsets.cmi : middle_end/base_types/var_within_closure.cmi \ middle_end/flambda.cmi middle_end/base_types/closure_id.cmi asmcomp/cmm.cmo : bytecomp/lambda.cmi typing/ident.cmi \ @@ -892,27 +925,28 @@ asmcomp/comballoc.cmx : asmcomp/reg.cmx asmcomp/mach.cmx utils/config.cmx \ asmcomp/arch.cmx asmcomp/comballoc.cmi asmcomp/comballoc.cmi : asmcomp/mach.cmi asmcomp/compilenv.cmo : utils/warnings.cmi middle_end/base_types/symbol.cmi \ + middle_end/simple_value_approx.cmi \ middle_end/base_types/set_of_closures_id.cmi utils/misc.cmi \ parsing/location.cmi middle_end/base_types/linkage_name.cmi \ - typing/ident.cmi middle_end/flambda.cmi asmcomp/export_info.cmi \ - typing/env.cmi utils/config.cmi \ + typing/ident.cmi asmcomp/export_info.cmi typing/env.cmi utils/config.cmi \ 
middle_end/base_types/compilation_unit.cmi asmcomp/cmx_format.cmi \ middle_end/base_types/closure_id.cmi utils/clflags.cmi \ asmcomp/clambda.cmi asmcomp/compilenv.cmi asmcomp/compilenv.cmx : utils/warnings.cmx middle_end/base_types/symbol.cmx \ + middle_end/simple_value_approx.cmx \ middle_end/base_types/set_of_closures_id.cmx utils/misc.cmx \ parsing/location.cmx middle_end/base_types/linkage_name.cmx \ - typing/ident.cmx middle_end/flambda.cmx asmcomp/export_info.cmx \ - typing/env.cmx utils/config.cmx \ + typing/ident.cmx asmcomp/export_info.cmx typing/env.cmx utils/config.cmx \ middle_end/base_types/compilation_unit.cmx asmcomp/cmx_format.cmi \ middle_end/base_types/closure_id.cmx utils/clflags.cmx \ asmcomp/clambda.cmx asmcomp/compilenv.cmi asmcomp/compilenv.cmi : middle_end/base_types/symbol.cmi \ + middle_end/simple_value_approx.cmi \ middle_end/base_types/set_of_closures_id.cmi \ middle_end/base_types/linkage_name.cmi typing/ident.cmi \ - middle_end/flambda.cmi asmcomp/export_info.cmi \ - middle_end/base_types/compilation_unit.cmi asmcomp/cmx_format.cmi \ - middle_end/base_types/closure_id.cmi asmcomp/clambda.cmi + asmcomp/export_info.cmi middle_end/base_types/compilation_unit.cmi \ + asmcomp/cmx_format.cmi middle_end/base_types/closure_id.cmi \ + asmcomp/clambda.cmi asmcomp/deadcode.cmo : asmcomp/reg.cmi asmcomp/proc.cmi asmcomp/mach.cmi \ utils/config.cmi asmcomp/deadcode.cmi asmcomp/deadcode.cmx : asmcomp/reg.cmx asmcomp/proc.cmx asmcomp/mach.cmx \ @@ -962,22 +996,20 @@ asmcomp/export_info.cmi : middle_end/base_types/variable.cmi \ middle_end/base_types/closure_id.cmi asmcomp/export_info_for_pack.cmo : middle_end/base_types/variable.cmi \ middle_end/base_types/var_within_closure.cmi \ - middle_end/base_types/symbol.cmi \ + middle_end/base_types/symbol.cmi middle_end/simple_value_approx.cmi \ middle_end/base_types/set_of_closures_origin.cmi \ middle_end/base_types/set_of_closures_id.cmi utils/misc.cmi \ - middle_end/flambda_utils.cmi 
middle_end/flambda_iterators.cmi \ - middle_end/flambda.cmi asmcomp/export_info.cmi \ - middle_end/base_types/export_id.cmi \ + middle_end/flambda_iterators.cmi middle_end/flambda.cmi \ + asmcomp/export_info.cmi middle_end/base_types/export_id.cmi \ middle_end/base_types/compilation_unit.cmi \ middle_end/base_types/closure_id.cmi asmcomp/export_info_for_pack.cmi asmcomp/export_info_for_pack.cmx : middle_end/base_types/variable.cmx \ middle_end/base_types/var_within_closure.cmx \ - middle_end/base_types/symbol.cmx \ + middle_end/base_types/symbol.cmx middle_end/simple_value_approx.cmx \ middle_end/base_types/set_of_closures_origin.cmx \ middle_end/base_types/set_of_closures_id.cmx utils/misc.cmx \ - middle_end/flambda_utils.cmx middle_end/flambda_iterators.cmx \ - middle_end/flambda.cmx asmcomp/export_info.cmx \ - middle_end/base_types/export_id.cmx \ + middle_end/flambda_iterators.cmx middle_end/flambda.cmx \ + asmcomp/export_info.cmx middle_end/base_types/export_id.cmx \ middle_end/base_types/compilation_unit.cmx \ middle_end/base_types/closure_id.cmx asmcomp/export_info_for_pack.cmi asmcomp/export_info_for_pack.cmi : asmcomp/export_info.cmi \ @@ -986,10 +1018,12 @@ asmcomp/flambda_to_clambda.cmo : middle_end/base_types/variable.cmi \ middle_end/base_types/var_within_closure.cmi \ middle_end/base_types/tag.cmi middle_end/base_types/symbol.cmi \ middle_end/base_types/static_exception.cmi \ + middle_end/simple_value_approx.cmi \ middle_end/base_types/set_of_closures_id.cmi typing/primitive.cmi \ middle_end/parameter.cmi utils/numbers.cmi \ middle_end/base_types/mutable_variable.cmi utils/misc.cmi \ - middle_end/base_types/linkage_name.cmi typing/ident.cmi \ + middle_end/base_types/linkage_name.cmi \ + middle_end/initialize_symbol_to_let_symbol.cmi typing/ident.cmi \ middle_end/flambda_utils.cmi middle_end/flambda.cmi \ asmcomp/export_info.cmi middle_end/debuginfo.cmi asmcomp/compilenv.cmi \ asmcomp/closure_offsets.cmi middle_end/base_types/closure_id.cmi \ @@ 
-999,10 +1033,12 @@ asmcomp/flambda_to_clambda.cmx : middle_end/base_types/variable.cmx \ middle_end/base_types/var_within_closure.cmx \ middle_end/base_types/tag.cmx middle_end/base_types/symbol.cmx \ middle_end/base_types/static_exception.cmx \ + middle_end/simple_value_approx.cmx \ middle_end/base_types/set_of_closures_id.cmx typing/primitive.cmx \ middle_end/parameter.cmx utils/numbers.cmx \ middle_end/base_types/mutable_variable.cmx utils/misc.cmx \ - middle_end/base_types/linkage_name.cmx typing/ident.cmx \ + middle_end/base_types/linkage_name.cmx \ + middle_end/initialize_symbol_to_let_symbol.cmx typing/ident.cmx \ middle_end/flambda_utils.cmx middle_end/flambda.cmx \ asmcomp/export_info.cmx middle_end/debuginfo.cmx asmcomp/compilenv.cmx \ asmcomp/closure_offsets.cmx middle_end/base_types/closure_id.cmx \ @@ -1017,6 +1053,7 @@ asmcomp/import_approx.cmo : middle_end/base_types/variable.cmi \ middle_end/freshening.cmi middle_end/flambda_iterators.cmi \ middle_end/flambda.cmi asmcomp/export_info.cmi \ middle_end/base_types/export_id.cmi asmcomp/compilenv.cmi \ + middle_end/base_types/compilation_unit.cmi \ middle_end/base_types/closure_id.cmi asmcomp/import_approx.cmi asmcomp/import_approx.cmx : middle_end/base_types/variable.cmx \ middle_end/base_types/var_within_closure.cmx \ @@ -1025,6 +1062,7 @@ asmcomp/import_approx.cmx : middle_end/base_types/variable.cmx \ middle_end/freshening.cmx middle_end/flambda_iterators.cmx \ middle_end/flambda.cmx asmcomp/export_info.cmx \ middle_end/base_types/export_id.cmx asmcomp/compilenv.cmx \ + middle_end/base_types/compilation_unit.cmx \ middle_end/base_types/closure_id.cmx asmcomp/import_approx.cmi asmcomp/import_approx.cmi : middle_end/base_types/symbol.cmi \ middle_end/simple_value_approx.cmi @@ -1177,6 +1215,32 @@ asmcomp/strmatch.cmx : parsing/location.cmx bytecomp/lambda.cmx \ parsing/asttypes.cmi asmcomp/arch.cmx asmcomp/strmatch.cmi asmcomp/strmatch.cmi : parsing/location.cmi middle_end/debuginfo.cmi \ 
asmcomp/cmm.cmi +asmcomp/traverse_for_exported_symbols.cmo : \ + middle_end/base_types/variable.cmi \ + middle_end/base_types/var_within_closure.cmi \ + middle_end/base_types/symbol.cmi middle_end/simple_value_approx.cmi \ + middle_end/base_types/set_of_closures_id.cmi utils/misc.cmi \ + middle_end/flambda_iterators.cmi middle_end/flambda.cmi \ + asmcomp/export_info.cmi middle_end/base_types/export_id.cmi \ + middle_end/base_types/compilation_unit.cmi \ + middle_end/base_types/closure_id.cmi \ + asmcomp/traverse_for_exported_symbols.cmi +asmcomp/traverse_for_exported_symbols.cmx : \ + middle_end/base_types/variable.cmx \ + middle_end/base_types/var_within_closure.cmx \ + middle_end/base_types/symbol.cmx middle_end/simple_value_approx.cmx \ + middle_end/base_types/set_of_closures_id.cmx utils/misc.cmx \ + middle_end/flambda_iterators.cmx middle_end/flambda.cmx \ + asmcomp/export_info.cmx middle_end/base_types/export_id.cmx \ + middle_end/base_types/compilation_unit.cmx \ + middle_end/base_types/closure_id.cmx \ + asmcomp/traverse_for_exported_symbols.cmi +asmcomp/traverse_for_exported_symbols.cmi : \ + middle_end/base_types/var_within_closure.cmi \ + middle_end/base_types/symbol.cmi middle_end/simple_value_approx.cmi \ + middle_end/base_types/set_of_closures_id.cmi middle_end/flambda.cmi \ + asmcomp/export_info.cmi middle_end/base_types/export_id.cmi \ + middle_end/base_types/closure_id.cmi asmcomp/un_anf.cmo : bytecomp/semantics_of_primitives.cmi \ asmcomp/printclambda.cmi utils/misc.cmi bytecomp/lambda.cmi \ typing/ident.cmi middle_end/debuginfo.cmi utils/clflags.cmi \ @@ -1228,20 +1292,22 @@ middle_end/allocated_const.cmx : middle_end/allocated_const.cmi middle_end/allocated_const.cmi : middle_end/augment_specialised_args.cmo : middle_end/base_types/variable.cmi \ middle_end/projection.cmi middle_end/pass_wrapper.cmi \ - middle_end/parameter.cmi utils/misc.cmi middle_end/inlining_cost.cmi \ + middle_end/parameter.cmi utils/misc.cmi \ + 
middle_end/internal_variable_names.cmi middle_end/inlining_cost.cmi \ middle_end/inline_and_simplify_aux.cmi utils/identifiable.cmi \ middle_end/flambda_utils.cmi middle_end/flambda.cmi \ - middle_end/debuginfo.cmi middle_end/base_types/closure_id.cmi \ - utils/clflags.cmi middle_end/backend_intf.cmi \ - middle_end/augment_specialised_args.cmi + middle_end/debuginfo.cmi middle_end/base_types/closure_origin.cmi \ + middle_end/base_types/closure_id.cmi utils/clflags.cmi \ + middle_end/backend_intf.cmi middle_end/augment_specialised_args.cmi middle_end/augment_specialised_args.cmx : middle_end/base_types/variable.cmx \ middle_end/projection.cmx middle_end/pass_wrapper.cmx \ - middle_end/parameter.cmx utils/misc.cmx middle_end/inlining_cost.cmx \ + middle_end/parameter.cmx utils/misc.cmx \ + middle_end/internal_variable_names.cmx middle_end/inlining_cost.cmx \ middle_end/inline_and_simplify_aux.cmx utils/identifiable.cmx \ middle_end/flambda_utils.cmx middle_end/flambda.cmx \ - middle_end/debuginfo.cmx middle_end/base_types/closure_id.cmx \ - utils/clflags.cmx middle_end/backend_intf.cmi \ - middle_end/augment_specialised_args.cmi + middle_end/debuginfo.cmx middle_end/base_types/closure_origin.cmx \ + middle_end/base_types/closure_id.cmx utils/clflags.cmx \ + middle_end/backend_intf.cmi middle_end/augment_specialised_args.cmi middle_end/augment_specialised_args.cmi : middle_end/base_types/variable.cmi \ middle_end/projection.cmi middle_end/inlining_cost.cmi \ middle_end/inline_and_simplify_aux.cmi middle_end/flambda.cmi @@ -1251,26 +1317,28 @@ middle_end/backend_intf.cmi : middle_end/base_types/symbol.cmi \ middle_end/closure_conversion.cmo : middle_end/base_types/variable.cmi \ middle_end/base_types/tag.cmi middle_end/base_types/symbol.cmi \ middle_end/base_types/static_exception.cmi bytecomp/simplif.cmi \ - bytecomp/printlambda.cmi typing/predef.cmi middle_end/parameter.cmi \ - utils/numbers.cmi middle_end/base_types/mutable_variable.cmi \ - utils/misc.cmi 
parsing/location.cmi \ - middle_end/base_types/linkage_name.cmi middle_end/lift_code.cmi \ - bytecomp/lambda.cmi typing/ident.cmi middle_end/flambda_utils.cmi \ - middle_end/flambda.cmi middle_end/debuginfo.cmi utils/config.cmi \ + typing/predef.cmi middle_end/parameter.cmi utils/numbers.cmi \ + middle_end/base_types/mutable_variable.cmi utils/misc.cmi \ + middle_end/lift_code.cmi bytecomp/lambda.cmi \ + middle_end/internal_variable_names.cmi typing/ident.cmi \ + middle_end/flambda_utils.cmi middle_end/flambda.cmi \ + middle_end/debuginfo.cmi utils/config.cmi \ middle_end/base_types/compilation_unit.cmi \ + middle_end/base_types/closure_origin.cmi \ middle_end/base_types/closure_id.cmi \ middle_end/closure_conversion_aux.cmi utils/clflags.cmi \ middle_end/backend_intf.cmi middle_end/closure_conversion.cmi middle_end/closure_conversion.cmx : middle_end/base_types/variable.cmx \ middle_end/base_types/tag.cmx middle_end/base_types/symbol.cmx \ middle_end/base_types/static_exception.cmx bytecomp/simplif.cmx \ - bytecomp/printlambda.cmx typing/predef.cmx middle_end/parameter.cmx \ - utils/numbers.cmx middle_end/base_types/mutable_variable.cmx \ - utils/misc.cmx parsing/location.cmx \ - middle_end/base_types/linkage_name.cmx middle_end/lift_code.cmx \ - bytecomp/lambda.cmx typing/ident.cmx middle_end/flambda_utils.cmx \ - middle_end/flambda.cmx middle_end/debuginfo.cmx utils/config.cmx \ + typing/predef.cmx middle_end/parameter.cmx utils/numbers.cmx \ + middle_end/base_types/mutable_variable.cmx utils/misc.cmx \ + middle_end/lift_code.cmx bytecomp/lambda.cmx \ + middle_end/internal_variable_names.cmx typing/ident.cmx \ + middle_end/flambda_utils.cmx middle_end/flambda.cmx \ + middle_end/debuginfo.cmx utils/config.cmx \ middle_end/base_types/compilation_unit.cmx \ + middle_end/base_types/closure_origin.cmx \ middle_end/base_types/closure_id.cmx \ middle_end/closure_conversion_aux.cmx utils/clflags.cmx \ middle_end/backend_intf.cmi middle_end/closure_conversion.cmi @@ 
-1335,6 +1403,7 @@ middle_end/flambda.cmo : middle_end/base_types/variable.cmi \ middle_end/base_types/mutable_variable.cmi utils/misc.cmi \ bytecomp/lambda.cmi utils/identifiable.cmi middle_end/debuginfo.cmi \ middle_end/base_types/compilation_unit.cmi \ + middle_end/base_types/closure_origin.cmi \ middle_end/base_types/closure_id.cmi utils/clflags.cmi \ parsing/asttypes.cmi middle_end/allocated_const.cmi \ middle_end/flambda.cmi @@ -1347,6 +1416,7 @@ middle_end/flambda.cmx : middle_end/base_types/variable.cmx \ middle_end/base_types/mutable_variable.cmx utils/misc.cmx \ bytecomp/lambda.cmx utils/identifiable.cmx middle_end/debuginfo.cmx \ middle_end/base_types/compilation_unit.cmx \ + middle_end/base_types/closure_origin.cmx \ middle_end/base_types/closure_id.cmx utils/clflags.cmx \ parsing/asttypes.cmi middle_end/allocated_const.cmx \ middle_end/flambda.cmi @@ -1358,6 +1428,7 @@ middle_end/flambda.cmi : middle_end/base_types/variable.cmi \ middle_end/parameter.cmi utils/numbers.cmi \ middle_end/base_types/mutable_variable.cmi bytecomp/lambda.cmi \ utils/identifiable.cmi middle_end/debuginfo.cmi \ + middle_end/base_types/closure_origin.cmi \ middle_end/base_types/closure_id.cmi parsing/asttypes.cmi \ middle_end/allocated_const.cmi middle_end/flambda_invariants.cmo : middle_end/base_types/variable.cmi \ @@ -1399,9 +1470,10 @@ middle_end/flambda_utils.cmo : middle_end/base_types/variable.cmi \ middle_end/base_types/static_exception.cmi \ middle_end/base_types/set_of_closures_id.cmi middle_end/projection.cmi \ middle_end/parameter.cmi middle_end/base_types/mutable_variable.cmi \ - utils/misc.cmi middle_end/base_types/linkage_name.cmi bytecomp/lambda.cmi \ + utils/misc.cmi bytecomp/lambda.cmi middle_end/internal_variable_names.cmi \ middle_end/flambda_iterators.cmi middle_end/flambda.cmi \ middle_end/debuginfo.cmi middle_end/base_types/compilation_unit.cmi \ + middle_end/base_types/closure_origin.cmi \ middle_end/base_types/closure_id.cmi middle_end/backend_intf.cmi 
\ middle_end/allocated_const.cmi middle_end/flambda_utils.cmi middle_end/flambda_utils.cmx : middle_end/base_types/variable.cmx \ @@ -1410,9 +1482,10 @@ middle_end/flambda_utils.cmx : middle_end/base_types/variable.cmx \ middle_end/base_types/static_exception.cmx \ middle_end/base_types/set_of_closures_id.cmx middle_end/projection.cmx \ middle_end/parameter.cmx middle_end/base_types/mutable_variable.cmx \ - utils/misc.cmx middle_end/base_types/linkage_name.cmx bytecomp/lambda.cmx \ + utils/misc.cmx bytecomp/lambda.cmx middle_end/internal_variable_names.cmx \ middle_end/flambda_iterators.cmx middle_end/flambda.cmx \ middle_end/debuginfo.cmx middle_end/base_types/compilation_unit.cmx \ + middle_end/base_types/closure_origin.cmx \ middle_end/base_types/closure_id.cmx middle_end/backend_intf.cmi \ middle_end/allocated_const.cmx middle_end/flambda_utils.cmi middle_end/flambda_utils.cmi : middle_end/base_types/variable.cmi \ @@ -1420,8 +1493,9 @@ middle_end/flambda_utils.cmi : middle_end/base_types/variable.cmi \ middle_end/base_types/tag.cmi middle_end/base_types/symbol.cmi \ bytecomp/switch.cmi middle_end/base_types/static_exception.cmi \ middle_end/base_types/set_of_closures_id.cmi middle_end/projection.cmi \ - middle_end/parameter.cmi middle_end/flambda.cmi \ - middle_end/base_types/closure_id.cmi middle_end/backend_intf.cmi + middle_end/parameter.cmi middle_end/internal_variable_names.cmi \ + middle_end/flambda.cmi middle_end/base_types/closure_id.cmi \ + middle_end/backend_intf.cmi middle_end/freshening.cmo : middle_end/base_types/variable.cmi \ middle_end/base_types/var_within_closure.cmi \ middle_end/base_types/symbol.cmi \ @@ -1482,12 +1556,13 @@ middle_end/inline_and_simplify.cmo : utils/warnings.cmi \ middle_end/remove_free_vars_equal_to_args.cmi middle_end/projection.cmi \ typing/predef.cmi middle_end/parameter.cmi utils/misc.cmi \ parsing/location.cmi middle_end/lift_code.cmi bytecomp/lambda.cmi \ - middle_end/invariant_params.cmi 
middle_end/inlining_stats.cmi \ - middle_end/inlining_decision.cmi middle_end/inlining_cost.cmi \ - middle_end/inline_and_simplify_aux.cmi typing/ident.cmi \ - middle_end/freshening.cmi middle_end/flambda_utils.cmi \ - middle_end/flambda.cmi middle_end/effect_analysis.cmi \ - middle_end/debuginfo.cmi utils/config.cmi \ + middle_end/invariant_params.cmi middle_end/internal_variable_names.cmi \ + middle_end/inlining_stats.cmi middle_end/inlining_decision.cmi \ + middle_end/inlining_cost.cmi middle_end/inline_and_simplify_aux.cmi \ + typing/ident.cmi middle_end/freshening.cmi middle_end/flambda_utils.cmi \ + middle_end/flambda.cmi middle_end/find_recursive_functions.cmi \ + middle_end/effect_analysis.cmi middle_end/debuginfo.cmi utils/config.cmi \ + middle_end/base_types/closure_origin.cmi \ middle_end/base_types/closure_id.cmi utils/clflags.cmi \ middle_end/backend_intf.cmi middle_end/allocated_const.cmi \ middle_end/inline_and_simplify.cmi @@ -1503,12 +1578,13 @@ middle_end/inline_and_simplify.cmx : utils/warnings.cmx \ middle_end/remove_free_vars_equal_to_args.cmx middle_end/projection.cmx \ typing/predef.cmx middle_end/parameter.cmx utils/misc.cmx \ parsing/location.cmx middle_end/lift_code.cmx bytecomp/lambda.cmx \ - middle_end/invariant_params.cmx middle_end/inlining_stats.cmx \ - middle_end/inlining_decision.cmx middle_end/inlining_cost.cmx \ - middle_end/inline_and_simplify_aux.cmx typing/ident.cmx \ - middle_end/freshening.cmx middle_end/flambda_utils.cmx \ - middle_end/flambda.cmx middle_end/effect_analysis.cmx \ - middle_end/debuginfo.cmx utils/config.cmx \ + middle_end/invariant_params.cmx middle_end/internal_variable_names.cmx \ + middle_end/inlining_stats.cmx middle_end/inlining_decision.cmx \ + middle_end/inlining_cost.cmx middle_end/inline_and_simplify_aux.cmx \ + typing/ident.cmx middle_end/freshening.cmx middle_end/flambda_utils.cmx \ + middle_end/flambda.cmx middle_end/find_recursive_functions.cmx \ + middle_end/effect_analysis.cmx 
middle_end/debuginfo.cmx utils/config.cmx \ + middle_end/base_types/closure_origin.cmx \ middle_end/base_types/closure_id.cmx utils/clflags.cmx \ middle_end/backend_intf.cmi middle_end/allocated_const.cmx \ middle_end/inline_and_simplify.cmi @@ -1524,8 +1600,10 @@ middle_end/inline_and_simplify_aux.cmo : middle_end/base_types/variable.cmi \ middle_end/projection.cmi middle_end/parameter.cmi \ middle_end/base_types/mutable_variable.cmi utils/misc.cmi \ middle_end/inlining_stats.cmi middle_end/inlining_cost.cmi \ - middle_end/freshening.cmi middle_end/flambda.cmi middle_end/debuginfo.cmi \ + middle_end/freshening.cmi middle_end/flambda_utils.cmi \ + middle_end/flambda.cmi middle_end/debuginfo.cmi \ middle_end/base_types/compilation_unit.cmi \ + middle_end/base_types/closure_origin.cmi \ middle_end/base_types/closure_id.cmi utils/clflags.cmi \ middle_end/backend_intf.cmi middle_end/inline_and_simplify_aux.cmi middle_end/inline_and_simplify_aux.cmx : middle_end/base_types/variable.cmx \ @@ -1537,8 +1615,10 @@ middle_end/inline_and_simplify_aux.cmx : middle_end/base_types/variable.cmx \ middle_end/projection.cmx middle_end/parameter.cmx \ middle_end/base_types/mutable_variable.cmx utils/misc.cmx \ middle_end/inlining_stats.cmx middle_end/inlining_cost.cmx \ - middle_end/freshening.cmx middle_end/flambda.cmx middle_end/debuginfo.cmx \ + middle_end/freshening.cmx middle_end/flambda_utils.cmx \ + middle_end/flambda.cmx middle_end/debuginfo.cmx \ middle_end/base_types/compilation_unit.cmx \ + middle_end/base_types/closure_origin.cmx \ middle_end/base_types/closure_id.cmx utils/clflags.cmx \ middle_end/backend_intf.cmi middle_end/inline_and_simplify_aux.cmi middle_end/inline_and_simplify_aux.cmi : middle_end/base_types/variable.cmi \ @@ -1549,6 +1629,7 @@ middle_end/inline_and_simplify_aux.cmi : middle_end/base_types/variable.cmi \ middle_end/projection.cmi middle_end/base_types/mutable_variable.cmi \ middle_end/inlining_stats_types.cmi middle_end/inlining_cost.cmi \ 
middle_end/freshening.cmi middle_end/flambda.cmi middle_end/debuginfo.cmi \ + middle_end/base_types/closure_origin.cmi \ middle_end/base_types/closure_id.cmi middle_end/backend_intf.cmi middle_end/inlining_cost.cmo : middle_end/base_types/variable.cmi \ middle_end/projection.cmi typing/primitive.cmi utils/misc.cmi \ @@ -1565,8 +1646,7 @@ middle_end/inlining_decision.cmo : middle_end/base_types/variable.cmi \ middle_end/simple_value_approx.cmi middle_end/parameter.cmi \ utils/misc.cmi bytecomp/lambda.cmi middle_end/inlining_transforms.cmi \ middle_end/inlining_stats_types.cmi middle_end/inlining_cost.cmi \ - middle_end/inline_and_simplify_aux.cmi middle_end/flambda_utils.cmi \ - middle_end/flambda.cmi middle_end/find_recursive_functions.cmi \ + middle_end/inline_and_simplify_aux.cmi middle_end/flambda.cmi \ middle_end/base_types/closure_id.cmi utils/clflags.cmi \ middle_end/inlining_decision.cmi middle_end/inlining_decision.cmx : middle_end/base_types/variable.cmx \ @@ -1574,8 +1654,7 @@ middle_end/inlining_decision.cmx : middle_end/base_types/variable.cmx \ middle_end/simple_value_approx.cmx middle_end/parameter.cmx \ utils/misc.cmx bytecomp/lambda.cmx middle_end/inlining_transforms.cmx \ middle_end/inlining_stats_types.cmx middle_end/inlining_cost.cmx \ - middle_end/inline_and_simplify_aux.cmx middle_end/flambda_utils.cmx \ - middle_end/flambda.cmx middle_end/find_recursive_functions.cmx \ + middle_end/inline_and_simplify_aux.cmx middle_end/flambda.cmx \ middle_end/base_types/closure_id.cmx utils/clflags.cmx \ middle_end/inlining_decision.cmi middle_end/inlining_decision.cmi : middle_end/base_types/variable.cmi \ @@ -1604,27 +1683,37 @@ middle_end/inlining_stats_types.cmx : middle_end/inlining_cost.cmx \ middle_end/inlining_stats_types.cmi : middle_end/inlining_cost.cmi middle_end/inlining_transforms.cmo : middle_end/base_types/variable.cmi \ middle_end/base_types/var_within_closure.cmi \ - middle_end/simple_value_approx.cmi middle_end/parameter.cmi \ - 
utils/misc.cmi bytecomp/lambda.cmi middle_end/inlining_cost.cmi \ - middle_end/inline_and_simplify_aux.cmi middle_end/freshening.cmi \ - middle_end/flambda_utils.cmi middle_end/flambda_iterators.cmi \ - middle_end/flambda.cmi middle_end/base_types/compilation_unit.cmi \ - middle_end/base_types/closure_id.cmi middle_end/backend_intf.cmi \ - middle_end/inlining_transforms.cmi + middle_end/simple_value_approx.cmi middle_end/projection.cmi \ + middle_end/parameter.cmi bytecomp/lambda.cmi \ + middle_end/internal_variable_names.cmi \ + middle_end/inlining_decision_intf.cmi middle_end/inlining_cost.cmi \ + middle_end/inline_and_simplify_aux.cmi middle_end/flambda_utils.cmi \ + middle_end/flambda_iterators.cmi middle_end/flambda.cmi \ + middle_end/debuginfo.cmi middle_end/base_types/compilation_unit.cmi \ + middle_end/base_types/closure_origin.cmi \ + middle_end/base_types/closure_id.cmi middle_end/inlining_transforms.cmi middle_end/inlining_transforms.cmx : middle_end/base_types/variable.cmx \ middle_end/base_types/var_within_closure.cmx \ - middle_end/simple_value_approx.cmx middle_end/parameter.cmx \ - utils/misc.cmx bytecomp/lambda.cmx middle_end/inlining_cost.cmx \ - middle_end/inline_and_simplify_aux.cmx middle_end/freshening.cmx \ - middle_end/flambda_utils.cmx middle_end/flambda_iterators.cmx \ - middle_end/flambda.cmx middle_end/base_types/compilation_unit.cmx \ - middle_end/base_types/closure_id.cmx middle_end/backend_intf.cmi \ - middle_end/inlining_transforms.cmi + middle_end/simple_value_approx.cmx middle_end/projection.cmx \ + middle_end/parameter.cmx bytecomp/lambda.cmx \ + middle_end/internal_variable_names.cmx \ + middle_end/inlining_decision_intf.cmi middle_end/inlining_cost.cmx \ + middle_end/inline_and_simplify_aux.cmx middle_end/flambda_utils.cmx \ + middle_end/flambda_iterators.cmx middle_end/flambda.cmx \ + middle_end/debuginfo.cmx middle_end/base_types/compilation_unit.cmx \ + middle_end/base_types/closure_origin.cmx \ + 
middle_end/base_types/closure_id.cmx middle_end/inlining_transforms.cmi middle_end/inlining_transforms.cmi : middle_end/base_types/variable.cmi \ middle_end/simple_value_approx.cmi bytecomp/lambda.cmi \ middle_end/inlining_decision_intf.cmi \ middle_end/inline_and_simplify_aux.cmi middle_end/flambda.cmi \ middle_end/debuginfo.cmi middle_end/base_types/closure_id.cmi +middle_end/internal_variable_names.cmo : parsing/location.cmi \ + bytecomp/lambda.cmi middle_end/internal_variable_names.cmi +middle_end/internal_variable_names.cmx : parsing/location.cmx \ + bytecomp/lambda.cmx middle_end/internal_variable_names.cmi +middle_end/internal_variable_names.cmi : parsing/location.cmi \ + bytecomp/lambda.cmi middle_end/invariant_params.cmo : middle_end/base_types/variable.cmi \ middle_end/base_types/symbol.cmi middle_end/parameter.cmi \ middle_end/flambda_utils.cmi middle_end/flambda_iterators.cmi \ @@ -1648,13 +1737,13 @@ middle_end/lift_code.cmx : middle_end/base_types/variable.cmx \ middle_end/flambda.cmx middle_end/base_types/compilation_unit.cmx \ middle_end/lift_code.cmi middle_end/lift_code.cmi : middle_end/base_types/variable.cmi \ - middle_end/flambda.cmi + middle_end/internal_variable_names.cmi middle_end/flambda.cmi middle_end/lift_constants.cmo : middle_end/base_types/variable.cmi \ middle_end/base_types/var_within_closure.cmi \ middle_end/base_types/tag.cmi middle_end/base_types/symbol.cmi \ utils/strongly_connected_components.cmi \ middle_end/simple_value_approx.cmi utils/misc.cmi \ - middle_end/base_types/linkage_name.cmi middle_end/inconstant_idents.cmi \ + middle_end/internal_variable_names.cmi middle_end/inconstant_idents.cmi \ middle_end/flambda_utils.cmi middle_end/flambda_iterators.cmi \ middle_end/flambda.cmi middle_end/base_types/compilation_unit.cmi \ middle_end/base_types/closure_id.cmi middle_end/backend_intf.cmi \ @@ -1665,7 +1754,7 @@ middle_end/lift_constants.cmx : middle_end/base_types/variable.cmx \ middle_end/base_types/tag.cmx 
middle_end/base_types/symbol.cmx \ utils/strongly_connected_components.cmx \ middle_end/simple_value_approx.cmx utils/misc.cmx \ - middle_end/base_types/linkage_name.cmx middle_end/inconstant_idents.cmx \ + middle_end/internal_variable_names.cmx middle_end/inconstant_idents.cmx \ middle_end/flambda_utils.cmx middle_end/flambda_iterators.cmx \ middle_end/flambda.cmx middle_end/base_types/compilation_unit.cmx \ middle_end/base_types/closure_id.cmx middle_end/backend_intf.cmi \ @@ -1675,13 +1764,15 @@ middle_end/lift_constants.cmi : middle_end/flambda.cmi \ middle_end/backend_intf.cmi middle_end/lift_let_to_initialize_symbol.cmo : \ middle_end/base_types/variable.cmi middle_end/base_types/tag.cmi \ - middle_end/base_types/symbol.cmi middle_end/flambda_utils.cmi \ - middle_end/flambda.cmi middle_end/debuginfo.cmi parsing/asttypes.cmi \ + middle_end/base_types/symbol.cmi middle_end/internal_variable_names.cmi \ + middle_end/flambda_utils.cmi middle_end/flambda.cmi \ + middle_end/debuginfo.cmi parsing/asttypes.cmi \ middle_end/lift_let_to_initialize_symbol.cmi middle_end/lift_let_to_initialize_symbol.cmx : \ middle_end/base_types/variable.cmx middle_end/base_types/tag.cmx \ - middle_end/base_types/symbol.cmx middle_end/flambda_utils.cmx \ - middle_end/flambda.cmx middle_end/debuginfo.cmx parsing/asttypes.cmi \ + middle_end/base_types/symbol.cmx middle_end/internal_variable_names.cmx \ + middle_end/flambda_utils.cmx middle_end/flambda.cmx \ + middle_end/debuginfo.cmx parsing/asttypes.cmi \ middle_end/lift_let_to_initialize_symbol.cmi middle_end/lift_let_to_initialize_symbol.cmi : middle_end/flambda.cmi \ middle_end/backend_intf.cmi @@ -1735,14 +1826,14 @@ middle_end/projection.cmi : middle_end/base_types/variable.cmi \ middle_end/base_types/closure_id.cmi middle_end/ref_to_variables.cmo : middle_end/base_types/variable.cmi \ middle_end/base_types/mutable_variable.cmi utils/misc.cmi \ - bytecomp/lambda.cmi middle_end/flambda_iterators.cmi \ - middle_end/flambda.cmi 
parsing/asttypes.cmi \ - middle_end/ref_to_variables.cmi + bytecomp/lambda.cmi middle_end/internal_variable_names.cmi \ + middle_end/flambda_iterators.cmi middle_end/flambda.cmi \ + parsing/asttypes.cmi middle_end/ref_to_variables.cmi middle_end/ref_to_variables.cmx : middle_end/base_types/variable.cmx \ middle_end/base_types/mutable_variable.cmx utils/misc.cmx \ - bytecomp/lambda.cmx middle_end/flambda_iterators.cmx \ - middle_end/flambda.cmx parsing/asttypes.cmi \ - middle_end/ref_to_variables.cmi + bytecomp/lambda.cmx middle_end/internal_variable_names.cmx \ + middle_end/flambda_iterators.cmx middle_end/flambda.cmx \ + parsing/asttypes.cmi middle_end/ref_to_variables.cmi middle_end/ref_to_variables.cmi : middle_end/flambda.cmi middle_end/remove_free_vars_equal_to_args.cmo : \ middle_end/base_types/variable.cmi middle_end/pass_wrapper.cmi \ @@ -1759,6 +1850,7 @@ middle_end/remove_unused_arguments.cmo : middle_end/base_types/variable.cmi \ middle_end/flambda_iterators.cmi middle_end/flambda.cmi \ middle_end/find_recursive_functions.cmi \ middle_end/base_types/compilation_unit.cmi \ + middle_end/base_types/closure_origin.cmi \ middle_end/base_types/closure_id.cmi utils/clflags.cmi \ middle_end/remove_unused_arguments.cmi middle_end/remove_unused_arguments.cmx : middle_end/base_types/variable.cmx \ @@ -1767,6 +1859,7 @@ middle_end/remove_unused_arguments.cmx : middle_end/base_types/variable.cmx \ middle_end/flambda_iterators.cmx middle_end/flambda.cmx \ middle_end/find_recursive_functions.cmx \ middle_end/base_types/compilation_unit.cmx \ + middle_end/base_types/closure_origin.cmx \ middle_end/base_types/closure_id.cmx utils/clflags.cmx \ middle_end/remove_unused_arguments.cmi middle_end/remove_unused_arguments.cmi : middle_end/flambda.cmi \ @@ -1803,27 +1896,38 @@ middle_end/share_constants.cmi : middle_end/flambda.cmi middle_end/simple_value_approx.cmo : middle_end/base_types/variable.cmi \ middle_end/base_types/var_within_closure.cmi \ 
middle_end/base_types/tag.cmi middle_end/base_types/symbol.cmi \ + middle_end/base_types/set_of_closures_origin.cmi \ middle_end/base_types/set_of_closures_id.cmi middle_end/parameter.cmi \ - utils/misc.cmi bytecomp/lambda.cmi middle_end/inlining_cost.cmi \ - middle_end/freshening.cmi middle_end/flambda_utils.cmi \ - middle_end/flambda.cmi middle_end/base_types/export_id.cmi \ - middle_end/effect_analysis.cmi middle_end/base_types/closure_id.cmi \ - middle_end/allocated_const.cmi middle_end/simple_value_approx.cmi + utils/misc.cmi bytecomp/lambda.cmi middle_end/internal_variable_names.cmi \ + middle_end/inlining_cost.cmi middle_end/freshening.cmi \ + middle_end/flambda_utils.cmi middle_end/flambda.cmi \ + middle_end/base_types/export_id.cmi middle_end/effect_analysis.cmi \ + middle_end/debuginfo.cmi middle_end/base_types/compilation_unit.cmi \ + middle_end/base_types/closure_origin.cmi \ + middle_end/base_types/closure_id.cmi middle_end/allocated_const.cmi \ + middle_end/simple_value_approx.cmi middle_end/simple_value_approx.cmx : middle_end/base_types/variable.cmx \ middle_end/base_types/var_within_closure.cmx \ middle_end/base_types/tag.cmx middle_end/base_types/symbol.cmx \ + middle_end/base_types/set_of_closures_origin.cmx \ middle_end/base_types/set_of_closures_id.cmx middle_end/parameter.cmx \ - utils/misc.cmx bytecomp/lambda.cmx middle_end/inlining_cost.cmx \ - middle_end/freshening.cmx middle_end/flambda_utils.cmx \ - middle_end/flambda.cmx middle_end/base_types/export_id.cmx \ - middle_end/effect_analysis.cmx middle_end/base_types/closure_id.cmx \ - middle_end/allocated_const.cmx middle_end/simple_value_approx.cmi + utils/misc.cmx bytecomp/lambda.cmx middle_end/internal_variable_names.cmx \ + middle_end/inlining_cost.cmx middle_end/freshening.cmx \ + middle_end/flambda_utils.cmx middle_end/flambda.cmx \ + middle_end/base_types/export_id.cmx middle_end/effect_analysis.cmx \ + middle_end/debuginfo.cmx middle_end/base_types/compilation_unit.cmx \ + 
middle_end/base_types/closure_origin.cmx \ + middle_end/base_types/closure_id.cmx middle_end/allocated_const.cmx \ + middle_end/simple_value_approx.cmi middle_end/simple_value_approx.cmi : middle_end/base_types/variable.cmi \ middle_end/base_types/var_within_closure.cmi \ middle_end/base_types/tag.cmi middle_end/base_types/symbol.cmi \ - middle_end/base_types/set_of_closures_id.cmi bytecomp/lambda.cmi \ - middle_end/freshening.cmi middle_end/flambda.cmi \ - middle_end/base_types/export_id.cmi middle_end/base_types/closure_id.cmi + middle_end/base_types/set_of_closures_origin.cmi \ + middle_end/base_types/set_of_closures_id.cmi middle_end/parameter.cmi \ + bytecomp/lambda.cmi middle_end/freshening.cmi middle_end/flambda.cmi \ + middle_end/base_types/export_id.cmi middle_end/debuginfo.cmi \ + middle_end/base_types/closure_origin.cmi \ + middle_end/base_types/closure_id.cmi middle_end/simplify_boxed_integer_ops.cmo : middle_end/simplify_common.cmi \ middle_end/simplify_boxed_integer_ops_intf.cmi \ middle_end/simple_value_approx.cmi bytecomp/lambda.cmi \ @@ -1880,13 +1984,15 @@ middle_end/unbox_closures.cmi : middle_end/base_types/variable.cmi \ middle_end/flambda.cmi middle_end/unbox_free_vars_of_closures.cmo : \ middle_end/base_types/variable.cmi middle_end/projection.cmi \ - middle_end/pass_wrapper.cmi utils/misc.cmi middle_end/inlining_cost.cmi \ + middle_end/pass_wrapper.cmi utils/misc.cmi \ + middle_end/internal_variable_names.cmi middle_end/inlining_cost.cmi \ middle_end/flambda_utils.cmi middle_end/flambda_iterators.cmi \ middle_end/flambda.cmi middle_end/extract_projections.cmi \ utils/clflags.cmi middle_end/unbox_free_vars_of_closures.cmi middle_end/unbox_free_vars_of_closures.cmx : \ middle_end/base_types/variable.cmx middle_end/projection.cmx \ - middle_end/pass_wrapper.cmx utils/misc.cmx middle_end/inlining_cost.cmx \ + middle_end/pass_wrapper.cmx utils/misc.cmx \ + middle_end/internal_variable_names.cmx middle_end/inlining_cost.cmx \ 
middle_end/flambda_utils.cmx middle_end/flambda_iterators.cmx \ middle_end/flambda.cmx middle_end/extract_projections.cmx \ utils/clflags.cmx middle_end/unbox_free_vars_of_closures.cmi @@ -1924,6 +2030,15 @@ middle_end/base_types/closure_id.cmx : \ middle_end/base_types/closure_id.cmi middle_end/base_types/closure_id.cmi : \ middle_end/base_types/closure_element.cmi +middle_end/base_types/closure_origin.cmo : \ + middle_end/base_types/closure_id.cmi \ + middle_end/base_types/closure_origin.cmi +middle_end/base_types/closure_origin.cmx : \ + middle_end/base_types/closure_id.cmx \ + middle_end/base_types/closure_origin.cmi +middle_end/base_types/closure_origin.cmi : utils/identifiable.cmi \ + middle_end/base_types/compilation_unit.cmi \ + middle_end/base_types/closure_id.cmi middle_end/base_types/compilation_unit.cmo : utils/misc.cmi \ middle_end/base_types/linkage_name.cmi utils/identifiable.cmi \ typing/ident.cmi middle_end/base_types/compilation_unit.cmi @@ -1953,14 +2068,16 @@ middle_end/base_types/linkage_name.cmo : utils/identifiable.cmi \ middle_end/base_types/linkage_name.cmx : utils/identifiable.cmx \ middle_end/base_types/linkage_name.cmi middle_end/base_types/linkage_name.cmi : utils/identifiable.cmi -middle_end/base_types/mutable_variable.cmo : utils/identifiable.cmi \ - typing/ident.cmi middle_end/base_types/compilation_unit.cmi \ +middle_end/base_types/mutable_variable.cmo : \ + middle_end/base_types/variable.cmi \ middle_end/base_types/mutable_variable.cmi -middle_end/base_types/mutable_variable.cmx : utils/identifiable.cmx \ - typing/ident.cmx middle_end/base_types/compilation_unit.cmx \ +middle_end/base_types/mutable_variable.cmx : \ + middle_end/base_types/variable.cmx \ middle_end/base_types/mutable_variable.cmi -middle_end/base_types/mutable_variable.cmi : utils/identifiable.cmi \ - typing/ident.cmi middle_end/base_types/compilation_unit.cmi +middle_end/base_types/mutable_variable.cmi : \ + middle_end/base_types/variable.cmi 
middle_end/internal_variable_names.cmi \ + utils/identifiable.cmi typing/ident.cmi \ + middle_end/base_types/compilation_unit.cmi middle_end/base_types/set_of_closures_id.cmo : utils/identifiable.cmi \ middle_end/base_types/id_types.cmi \ middle_end/base_types/compilation_unit.cmi \ @@ -1985,16 +2102,17 @@ middle_end/base_types/static_exception.cmo : utils/numbers.cmi \ middle_end/base_types/static_exception.cmx : utils/numbers.cmx \ bytecomp/lambda.cmx middle_end/base_types/static_exception.cmi middle_end/base_types/static_exception.cmi : utils/identifiable.cmi -middle_end/base_types/symbol.cmo : utils/misc.cmi \ - middle_end/base_types/linkage_name.cmi utils/identifiable.cmi \ - middle_end/base_types/compilation_unit.cmi \ +middle_end/base_types/symbol.cmo : middle_end/base_types/variable.cmi \ + utils/misc.cmi middle_end/base_types/linkage_name.cmi \ + utils/identifiable.cmi middle_end/base_types/compilation_unit.cmi \ middle_end/base_types/symbol.cmi -middle_end/base_types/symbol.cmx : utils/misc.cmx \ - middle_end/base_types/linkage_name.cmx utils/identifiable.cmx \ - middle_end/base_types/compilation_unit.cmx \ +middle_end/base_types/symbol.cmx : middle_end/base_types/variable.cmx \ + utils/misc.cmx middle_end/base_types/linkage_name.cmx \ + utils/identifiable.cmx middle_end/base_types/compilation_unit.cmx \ middle_end/base_types/symbol.cmi -middle_end/base_types/symbol.cmi : middle_end/base_types/linkage_name.cmi \ - utils/identifiable.cmi middle_end/base_types/compilation_unit.cmi +middle_end/base_types/symbol.cmi : middle_end/base_types/variable.cmi \ + middle_end/base_types/linkage_name.cmi utils/identifiable.cmi \ + middle_end/base_types/compilation_unit.cmi middle_end/base_types/tag.cmo : utils/numbers.cmi utils/misc.cmi \ utils/identifiable.cmi middle_end/base_types/tag.cmi middle_end/base_types/tag.cmx : utils/numbers.cmx utils/misc.cmx \ @@ -2008,13 +2126,16 @@ middle_end/base_types/var_within_closure.cmx : \ 
middle_end/base_types/var_within_closure.cmi middle_end/base_types/var_within_closure.cmi : \ middle_end/base_types/closure_element.cmi -middle_end/base_types/variable.cmo : utils/misc.cmi utils/identifiable.cmi \ +middle_end/base_types/variable.cmo : utils/misc.cmi \ + middle_end/internal_variable_names.cmi utils/identifiable.cmi \ typing/ident.cmi middle_end/base_types/compilation_unit.cmi \ middle_end/base_types/variable.cmi -middle_end/base_types/variable.cmx : utils/misc.cmx utils/identifiable.cmx \ +middle_end/base_types/variable.cmx : utils/misc.cmx \ + middle_end/internal_variable_names.cmx utils/identifiable.cmx \ typing/ident.cmx middle_end/base_types/compilation_unit.cmx \ middle_end/base_types/variable.cmi -middle_end/base_types/variable.cmi : utils/identifiable.cmi typing/ident.cmi \ +middle_end/base_types/variable.cmi : middle_end/internal_variable_names.cmi \ + utils/identifiable.cmi typing/ident.cmi \ middle_end/base_types/compilation_unit.cmi asmcomp/debug/available_regs.cmo : asmcomp/debug/reg_with_debug_info.cmi \ asmcomp/debug/reg_availability_set.cmi asmcomp/reg.cmi asmcomp/proc.cmi \ @@ -2066,13 +2187,11 @@ driver/compile.cmx : utils/warnings.cmx typing/typemod.cmx \ bytecomp/bytegen.cmx parsing/builtin_attributes.cmx driver/compile.cmi driver/compile.cmi : driver/compmisc.cmo : utils/warnings.cmi typing/typemod.cmi utils/misc.cmi \ - parsing/longident.cmi parsing/location.cmi typing/ident.cmi \ - typing/env.cmi utils/config.cmi driver/compenv.cmi utils/clflags.cmi \ - parsing/asttypes.cmi driver/compmisc.cmi + parsing/location.cmi typing/ident.cmi typing/env.cmi utils/config.cmi \ + driver/compenv.cmi utils/clflags.cmi driver/compmisc.cmi driver/compmisc.cmx : utils/warnings.cmx typing/typemod.cmx utils/misc.cmx \ - parsing/longident.cmx parsing/location.cmx typing/ident.cmx \ - typing/env.cmx utils/config.cmx driver/compenv.cmx utils/clflags.cmx \ - parsing/asttypes.cmi driver/compmisc.cmi + parsing/location.cmx typing/ident.cmx 
typing/env.cmx utils/config.cmx \ + driver/compenv.cmx utils/clflags.cmx driver/compmisc.cmi driver/compmisc.cmi : typing/env.cmi driver/compplugin.cmo : utils/misc.cmi parsing/location.cmi utils/config.cmi \ driver/compmisc.cmi driver/compenv.cmi driver/compdynlink.cmi \ diff --git a/.gitattributes b/.gitattributes index 0d7fefb0..60c928da 100644 --- a/.gitattributes +++ b/.gitattributes @@ -47,10 +47,10 @@ README* ocaml-typo=missing-header /Changes ocaml-typo=non-ascii,missing-header /INSTALL ocaml-typo=missing-header /LICENSE ocaml-typo=long-line,very-long-line,missing-header -# appveyor_build.cmd only has missing-header because dra27 too lazy to update -# check-typo to interpret Cmd-style comments! -/appveyor_build.cmd ocaml-typo=long-line,very-long-line,missing-header text eol=crlf -/appveyor_build.sh ocaml-typo=non-ascii +# tools/ci/appveyor/appveyor_build.cmd only has missing-header because +# dra27 too lazy to update check-typo to interpret Cmd-style comments! +/tools/ci/appveyor/appveyor_build.cmd ocaml-typo=long-line,very-long-line,missing-header text eol=crlf +/tools/ci/appveyor/appveyor_build.sh ocaml-typo=non-ascii asmcomp/*/emit.mlp ocaml-typo=tab,long-line,unused-prop @@ -101,7 +101,9 @@ yacc/*.[ch] ocaml-typo=long-line,very-long-line,unused-prop # Test suite command fragments *.checker text eol=lf *.precheck text eol=lf -*.runner text eol=lf +# ocamltest hooks which are used in the testsuite +*.check-program-output text eol=lf +*.run text eol=lf configure text eol=lf config/auto-aux/hasgot text eol=lf @@ -142,10 +144,15 @@ manual/tools/texexpand text eol=lf # Tests which include references spanning multiple lines fail with \r\n # endings, so use \n endings only, even on Windows. 
+testsuite/tests/basic-more/morematch.ml text eol=lf +testsuite/tests/basic-more/robustmatch.ml text eol=lf testsuite/tests/parsing/*.ml text eol=lf testsuite/tests/docstrings/empty.ml text eol=lf testsuite/tests/functors/functors.ml text eol=lf testsuite/tests/translprim/module_coercion.ml text eol=lf +testsuite/tests/typing-objects-bugs/pr3968_bad.ml text eol=lf +testsuite/tests/typing-recmod/t12bad.ml text eol=lf +testsuite/tests/typing-safe-linking/b_bad.ml text eol=lf testsuite/tests/warnings/w04.ml text eol=lf testsuite/tests/warnings/w04_failure.ml text eol=lf testsuite/tests/warnings/w32.ml text eol=lf @@ -155,12 +162,18 @@ testsuite/tests/warnings/w32.ml text eol=lf testsuite/tests/formatting/margins.ml text eol=lf testsuite/tests/letrec-disallowed/disallowed.ml text eol=lf testsuite/tests/letrec-disallowed/extension_constructor.ml text eol=lf -testsuite/tests/letrec-disallowed/float_block.ml text eol=lf +testsuite/tests/letrec-disallowed/float_block_allowed.ml text eol=lf +testsuite/tests/letrec-disallowed/float_block_disallowed.ml text eol=lf testsuite/tests/letrec-disallowed/generic_arrays.ml text eol=lf +testsuite/tests/letrec-disallowed/lazy_.ml text eol=lf testsuite/tests/letrec-disallowed/module_constraints.ml text eol=lf +testsuite/tests/letrec-disallowed/unboxed.ml text eol=lf testsuite/tests/letrec-disallowed/pr7215.ml text eol=lf +testsuite/tests/letrec-disallowed/pr7231.ml text eol=lf +testsuite/tests/letrec-disallowed/pr7706.ml text eol=lf testsuite/tests/lexing/uchar_esc.ml text eol=lf testsuite/tests/match-exception-warnings/exhaustiveness_warnings.ml text eol=lf +testsuite/tests/tool-toplevel/pr7060.ml text eol=lf testsuite/tests/typing-extension-constructor/test.ml text eol=lf testsuite/tests/typing-extensions/extensions.ml text eol=lf testsuite/tests/typing-extensions/open_types.ml text eol=lf diff --git a/.gitignore b/.gitignore index 3657bd96..4b78128f 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,8 @@ *.out.dSYM *.swp 
_ocamltest +_ocamltestd +*.odoc # local to root directory @@ -157,6 +159,7 @@ _ocamltest /ocamldoc/ocamldoc /ocamldoc/ocamldoc.opt +/ocamldoc/odoc /ocamldoc/odoc_crc.ml /ocamldoc/odoc_lexer.ml /ocamldoc/odoc_ocamlhtml.ml @@ -167,6 +170,7 @@ _ocamltest /ocamldoc/odoc_text_parser.ml /ocamldoc/odoc_text_parser.mli /ocamldoc/stdlib_man +/ocamldoc/stdlib_non_prefixed/*.mli /ocamldoc/stdlib_html /ocamldoc/*.output /ocamldoc/test_stdlib @@ -182,7 +186,7 @@ _ocamltest /otherlibs/dynlink/extract_crc /otherlibs/threads/marshal.mli -/otherlibs/threads/pervasives.mli +/otherlibs/threads/stdlib.mli /otherlibs/threads/unix.mli /otherlibs/win32graph/graphics.ml /otherlibs/win32graph/graphics.mli @@ -213,7 +217,6 @@ _ocamltest /otherlibs/win32unix/strofaddr.c /otherlibs/win32unix/time.c /otherlibs/win32unix/unlink.c -/otherlibs/win32unix/utimes.c /parsing/parser.ml /parsing/parser.mli @@ -240,25 +243,21 @@ _ocamltest /testsuite/**/*.byte /testsuite/**/*.native /testsuite/**/program -/testsuite/**/_log +/testsuite/**/_log* /testsuite/failure.stamp /testsuite/_retries -/testsuite/tests/asmcomp/codegen -/testsuite/tests/asmcomp/parsecmm.ml -/testsuite/tests/asmcomp/parsecmm.mli -/testsuite/tests/asmcomp/lexcmm.ml -/testsuite/tests/asmcomp/*.s -/testsuite/tests/asmcomp/*.out.manifest +/testsuite/tests/asmgen/codegen +/testsuite/tests/asmgen/parsecmm.ml +/testsuite/tests/asmgen/parsecmm.mli +/testsuite/tests/asmgen/lexcmm.ml +/testsuite/tests/asmgen/*.s +/testsuite/tests/asmgen/*.out.manifest -/testsuite/tests/basic/*.safe-string /testsuite/tests/embedded/caml -/testsuite/tests/float-unboxing/*.flambda -/testsuite/tests/float-unboxing/float_inline.ml - /testsuite/tests/lib-dynlink-bytecode/main /testsuite/tests/lib-dynlink-bytecode/static /testsuite/tests/lib-dynlink-bytecode/custom @@ -276,15 +275,6 @@ _ocamltest /testsuite/tests/lib-threads/*.byt -/testsuite/tests/lib-unix/win-stat/*-file -/testsuite/tests/lib-unix/win-symlink/link* -/testsuite/tests/lib-unix/win-symlink/test.txt - 
-/testsuite/tests/lib-unix/win-symlink/link* -/testsuite/tests/lib-unix/win-symlink/test.txt - -/testsuite/tests/opaque/*/*.mli - /testsuite/tests/output_obj/*.bc.c /testsuite/tests/output_obj/*_stub /testsuite/tests/output_obj/*_stub @@ -293,11 +283,6 @@ _ocamltest /testsuite/tests/self-contained-toplevel/cached_cmi.ml -/testsuite/tests/tool-debugger/**/compiler-libs -/testsuite/tests/tool-debugger/find-artifacts/out -/testsuite/tests/tool-debugger/no_debug_event/out -/testsuite/tests/tool-debugger/no_debug_event/c - /testsuite/tests/tool-ocamldep-modalias/*.byt* /testsuite/tests/tool-ocamldep-modalias/*.opt* /testsuite/tests/tool-ocamldep-modalias/depend.mk @@ -326,36 +311,15 @@ _ocamltest /testsuite/tests/tool-lexyacc/grammar.mli /testsuite/tests/tool-lexyacc/grammar.ml -/testsuite/tests/typing-misc/false.flat-float -/testsuite/tests/typing-misc/true.flat-float -/testsuite/tests/typing-misc/pr6939.ml - -/testsuite/tests/typing-multifile/a.ml -/testsuite/tests/typing-multifile/b.ml -/testsuite/tests/typing-multifile/c.ml -/testsuite/tests/typing-multifile/d.mli -/testsuite/tests/typing-multifile/e.ml -/testsuite/tests/typing-multifile/f.ml -/testsuite/tests/typing-multifile/g.ml -/testsuite/tests/typing-multifile/test - /testsuite/tests/typing-unboxed-types/false.flat-float /testsuite/tests/typing-unboxed-types/true.flat-float /testsuite/tests/typing-unboxed-types/test.ml.reference -/testsuite/tests/translprim/false.flat-float -/testsuite/tests/translprim/true.flat-float -/testsuite/tests/translprim/array_spec.ml.reference -/testsuite/tests/translprim/module_coercion.ml.reference - /testsuite/tests/unboxed-primitive-args/main.ml /testsuite/tests/unboxed-primitive-args/stubs.c /testsuite/tests/unwind/unwind_test -/testsuite/tests/warnings/w55.opt.opt_result -/testsuite/tests/warnings/w58.opt.opt_result - /testsuite/tests/win-unicode/symlink_tests.precheck /testsuite/tools/expect_test diff --git a/.mailmap b/.mailmap index 772aac57..9635a00a 100644 --- a/.mailmap 
+++ b/.mailmap @@ -27,6 +27,7 @@ Damien Doligez doligez Mohamed Iguernelala Jérémie Dimino Jeremy Yallop yallop +Nicolás Ojeda Bär # The aliases below correspond to preference expressed by # contributors on the name under which they credited, for example @@ -67,6 +68,7 @@ Stephen Dolan Junsong Li Junsong Li Christophe Raffali +Christophe Raffali Anton Bachin Reed Wilson David Scott @@ -86,9 +88,11 @@ Dwight Guth Dwight Guth Andreas Hauptmann fdopen Andreas Hauptmann +Andreas Hauptmann Hendrik Tews Hugo Heuzard Miod Vallat +Christoph Spiel # These contributors prefer to be referred to pseudonymously whitequark diff --git a/.merlin b/.merlin index 096ee297..5649a110 100644 --- a/.merlin +++ b/.merlin @@ -25,9 +25,6 @@ B ./otherlibs/dynlink S ./otherlibs/graph B ./otherlibs/graph -S ./otherlibs/num -B ./otherlibs/num - S ./otherlibs/str B ./otherlibs/str @@ -43,8 +40,8 @@ B ./otherlibs/unix S ./parsing B ./parsing -S ./stdlib -B ./stdlib +STDLIB ./stdlib +FLG -open Stdlib -nopervasives S ./toplevel B ./toplevel diff --git a/.travis-ci.sh b/.travis-ci.sh deleted file mode 100755 index f30d64dc..00000000 --- a/.travis-ci.sh +++ /dev/null @@ -1,154 +0,0 @@ -#!/bin/bash -#************************************************************************** -#* * -#* OCaml * -#* * -#* Anil Madhavapeddy, OCaml Labs * -#* * -#* Copyright 2014 Institut National de Recherche en Informatique et * -#* en Automatique. * -#* * -#* All rights reserved. This file is distributed under the terms of * -#* the GNU Lesser General Public License version 2.1, with the * -#* special exception on linking described in the file LICENSE. * -#* * -#************************************************************************** - -PREFIX=~/local - -MAKE=make SHELL=dash - -# TRAVIS_COMMIT_RANGE has the form ... -# TRAVIS_CUR_HEAD is -# TRAVIS_PR_HEAD is -# -# The following diagram illustrates the relationship between -# the commits: -# -# (trunk) (pr branch) -# TRAVIS_CUR_HEAD TRAVIS_PR_HEAD -# | / -# ... ... 
-# | / -# TRAVIS_MERGE_BASE -# -echo TRAVIS_COMMIT_RANGE=$TRAVIS_COMMIT_RANGE -TRAVIS_CUR_HEAD=${TRAVIS_COMMIT_RANGE%%...*} -TRAVIS_PR_HEAD=${TRAVIS_COMMIT_RANGE##*...} -case $TRAVIS_EVENT_TYPE in - # If this is not a pull request then TRAVIS_COMMIT_RANGE may be empty. - pull_request) - TRAVIS_MERGE_BASE=$(git merge-base $TRAVIS_CUR_HEAD $TRAVIS_PR_HEAD);; -esac - -BuildAndTest () { - mkdir -p $PREFIX - cat< /dev/null && CheckNoChangesMessage || echo pass -} - -CheckNoChangesMessage () { - API_URL=https://api.github.com/repos/$TRAVIS_REPO_SLUG/issues/$TRAVIS_PULL_REQUEST/labels - if test -n "$(git log --grep="[Nn]o [Cc]hange.* needed" --max-count=1 \ - ${TRAVIS_MERGE_BASE}..${TRAVIS_PR_HEAD})" - then echo pass - elif test -n "$(curl $API_URL | grep 'no-change-entry-needed')" - then echo pass - else exit 1 - fi -} - -CheckTestsuiteModified () { - cat< /dev/null && exit 1 || echo pass -} - -case $CI_KIND in -build) BuildAndTest;; -changes) - case $TRAVIS_EVENT_TYPE in - pull_request) CheckChangesModified;; - esac;; -tests) - case $TRAVIS_EVENT_TYPE in - pull_request) CheckTestsuiteModified;; - esac;; -*) echo unknown CI kind - exit 1 - ;; -esac diff --git a/.travis.yml b/.travis.yml index 3a220a6a..eb065cd3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,7 +17,7 @@ sudo: false language: c git: submodules: false -script: bash -ex .travis-ci.sh +script: bash -ex tools/ci/travis/travis-ci.sh matrix: include: - env: CI_KIND=build XARCH=i386 diff --git a/Changes b/Changes index 7128bbe8..25976617 100644 --- a/Changes +++ b/Changes @@ -1,3 +1,579 @@ +OCaml 4.07.0 (10 July 2018) +--------------------------- + +(Changes that can break existing programs are marked with a "*") + +### Language features: + +- MPR#6023, GPR#1648: Allow type-based selection of GADT constructors + (Thomas Refis and Leo White, review by Jacques Garrigue and Gabriel Scherer) + +- GPR#1546: Allow empty variants + (Runhang Li, review by Gabriel Radanne and Jacques Garrigue) + +### Standard library: + 
+- MPR#4170, GPR#1674: add the constant `Float.pi`. + (Christophe Troestler, review by Damien Doligez) + +- MPR#6139, GPR#1685: Move the Bigarray module to the standard library. Keep the + bigarray library as an overlay adding the deprecated map_file functions + (Jérémie Dimino, review by Mark Shinwell) + +- MPR#7690, GPR#1528: fix the float_of_string function for hexadecimal floats + with very large values of the exponent. + (Olivier Andrieu) + +- GPR#1002: add a new `Seq` module defining a list-of-thunks style iterator. + Also add `{to,of}_seq` to several standard modules. + (Simon Cruanes, review by Alain Frisch and François Bobot) + +* GPR#1010: pack all standard library modules into a single module Stdlib + which is the default opened module (Stdlib itself includes Pervasives) to free + up the global namespace for other standard libraries, while still allowing any + OCaml standard library module to be referred to as Stdlib.Module. This is + implemented efficiently using module aliases (prefixing all modules with + Stdlib__, e.g. Stdlib__string). + (Jérémie Dimino, David Allsopp and Florian Angeletti, review by David Allsopp + and Gabriel Radanne) + +- GPR#1637: String.escaped is faster and does not allocate when called with a + string that does not contain any characters needing to be escaped. + (Alain Frisch, review by Xavier Leroy and Gabriel Scherer) + +- GPR#1638: add a Float module. + (Nicolás Ojeda Bär, review by Alain Frisch and Jeremy Yallop) + +- GPR#1697: Tune [List.init] tailrec threshold so that it does not stack overflow + when compiled with the Js_of_ocaml backend. + (Hugo Heuzard, reviewed by Gabriel Scherer) + +### Other libraries: + +- MPR#7745, GPR#1629: Graphics.open_graph displays the correct window title on + Windows again (fault introduced by 4.06 Unicode changes). + (David Allsopp) + +* GPR#1406: Unix.isatty now returns true in the native Windows ports when + passed a file descriptor connected to a Cygwin PTY.
In particular, compiler + colors for the native Windows ports now work under Cygwin/MSYS2. + (Nicolás Ojeda Bär, review by Gabriel Scherer, David Allsopp, Xavier Leroy) + +- GPR#1451: [getpwuid], [getgrgid], [getpwnam], [getgrnam] now raise Unix error + instead of returning [Not_found] when interrupted by a signal. + (Arseniy Alekseyev, review by Mark Shinwell and Xavier Leroy) + +- GPR#1477: raw_spacetime_lib can now be used in bytecode. + (Nicolás Ojeda Bär, review by Mark Shinwell) + +- GPR#1533: (a) The implementation of Thread.yield for system thread + now uses nanosleep(1) for enabling better preemption. + (b) Thread.delay is now an alias for Unix.sleepf. + (Jacques-Henri Jourdan, review by Xavier Leroy and David Allsopp) + +### Compiler user-interface and warnings: + +- MPR#7663, GPR#1694: print the whole cycle and add a reference to the manual in + the unsafe recursive module evaluation error message. + (Florian Angeletti, report by Matej Košík, review by Gabriel Scherer) + +- GPR#1166: In OCAMLPARAM, an alternative separator can be specified as + first character (instead of comma) in the set ":|; ," + (Fabrice Le Fessant) + +- GPR#1358: Fix usage warnings with no mli file + (Leo White, review by Alain Frisch) + +- GPR#1428: give a non dummy location for warning 49 (no cmi found) + (Valentin Gatien-Baron) + +- GPR#1491: Improve error reporting for ill-typed applicative functor + types, F(M).t. 
+ (Valentin Gatien-Baron, review by Florian Angeletti and Gabriel Radanne) + +- GPR#1496: Refactor the code printing explanation for unification type errors, + in order to avoid duplicating pattern matches + (Armaël Guéneau, review by Florian Angeletti and Gabriel Scherer) + +- GPR#1505: Add specific error messages for unification errors involving + functions of type "unit -> _" + (Arthur Charguéraud and Armaël Guéneau, with help from Leo White, review by + Florian Angeletti and Gabriel Radanne) + +- GPR#1510: Add specific explanation for unification errors caused by type + constraints propagated by keywords (such as if, while, for...) + (Armaël Guéneau and Gabriel Scherer, original design by Arthur Charguéraud, + review by Frédéric Bour, Gabriel Radanne and Alain Frisch) + +- GPR#1515: honor the BUILD_PATH_PREFIX_MAP environment variable + to enable reproducible builds + (Gabriel Scherer, with help from Ximin Luo, review by Damien Doligez) + +- GPR#1534: Extend the warning printed when (*) is used, adding a hint to + suggest using ( * ) instead + (Armaël Guéneau, with help and review from Florian Angeletti and Gabriel + Scherer) + +- GPR#1552, GPR#1577: do not warn about ambiguous variables in guards + (warning 57) when the ambiguous values have been filtered by + a previous clause + (Gabriel Scherer and Thomas Refis, review by Luc Maranget) + +- GPR#1554: warnings 52 and 57: fix reference to manual detailed explanation + (Florian Angeletti, review by Thomas Refis and Gabriel Scherer) + +- GPR#1618: add the -dno-unique-ids and -dunique-ids compiler flags + (Sébastien Hinderer, review by Leo White and Damien Doligez) + +- GPR#1649 change compilation order of toplevel definitions, so that some warnings + emitted by the bytecode compiler appear more in-order than before. 
+ (Luc Maranget, advice and review by Damien Doligez) + +- GPR#1806: add linscan to OCAMLPARAM options + (Raja Boujbel) + +### Code generation and optimizations: + +- MPR#7630, GPR#1401: Faster compilation of large modules with Flambda. + (Pierre Chambart, report by Emilio Jesús Gallego Arias, + Pierre-Marie Pédrot and Paul Steckler, review by Gabriel Scherer + and Leo White) + +- MPR#7630, GPR#1455: Disable CSE for the initialization function + (Pierre Chambart, report by Emilio Jesús Gallego Arias, + review by Gabriel Scherer and Xavier Leroy) + +- GPR#1370: Fix code duplication in Cmmgen + (Vincent Laviron, with help from Pierre Chambart, + reviews by Gabriel Scherer and Luc Maranget) + +- GPR#1486: ARM 32-bit port: add support for ARMv8 in 32-bit mode, + a.k.a. AArch32. + For this platform, avoid ITE conditional instruction blocks and use + simpler IT blocks instead + (Xavier Leroy, review by Mark Shinwell) + +- GPR#1487: Treat negated float comparisons more directly + (Leo White, review by Xavier Leroy) + +- GPR#1573: emitcode: merge events after instructions reordering + (Thomas Refis and Leo White, with help from David Allsopp, review by Frédéric + Bour) + +- GPR#1606: Simplify the semantics of Lambda.free_variables and Lambda.subst, + including some API changes in bytecomp/lambda.mli + (Pierre Chambart, review by Gabriel Scherer) + +- GPR#1613: ensure that set-of-closures are processed first so that other + entries in the let-rec symbol do not get dummy approximations + (Leo White and Xavier Clerc, review by Pierre Chambart) + +* GPR#1617: Make string/bytes distinguishable in the bytecode. 
+ (Hugo Heuzard, reviewed by Nicolás Ojeda Bär) + +- GPR#1627: Reduce cmx sizes by sharing variable names (Flambda only) + (Fuyong Quah, Leo White, review by Xavier Clerc) + +- GPR#1665: reduce the size of cmx files in classic mode by dropping the + bodies of functions that will not be inlined + (Fuyong Quah, review by Leo White and Pierre Chambart) + +- GPR#1666: reduce the size of cmx files in classic mode by dropping the + bodies of functions that cannot be reached from the module block + (Fuyong Quah, review by Leo White and Pierre Chambart) + +- GPR#1686: Turn off by default flambda invariants checks. + (Pierre Chambart) + +- GPR#1707: Add [Closure_origin.t] to trace inlined functions to prevent + infinite loops from repeatedly inlining copies of the same function. + (Fu Yong Quah) + +- GPR#1740: make sure startup.o is always linked in when using + "-output-complete-obj". Previously, it was always linked in only on some + platforms, making this option unusable on platforms where it wasn't + (Jérémie Dimino, review by Sébastien Hinderer and Xavier Leroy) + +### Runtime system: + +- MPR#6411, GPR#1535: don't compile everything with -static-libgcc on mingw32, + only dllbigarray.dll and libbigarray.a. Allows the use of C++ libraries which + raise exceptions. + (David Allsopp) + +- MPR#7100, GPR#1476: trigger a minor GC when custom blocks accumulate + in minor heap + (Alain Frisch, report by talex, review by Damien Doligez, Leo White, + Gabriel Scherer) + +- GPR#1431: remove ocamlrun dependencies on curses/terminfo/termcap C library + (Xavier Leroy, review by Daniel Bünzli) + +- GPR#1478: The Spacetime profiler now works under Windows (but it is not yet + able to collect profiling information from C stubs). + (Nicolás Ojeda Bär, review by Xavier Leroy, Mark Shinwell) + +- GPR#1483: fix GC freelist accounting for chunks larger than the maximum block + size.
+ (David Allsopp and Damien Doligez) + +- GPR#1526: install the debug and instrumented runtimes + (lib{caml,asm}run{d,i}.a) + (Gabriel Scherer, reminded by Julia Lawall) + +- GPR#1563: simplify implementation of LSRINT and ASRINT + (Max Mouratov, review by Frédéric Bour) + +- GPR#1644: remove caml_alloc_float_array from the bytecode primitives list + (it's a native code primitive) + (David Allsopp) + +- GPR#1701: fix missing root bug in GPR#1476 + (Mark Shinwell) + +- GPR#1752: do not alias function arguments to sigprocmask (Anil Madhavapeddy) + +- GPR#1753: avoid potential off-by-one overflow in debugger socket path + length (Anil Madhavapeddy) + +### Tools: + +- MPR#7643, GPR#1377: ocamldep, fix an exponential blowup in presence of nested + structures and signatures (e.g. "include struct … include(struct … end) … end") + (Florian Angeletti, review by Gabriel Scherer, report by Christophe Raffalli) + +- MPR#7687, GPR#1653: deprecate -thread option, + which is equivalent to -I +threads. + (Nicolás Ojeda Bär, report by Daniel Bünzli) + +- MPR#7710: `ocamldep -sort` should exit with nonzero code in case of + cyclic dependencies + (Xavier Leroy, report by Mantis user baileyparker) + +- GPR#1537: boot/ocamldep is no longer included in the source distribution; + boot/ocamlc -depend can be used in its place. + (Nicolás Ojeda Bär, review by Xavier Leroy and Damien Doligez) + +- GPR#1585: optimize output of "ocamllex -ml" + (Alain Frisch, review by Frédéric Bour and Gabriel Scherer) + +- GPR#1667: add command-line options -no-propt, -no-version, -no-time, + -no-breakpoint and -topdirs-path to ocamldebug + (Sébastien Hinderer, review by Damien Doligez) + +- GPR#1695: add the -null-crc command-line option to ocamlobjinfo. + (Sébastien Hinderer, review by David Allsopp and Gabriel Scherer) + +- GPR#1710: ocamldoc, improve the 'man' rendering of subscripts and + superscripts. 
+ (Gabriel Scherer) + +- GPR#1771: ocamldebug, avoid out of bound access + (Thomas Refis) + +### Manual and documentation: + +- MPR#7613: minor reword of the "refutation cases" paragraph + (Florian Angeletti, review by Jacques Garrigue) + +- PR#7647, GPR#1384: emphasize ocaml.org website and forum in README + (Yawar Amin, review by Gabriel Scherer) + +- PR#7698, GPR#1545: improve wording in OCaml manual in several places, + mostly in Chapter 1. This addresses the easier changes suggested in the PR. + (Jim Fehrle, review by Florian Angeletti and David Allsopp) + +- GPR#1540: manual, decouple verbatim and toplevel style in code examples + (Florian Angeletti, review by Gabriel Scherer) + +- GPR#1556: manual, add a consistency test for manual references inside + the compiler source code. + (Florian Angeletti, review by Gabriel Scherer) + +- GPR#1647: manual, subsection on record and variant disambiguation + (Florian Angeletti, review by Alain Frisch and Gabriel Scherer) + +- GPR#1702: manual, add a signature mode for code examples + (Florian Angeletti, review by Gabriel Scherer) + +- GPR#1741: manual, improve typesetting and legibility in HTML output + (steinuil, review by Gabriel Scherer) + +- GPR#1757: style the html manual, changing type and layout + (Charles Chamberlain, review by Florian Angeletti, Xavier Leroy, + Gabriel Radanne, Perry E. Metzger, and Gabriel Scherer) + +- GPR#1765: manual, ellipsis in code examples + (Florian Angeletti, review and suggestion by Gabriel Scherer) + +- GPR#1767: change html manual to use relative font sizes + (Charles Chamberlain, review by Daniel Bünzli, Perry E. Metzger, + Josh Berdine, and Gabriel Scherer) + +- GPR#1779: integrate the Bigarray documentation into the main manual. + (Perry E.
Metzger, review by Florian Angeletti and Xavier Clerc) + +### Type system: + +- MPR#7611, GPR#1491: reject the use of generative functors as applicative + (Valentin Gatien-Baron) + +- MPR#7706, GPR#1565: in recursive value declarations, track + static size of locally-defined variables + (Gabriel Scherer, review by Jeremy Yallop and Leo White, report by Leo White) + +- MPR#7717, GPR#1593: in recursive value declarations, don't treat + unboxed constructor size as statically known + (Jeremy Yallop, report by Pierre Chambart, review by Gabriel Scherer) + +- MPR#7767, GPR#1712: restore legacy treatment of partially-applied + labeled functions in 'let rec' bindings. + (Jeremy Yallop, report by Ivan Gotovchits, review by Gabriel Scherer) + +* MPR#7787, GPR#1652, GPR#1743: Don't remove module aliases in `module type of` + and `with module`. + The old behaviour can be obtained using the `[@remove_aliases]` attribute. + (Leo White and Thomas Refis, review by Jacques Garrigue) + +- GPR#1468: Do not enrich type_decls with incoherent manifests + (Thomas Refis and Leo White, review by Jacques Garrigue) + +- GPR#1469: Use the information from [@@immediate] annotations when + computing whether a type can be [@@unboxed] + (Damien Doligez, report by Stephan Muenzel, review by Alain Frisch) + +- GPR#1513: Allow compilation units to shadow sub-modules of Pervasives. + For instance users can now use a largeFile.ml file in their project. + (Jérémie Dimino, review by Nicolas Ojeda Bar, Alain Frisch and Gabriel Radanne) + +- GPR#1516: Allow float array construction in recursive bindings + when configured with -no-flat-float-array + (Jeremy Yallop, report by Gabriel Scherer) + +- GPR#1583: propagate refined ty_arg to Parmatch checks + (Thomas Refis, review by Jacques Garrigue) + +- GPR#1609: Changes to ambivalence scope tracking + (Thomas Refis and Leo White, review by Jacques Garrigue) + +- GPR#1628: Treat reraise and raise_notrace as nonexpansive. 
+ (Leo White, review by Alain Frisch) + +* GPR#1778: Fix Soundness bug with non-generalized type variable and + local modules. This is the same bug as MPR#7414, but using local + modules instead of non-local ones. + (Leo White, review by Jacques Garrigue) + +### Compiler distribution build system + +- MPR#5219, GPR#1680, GPR#1877: use 'install' instead of 'cp' + in install scripts. + (Gabriel Scherer, review by Sébastien Hinderer and Valentin Gatien-Baron) + +- MPR#7679: make sure .a files are erased before calling ar rc, otherwise + leftover .a files from an earlier compilation may contain unwanted modules + (Xavier Leroy) + +- GPR#1571: do not perform architecture tests on 32-bit platforms, allowing + 64-bit back-ends to use 64-bit specific constructs + (Xavier Clerc, review by Damien Doligez) + +### Internal/compiler-libs changes: + +- MPR#7738, GPR#1624: Asmlink.reset also resets lib_ccobjs/ccopts + (Cedric Cellier, review by Gabriel Scherer) + +- GPR#1488, GPR#1560: Refreshing parmatch + (Gabriel Scherer and Thomas Refis, review by Luc Maranget) + +- GPR#1502: more command line options for expect tests + (Florian Angeletti, review by Gabriel Scherer) + +- GPR#1511: show code at error location in expect-style tests, + using new Location.show_code_at_location function + (Gabriel Scherer and Armaël Guéneau, + review by Valentin Gatien-Baron and Damien Doligez) + +- GPR#1519, GPR#1532, GPR#1570: migrate tests to ocamltest + (Sébastien Hinderer, review by Gabriel Scherer, Valentin Gatien-Baron + and Nicolás Ojeda Bär) + +- GPR#1520: more robust implementation of Misc.no_overflow_mul + (Max Mouratov, review by Xavier Leroy) + +- GPR#1557: Organise and simplify translation of primitives + (Leo White, review by François Bobot and Nicolás Ojeda Bär) + +- GPR#1567: register all idents relevant for reraise + (Thomas Refis, review by Alain Frisch and Frédéric Bour) + +- GPR#1586: testsuite: 'make promote' for ocamltest tests + (The new "-promote" option for ocamltest is
experimental + and subject to change/removal). + (Gabriel Scherer) + +- GPR#1619: expect_test: print all the exceptions, even the unexpected ones + (Thomas Refis, review by Jérémie Dimino) + +- GPR#1621: expect_test: make sure to not use the installed stdlib + (Jérémie Dimino, review by Thomas Refis) + +- GPR#1646 : add ocamldoc test to ocamltest and + migrate ocamldoc tests to ocamltest + (Florian Angeletti, review by Sébastien Hinderer) + +- GPR#1663: refactor flambda specialise/inlining handling + (Leo White and Xavier Clerc, review by Pierre Chambart) + +- GPR#1679 : remove Pbittest from primitives in lambda + (Hugo Heuzard, review by Mark Shinwell) + +* GPR#1704: Make Ident.t abstract and immutable. + (Gabriel Radanne, review by Mark Shinwell) + +### Bug fixes + +- MPR#4499, GPR#1479: Use native Windows API to implement Sys.getenv, + Unix.getenv and Unix.environment under Windows. + (Nicolás Ojeda Bär, report by Alain Frisch, review by David Allsopp, Xavier + Leroy) + +- MPR#5250, GPR#1435: on Cygwin, when ocamlrun searches the path + for a bytecode executable file, skip directories and other + non-regular files, like other Unix variants do. 
+ (Xavier Leroy) + +- MPR#6394, GPR#1425: fix fatal_error from Parmatch.get_type_path + (Virgile Prevosto, review by David Allsopp, Thomas Refis and Jacques Garrigue) + +* MPR#6604, GPR#931: Only allow directives with filename and at the beginning of + the line + (Tadeu Zagallo, report by Roberto Di Cosmo, + review by Hongbo Zhang, David Allsopp, Gabriel Scherer, Xavier Leroy) + +- MPR#7138, MPR#7701, GPR#1693: Keep documentation comments + even in empty structures and signatures + (Leo White, Florian Angeletti, report by Anton Bachin) + +- MPR#7178, MPR#7253, MPR#7796, GPR#1790: Make sure a function + registered with "at_exit" is executed only once when the program exits + (Nicolás Ojeda Bär and Xavier Leroy, review by Max Mouratov) + +- MPR#7391, GPR#1620: Do not put a dummy method in object types + (Thomas Refis, review by Jacques Garrigue) + +- PR#7660, GPR#1445: Use native Windows API to implement Unix.utimes in order to + avoid unintended shifts of the argument timestamp depending on DST setting. 
+ (Nicolás Ojeda Bär, review by David Allsopp, Xavier Leroy) + +- MPR#7668: -principal is broken with polymorphic variants + (Jacques Garrigue, report by Jun Furuse) + +- MPR#7680, GPR#1497: Incorrect interaction between Matching.for_let and + Simplif.simplify_exits + (Alain Frisch, report and review by Vincent Laviron) + +- MPR#7682, GPR#1495: fix [@@unboxed] for records with 1 polymorphic field + (Alain Frisch, report by Stéphane Graham-Lengrand, review by Gabriel Scherer) + +- MPR#7695, GPR#1541: Fatal error: exception Ctype.Unify(_) with field override + (Jacques Garrigue, report by Nicolás Ojeda Bär) + +- MPR#7704, GPR#1564: use proper variant tag in non-exhaustiveness warning + (Jacques Garrigue, report by Thomas Refis) + +- MPR#7711, GPR#1581: Internal typechecker error triggered by a constraint on + self type in a class type + (Jacques Garrigue, report and review by Florian Angeletti) + +- MPR#7712, GPR#1576: assertion failure with type abbreviations + (Thomas Refis, report by Michael O'Connor, review by Jacques Garrigue) + +- MPR#7747: Type checker can loop infinitely and consume all computer memory + (Jacques Garrigue, report by kantian) + +- MPR#7751, GPR#1657: The toplevel prints some concrete types as abstract + (Jacques Garrigue, report by Matej Kosik) + +- MPR#7765, GPR#1718: When unmarshaling bigarrays, protect against integer + overflows in size computations + (Xavier Leroy, report by Maximilian Tschirschnitz, + review by Gabriel Scherer) + +- MPR#7760, GPR#1713: Exact selection of lexing engine, that is + correct "Segfault in ocamllex-generated code using 'shortest'" + (Luc Maranget, Frédéric Bour, report by Stephen Dolan, + review by Gabriel Scherer) + +- MPR#7769, GPR#1714: calls to Stream.junk could, under some conditions, be + ignored when used on streams based on input channels. + (Nicolás Ojeda Bär, report by Michael Perin, review by Gabriel Scherer) + +- MPR#7793, GPR#1766: the toplevel #use directive now accepts sequences of ';;' + tokens.
This fixes a bug in which certain files accepted by the compiler were + rejected by ocamldep. + (Nicolás Ojeda Bär, report by Hugo Heuzard, review by Hugo Heuzard) + +- GPR#1517: More robust handling of type variables in mcomp + (Leo White and Thomas Refis, review by Jacques Garrigue) + +- GPR#1530, GPR#1574: testsuite, fix 'make parallel' and 'make one DIR=...' + to work on ocamltest-based tests. + (Runhang Li and Sébastien Hinderer, review by Gabriel Scherer) + +- GPR#1550, GPR#1555: Make pattern matching warnings more robust + to ill-typed columns + (Thomas Refis, with help from Gabriel Scherer and Luc Maranget) + +- GPR#1614: consider all bound variables when inlining, fixing a compiler + fatal error. + (Xavier Clerc, review by Pierre Chambart, Leo White) + +- GPR#1622: fix bug in the expansion of command-line arguments under Windows + which could result in some elements of Sys.argv being truncated in some cases. + (Nicolás Ojeda Bär, review by Sébastien Hinderer) + +- GPR#1623: Segfault on Windows 64 bits when expanding wildcards in arguments. + (Marc Lasson, review by David Allsopp, Alain Frisch, Sébastien Hinderer, + Xavier Leroy, Nicolas Ojeda Bar) + +- GPR#1661: more precise principality warning regarding record fields + disambiguation + (Thomas Refis, review by Leo White) + +- GPR#1687: fix bug in the printing of short functor types "(S1 -> S2) -> S3" + (Pieter Goetschalckx, review by Gabriel Scherer) + +- GPR#1722: Scrape types in Typeopt.maybe_pointer + (Leo White, review by Thomas Refis) + +- GPR#1755: ensure that a bigarray is never collected while reading complex + values (Xavier Clerc, Mark Shinwell and Leo White, report by Chris Hardin, + reviews by Stephen Dolan and Xavier Leroy) + +- GPR#1764: in byterun/memory.c, struct pool_block, use C99 flexible arrays + if available + (Xavier Leroy, review by Max Mouratov) + +- GPR#1774: ocamlopt for ARM could generate VFP loads and stores with bad + offsets, rejected by the assembler. 
+ (Xavier Leroy, review by Mark Shinwell) + +- GPR#1808: handle `[@inlined]` attributes under a module constraint + (Xavier Clerc, review by Leo White) + +- GPR#1810: use bit-pattern comparison when meeting float approximations + (Xavier Clerc, report by Christophe Troestler, review by Nicolás Ojeda Bär + and Gabriel Scherer) + +- GPR#1835: Fix off-by-one errors in Weak.get_copy and Weak.blit + (KC Sivaramakrishnan) + +- GPR#1849: bug in runtime function generic_final_minor_update() + that could lead to crashes when Gc.finalise_last is used + (report and fix by Yuriy Vostrikov, review by François Bobot) + + OCaml 4.06.1 (16 Feb 2018): --------------------------- @@ -460,6 +1036,9 @@ OCaml 4.06.0 (3 Nov 2017): (Florian Angeletti, review by Daniel Bünzli, Xavier Leroy and Gabriel Scherer) +- GPR#1688: Fix printing of -0. + (Nicolás Ojeda Bär, review by Jérémie Dimino) + ### Runtime system: * MPR#3771, GPR#153, GPR#1200, GPR#1357, GPR#1362, GPR#1363, GPR#1369, GPR#1398, diff --git a/HACKING.adoc b/HACKING.adoc index 9de68b54..0770d4af 100644 --- a/HACKING.adoc +++ b/HACKING.adoc @@ -236,7 +236,7 @@ found in link:INSTALL.adoc#bootstrap[INSTALL.adoc]. ==== Github's CI: Travis and AppVeyor The script that is run on Travis continuous integration servers is -link:.travis-ci.sh[]; its configuration can be found as +link:tools/ci/travis/travis-ci.sh[]; its configuration can be found as a Travis configuration file in link:.travis.yml[]. For example, if you want to reproduce the default build on your @@ -244,7 +244,7 @@ machine, you can use the configuration values and run command taken from link:.travis.yml[]: ---- -CI_KIND=build XARCH=x64 bash -ex .travis-ci.sh +CI_KIND=build XARCH=x64 bash -ex tools/ci/travis/travis-ci.sh ---- The scripts support two other kinds of tests (values of the @@ -274,11 +274,11 @@ You do not need to be an INRIA employee to open an account on this jenkins service; anyone can create an account there to access build logs and manually restart builds. 
If you would like to do this but have trouble doing it, please email -ocaml-ci-admin@inria.fr +ocaml-ci-admin@inria.fr. To be notified by email of build failures, you can subscribe to the ocaml-ci-notifications@inria.fr mailing list by visiting -https://sympa.inria.fr/sympa/info/ocaml-ci-notifications[its web page] +https://sympa.inria.fr/sympa/info/ocaml-ci-notifications[its web page.] ==== Running INRIA's CI on a publicly available git branch @@ -307,4 +307,25 @@ OCaml's GitHub repository and then push "mybranch" to your fork. 7. You should receive a bunch of e-mails with the build logs for each slave and each tested configuration (with and without flambda) attached. +==== Changing what the CI does + +INRIA's CI "main" and "precheck" jobs run the script +tools/ci-build. In particular, when running the CI on a publicly +available branch via the "precheck" job as explained in the previous +section, you can edit this script to change what the CI will test. + +For instance, parallel builds are only tested for the "trunk" +branch. In order to use "precheck" to test parallel build on a custom +branch, add this at the beginning of tools/ci-build: + +---- +OCAML_JOBS=10 +---- + +=== The `caml-commits` mailing list + +If you would like to receive email notifications of all commits made to the main +git repository, you can subscribe to the caml-commits@inria.fr mailing list by +visiting https://sympa.inria.fr/sympa/info/caml-commits[its web page.] + Happy Hacking! diff --git a/INSTALL.adoc b/INSTALL.adoc index 03a00b62..bf3990fd 100644 --- a/INSTALL.adoc +++ b/INSTALL.adoc @@ -198,8 +198,15 @@ for guidance on how to edit the generated files by hand. 2. From the top directory, do: + make world.opt ++ +if your platform is supported by the native-code compiler (as reported during + the auto-configuration), or + make world + +if not. + This builds the OCaml bytecode compiler for the first time. 
This phase is fairly verbose; consider redirecting the output to a file: @@ -226,38 +233,7 @@ best thing to do is to try a second bootstrapping phase: just do `make bootstrap` again. It will either crash almost immediately, or re-re-compile everything correctly and reach the fix-point. -4. If your platform is supported by the native-code compiler (as reported during - the auto-configuration), you can now build the native-code compiler. From - the top directory, do: - - make opt -+ -or: - - make opt > log.opt 2>&1 # in sh - make opt >& log.opt # in csh - -5. anchor:step-5[] Compile fast versions of the OCaml compilers, by compiling - them with the native-code compiler (you will have only compiled them to - bytecode in steps 2-4). Just do: - - make opt.opt -+ -Later, you can compile your programs to bytecode using ocamlc.opt instead of -ocamlc, and to native-code using ocamlopt.opt instead of ocamlopt. The ".opt" -compilers should run faster than the normal compilers, especially on large input -files, but they may take longer to start due to increased code size. If -compilation times are an issue on your programs, try the ".opt" compilers to see -if they make a significant difference. -+ -An alternative, and faster approach to steps 2 to 5 is - - make world.opt # to build using native-code compilers -+ -The result is equivalent to `make world opt opt.opt`, but this may fail if -anything goes wrong in native-code generation. - -6. You can now install the OCaml system. This will create the following commands +4. You can now install the OCaml system. This will create the following commands (in the binary directory selected during autoconfiguration): + [width="70%",frame="topbot",cols="25%,75%"] @@ -277,20 +253,17 @@ anything goes wrong in native-code generation. 
| `ocamlcp` | the bytecode compiler in profiling mode |=============================================================================== + -and also, if you built them during <>: `ocamlc.opt`, -`ocamlopt.opt`, `ocamllex.opt`, `ocamldep.opt` and `ocamldoc.opt` -+ From the top directory, become superuser and do: umask 022 # make sure to give read & execute permission to all make install -7. Installation is complete. Time to clean up. From the toplevel directory, +5. Installation is complete. Time to clean up. From the toplevel directory, do: make clean -8. (Optional) The `emacs/` subdirectory contains Emacs-Lisp files for an OCaml +6. (Optional) The `emacs/` subdirectory contains Emacs-Lisp files for an OCaml editing mode and an interface for the debugger. To install these files, change to the `emacs/` subdirectory and do: @@ -303,7 +276,7 @@ or In the latter case, the destination directory defaults to the `site-lisp` directory of your Emacs installation. -9. After installation, do *not* strip the `ocamldebug` and `ocamlbrowser` +7. After installation, do *not* strip the `ocamldebug` and `ocamlbrowser` executables. These are mixed-mode executables (containing both compiled C code and OCaml bytecode) and stripping erases the bytecode! Other executables such as `ocamlrun` can safely be stripped. 
diff --git a/Makefile b/Makefile index 0875c099..1b4bec8e 100644 --- a/Makefile +++ b/Makefile @@ -16,16 +16,22 @@ # The main Makefile # Hard bootstrap how-to: -# (only necessary in some cases, for example if you remove some primitive) +# (only necessary if you remove or rename some primitive) # -# make coreboot [old system -- you were in a stable state] -# -# make clean runtime coreall -# -# make clean runtime coreall +# make core [old system -- you were in a stable state] +# make coreboot [optional -- check state stability] +# +# make clean && make core +# if the above fails: +# +# make clean && make core +# make coreboot [intermediate state with both old and new primitives] +# +# make clean && make runtime && make coreall # make coreboot [new system -- now in a stable state] include config/Makefile +include Makefile.common # For users who don't read the INSTALL file .PHONY: defaultentry @@ -72,7 +78,7 @@ endif YACCFLAGS=-v --strict CAMLLEX=$(CAMLRUN) boot/ocamllex -CAMLDEP=$(CAMLRUN) tools/ocamldep +CAMLDEP=$(CAMLRUN) boot/ocamlc -depend DEPFLAGS=$(INCLUDES) OCAMLDOC_OPT=$(WITH_OCAMLDOC:=.opt) @@ -83,6 +89,7 @@ UTILS=utils/config.cmo utils/misc.cmo \ utils/terminfo.cmo utils/ccomp.cmo utils/warnings.cmo \ utils/consistbl.cmo \ utils/strongly_connected_components.cmo \ + utils/build_path_prefix_map.cmo \ utils/targetint.cmo PARSING=parsing/location.cmo parsing/longident.cmo \ @@ -104,16 +111,15 @@ TYPING=typing/ident.cmo typing/path.cmo \ typing/typedtreeIter.cmo typing/typedtreeMap.cmo \ typing/tast_mapper.cmo \ typing/cmt_format.cmo typing/untypeast.cmo \ - typing/includemod.cmo typing/typetexp.cmo typing/parmatch.cmo \ - typing/stypes.cmo typing/typedecl.cmo typing/typeopt.cmo typing/typecore.cmo \ - typing/typeclass.cmo \ - typing/typemod.cmo + typing/includemod.cmo typing/typetexp.cmo typing/printpat.cmo \ + typing/parmatch.cmo typing/stypes.cmo typing/typedecl.cmo typing/typeopt.cmo \ + typing/typecore.cmo typing/typeclass.cmo typing/typemod.cmo 
COMP=bytecomp/lambda.cmo bytecomp/printlambda.cmo \ bytecomp/semantics_of_primitives.cmo \ bytecomp/switch.cmo bytecomp/matching.cmo \ bytecomp/translobj.cmo bytecomp/translattribute.cmo \ - bytecomp/translcore.cmo \ + bytecomp/translprim.cmo bytecomp/translcore.cmo \ bytecomp/translclass.cmo bytecomp/translmod.cmo \ bytecomp/simplif.cmo bytecomp/runtimedef.cmo \ bytecomp/meta.cmo bytecomp/opcodes.cmo \ @@ -161,6 +167,7 @@ ASMCOMP=\ asmcomp/export_info_for_pack.cmo \ asmcomp/compilenv.cmo \ asmcomp/closure.cmo \ + asmcomp/traverse_for_exported_symbols.cmo \ asmcomp/build_export_info.cmo \ asmcomp/closure_offsets.cmo \ asmcomp/flambda_to_clambda.cmo \ @@ -193,6 +200,7 @@ MIDDLE_END=\ middle_end/base_types/tag.cmo \ middle_end/base_types/linkage_name.cmo \ middle_end/base_types/compilation_unit.cmo \ + middle_end/internal_variable_names.cmo \ middle_end/base_types/variable.cmo \ middle_end/base_types/mutable_variable.cmo \ middle_end/base_types/id_types.cmo \ @@ -200,6 +208,7 @@ MIDDLE_END=\ middle_end/base_types/set_of_closures_origin.cmo \ middle_end/base_types/closure_element.cmo \ middle_end/base_types/closure_id.cmo \ + middle_end/base_types/closure_origin.cmo \ middle_end/base_types/var_within_closure.cmo \ middle_end/base_types/static_exception.cmo \ middle_end/base_types/export_id.cmo \ @@ -269,19 +278,14 @@ MAXSAVED=boot/Saved/Saved.prev/Saved.prev/Saved.prev/Saved.prev/Saved.prev COMPLIBDIR=$(LIBDIR)/compiler-libs -INSTALL_BINDIR=$(DESTDIR)$(BINDIR) -INSTALL_LIBDIR=$(DESTDIR)$(LIBDIR) -INSTALL_COMPLIBDIR=$(DESTDIR)$(COMPLIBDIR) -INSTALL_STUBLIBDIR=$(DESTDIR)$(STUBLIBDIR) -INSTALL_MANDIR=$(DESTDIR)$(MANDIR) -INSTALL_FLEXDLL=$(INSTALL_LIBDIR)/flexdll - +TOPINCLUDES=$(addprefix -I otherlibs/,$(filter-out %threads,$(OTHERLIBRARIES))) RUNTOP=./byterun/ocamlrun ./ocaml \ -nostdlib -I stdlib \ - -noinit $(TOPFLAGS) \ - -I otherlibs/$(UNIXLIB) -NATRUNTOP=./ocamlnat$(EXE) -nostdlib -I stdlib -noinit $(TOPFLAGS) -ifeq "UNIX_OR_WIN32" "unix" + -noinit $(TOPFLAGS) 
$(TOPINCLUDES) +NATRUNTOP=./ocamlnat$(EXE) \ + -nostdlib -I stdlib \ + -noinit $(TOPFLAGS) $(TOPINCLUDES) +ifeq "$(UNIX_OR_WIN32)" "unix" EXTRAPATH= else EXTRAPATH = PATH="otherlibs/win32unix:$(PATH)" @@ -332,6 +336,7 @@ utils/config.ml: utils/config.mlp config/Makefile Makefile $(call SUBST,EXT_LIB) \ $(call SUBST,EXT_OBJ) \ $(call SUBST,FLAMBDA) \ + $(call SUBST,WITH_FLAMBDA_INVARIANTS) \ $(call SUBST,FLEXLINK_FLAGS) \ $(call SUBST_QUOTE,FLEXDLL_DIR) \ $(call SUBST,HOST) \ @@ -399,11 +404,7 @@ coreall: # Build the core system: the minimum needed to make depend and bootstrap .PHONY: core core: -ifeq "$(UNIX_OR_WIN32)" "unix" $(MAKE) coldstart -else # Windows, to be fixed! - $(MAKE) runtime -endif $(MAKE) coreall # Save the current bootstrap compiler @@ -415,7 +416,7 @@ backup: mkdir boot/Saved mv boot/Saved.prev boot/Saved/Saved.prev cp boot/ocamlrun$(EXE) boot/Saved - cd boot; mv ocamlc ocamllex ocamlyacc$(EXE) ocamldep Saved + cd boot; mv ocamlc ocamllex ocamlyacc$(EXE) Saved cd boot; cp $(LIBFILES) Saved # Restore the saved bootstrap compiler if a problem arises @@ -427,8 +428,7 @@ restore: .PHONY: compare compare: @if $(CAMLRUN) tools/cmpbyt boot/ocamlc ocamlc \ - && $(CAMLRUN) tools/cmpbyt boot/ocamllex lex/ocamllex \ - && $(CAMLRUN) tools/cmpbyt boot/ocamldep tools/ocamldep; \ + && $(CAMLRUN) tools/cmpbyt boot/ocamllex lex/ocamllex; \ then echo "Fixpoint reached, bootstrap succeeded."; \ else echo "Fixpoint not reached, try one more bootstrapping cycle."; \ fi @@ -440,7 +440,6 @@ promote-cross: $(CAMLRUN) tools/stripdebug ocamlc boot/ocamlc $(CAMLRUN) tools/stripdebug lex/ocamllex boot/ocamllex cp yacc/ocamlyacc$(EXE) boot/ocamlyacc$(EXE) - $(CAMLRUN) tools/stripdebug tools/ocamldep boot/ocamldep cd stdlib; cp $(LIBFILES) ../boot # Promote the newly compiled system to the rank of bootstrap compiler @@ -462,19 +461,13 @@ opt-core: runtimeopt .PHONY: opt opt: -ifeq "$(UNIX_OR_WIN32)" "unix" $(MAKE) runtimeopt $(MAKE) ocamlopt $(MAKE) libraryopt $(MAKE) 
otherlibrariesopt ocamltoolsopt -else - $(MAKE) opt-core - $(MAKE) otherlibrariesopt ocamltoolsopt -endif # Native-code versions of the tools .PHONY: opt.opt -ifeq "$(UNIX_OR_WIN32)" "unix" opt.opt: $(MAKE) checkstack $(MAKE) runtime @@ -487,23 +480,6 @@ opt.opt: $(MAKE) otherlibrariesopt $(MAKE) ocamllex.opt ocamltoolsopt ocamltoolsopt.opt $(OCAMLDOC_OPT) \ ocamltest.opt -else -opt.opt: core opt-core ocamlc.opt all ocamlopt.opt ocamllex.opt \ - ocamltoolsopt ocamltoolsopt.opt otherlibrariesopt $(OCAMLDOC_OPT) \ - ocamltest.opt -endif - -.PHONY: base.opt -base.opt: - $(MAKE) checkstack - $(MAKE) runtime - $(MAKE) core - $(MAKE) ocaml - $(MAKE) opt-core - $(MAKE) ocamlc.opt - $(MAKE) otherlibraries $(WITH_DEBUGGER) $(WITH_OCAMLDOC) ocamltest - $(MAKE) ocamlopt.opt - $(MAKE) otherlibrariesopt # Core bootstrapping cycle .PHONY: coreboot @@ -602,17 +578,20 @@ flexlink.opt: mv flexlink.exe flexlink.opt && \ mv flexlink flexlink.exe +INSTALL_COMPLIBDIR=$(DESTDIR)$(COMPLIBDIR) +INSTALL_FLEXDLLDIR=$(INSTALL_LIBDIR)/flexdll + .PHONY: install-flexdll install-flexdll: cat stdlib/camlheader flexdll/flexlink.exe > \ "$(INSTALL_BINDIR)/flexlink.exe" ifneq "$(filter-out mingw,$(TOOLCHAIN))" "" - cp flexdll/default$(filter-out _i386,_$(ARCH)).manifest \ + $(INSTALL_DATA) flexdll/default$(filter-out _i386,_$(ARCH)).manifest \ "$(INSTALL_BINDIR)/" endif if test -n "$(wildcard flexdll/flexdll_*.$(O))" ; then \ - $(MKDIR) "$(INSTALL_FLEXDLL)" ; \ - cp flexdll/flexdll_*.$(O) "$(INSTALL_FLEXDLL)" ; \ + $(MKDIR) "$(INSTALL_FLEXDLLDIR)" ; \ + $(INSTALL_DATA) flexdll/flexdll_*.$(O) "$(INSTALL_FLEXDLLDIR)" ; \ fi # Installation @@ -622,26 +601,33 @@ install: $(MKDIR) "$(INSTALL_LIBDIR)" $(MKDIR) "$(INSTALL_STUBLIBDIR)" $(MKDIR) "$(INSTALL_COMPLIBDIR)" - cp VERSION "$(INSTALL_LIBDIR)" + $(INSTALL_DATA) \ + VERSION \ + "$(INSTALL_LIBDIR)" $(MAKE) -C byterun install - cp ocaml "$(INSTALL_BINDIR)/ocaml$(EXE)" - cp ocamlc "$(INSTALL_BINDIR)/ocamlc.byte$(EXE)" + $(INSTALL_PROG) ocaml 
"$(INSTALL_BINDIR)/ocaml$(EXE)" + $(INSTALL_PROG) ocamlc "$(INSTALL_BINDIR)/ocamlc.byte$(EXE)" $(MAKE) -C stdlib install - cp lex/ocamllex "$(INSTALL_BINDIR)/ocamllex.byte$(EXE)" - cp yacc/ocamlyacc$(EXE) "$(INSTALL_BINDIR)/ocamlyacc$(EXE)" - cp utils/*.cmi utils/*.cmt utils/*.cmti utils/*.mli \ + $(INSTALL_PROG) lex/ocamllex "$(INSTALL_BINDIR)/ocamllex.byte$(EXE)" + $(INSTALL_PROG) yacc/ocamlyacc$(EXE) "$(INSTALL_BINDIR)/ocamlyacc$(EXE)" + $(INSTALL_DATA) \ + utils/*.cmi utils/*.cmt utils/*.cmti utils/*.mli \ parsing/*.cmi parsing/*.cmt parsing/*.cmti parsing/*.mli \ typing/*.cmi typing/*.cmt typing/*.cmti typing/*.mli \ bytecomp/*.cmi bytecomp/*.cmt bytecomp/*.cmti bytecomp/*.mli \ driver/*.cmi driver/*.cmt driver/*.cmti driver/*.mli \ toplevel/*.cmi toplevel/*.cmt toplevel/*.cmti toplevel/*.mli \ "$(INSTALL_COMPLIBDIR)" - cp compilerlibs/ocamlcommon.cma compilerlibs/ocamlbytecomp.cma \ + $(INSTALL_DATA) \ + compilerlibs/ocamlcommon.cma compilerlibs/ocamlbytecomp.cma \ compilerlibs/ocamltoplevel.cma $(BYTESTART) $(TOPLEVELSTART) \ "$(INSTALL_COMPLIBDIR)" - cp expunge "$(INSTALL_LIBDIR)/expunge$(EXE)" - cp toplevel/topdirs.cmi toplevel/topdirs.cmt toplevel/topdirs.cmti \ - toplevel/topdirs.mli "$(INSTALL_LIBDIR)" + $(INSTALL_PROG) expunge "$(INSTALL_LIBDIR)/expunge$(EXE)" + $(INSTALL_DATA) \ + toplevel/topdirs.cmi \ + toplevel/topdirs.cmt toplevel/topdirs.cmti \ + toplevel/topdirs.mli \ + "$(INSTALL_LIBDIR)" $(MAKE) -C tools install ifeq "$(UNIX_OR_WIN32)" "unix" # Install manual pages only on Unix $(MKDIR) "$(INSTALL_MANDIR)/man$(PROGRAMS_MAN_SECTION)" @@ -665,7 +651,7 @@ ifeq "$(UNIX_OR_WIN32)" "win32" $(MAKE) install-flexdll; \ fi endif - cp config/Makefile "$(INSTALL_LIBDIR)/Makefile.config" + $(INSTALL_DATA) config/Makefile "$(INSTALL_LIBDIR)/Makefile.config" if test -f ocamlopt; then $(MAKE) installopt; else \ cd "$(INSTALL_BINDIR)"; \ $(LN) ocamlc.byte$(EXE) ocamlc$(EXE); \ @@ -676,17 +662,26 @@ endif .PHONY: installopt installopt: $(MAKE) -C asmrun install 
- cp ocamlopt "$(INSTALL_BINDIR)/ocamlopt.byte$(EXE)" + $(INSTALL_PROG) ocamlopt "$(INSTALL_BINDIR)/ocamlopt.byte$(EXE)" $(MAKE) -C stdlib installopt - cp middle_end/*.cmi middle_end/*.cmt middle_end/*.cmti \ + $(INSTALL_DATA) \ + middle_end/*.cmi \ + middle_end/*.cmt middle_end/*.cmti \ middle_end/*.mli \ - "$(INSTALL_COMPLIBDIR)" - cp middle_end/base_types/*.cmi middle_end/base_types/*.cmt \ - middle_end/base_types/*.cmti middle_end/base_types/*.mli \ - "$(INSTALL_COMPLIBDIR)" - cp asmcomp/*.cmi asmcomp/*.cmt asmcomp/*.cmti asmcomp/*.mli \ - "$(INSTALL_COMPLIBDIR)" - cp compilerlibs/ocamloptcomp.cma $(OPTSTART) "$(INSTALL_COMPLIBDIR)" + "$(INSTALL_COMPLIBDIR)" + $(INSTALL_DATA) \ + middle_end/base_types/*.cmi \ + middle_end/base_types/*.cmt middle_end/base_types/*.cmti \ + middle_end/base_types/*.mli \ + "$(INSTALL_COMPLIBDIR)" + $(INSTALL_DATA) \ + asmcomp/*.cmi \ + asmcomp/*.cmt asmcomp/*.cmti \ + asmcomp/*.mli \ + "$(INSTALL_COMPLIBDIR)" + $(INSTALL_DATA) \ + compilerlibs/ocamloptcomp.cma $(OPTSTART) \ + "$(INSTALL_COMPLIBDIR)" if test -n "$(WITH_OCAMLDOC)"; then \ $(MAKE) -C ocamldoc installopt; \ fi @@ -701,30 +696,39 @@ installopt: fi $(MAKE) -C tools installopt if test -f ocamlopt.opt -a -f flexdll/flexlink.opt ; then \ - cp -f flexdll/flexlink.opt "$(INSTALL_BINDIR)/flexlink$(EXE)" ; \ + $(INSTALL_PROG) \ + flexdll/flexlink.opt "$(INSTALL_BINDIR)/flexlink$(EXE)" ; \ fi .PHONY: installoptopt installoptopt: - cp ocamlc.opt "$(INSTALL_BINDIR)/ocamlc.opt$(EXE)" - cp ocamlopt.opt "$(INSTALL_BINDIR)/ocamlopt.opt$(EXE)" - cp lex/ocamllex.opt "$(INSTALL_BINDIR)/ocamllex.opt$(EXE)" + $(INSTALL_PROG) ocamlc.opt "$(INSTALL_BINDIR)/ocamlc.opt$(EXE)" + $(INSTALL_PROG) ocamlopt.opt "$(INSTALL_BINDIR)/ocamlopt.opt$(EXE)" + $(INSTALL_PROG) \ + lex/ocamllex.opt "$(INSTALL_BINDIR)/ocamllex.opt$(EXE)" cd "$(INSTALL_BINDIR)"; \ $(LN) ocamlc.opt$(EXE) ocamlc$(EXE); \ $(LN) ocamlopt.opt$(EXE) ocamlopt$(EXE); \ $(LN) ocamllex.opt$(EXE) ocamllex$(EXE) - cp utils/*.cmx 
parsing/*.cmx typing/*.cmx bytecomp/*.cmx \ - driver/*.cmx asmcomp/*.cmx "$(INSTALL_COMPLIBDIR)" - cp compilerlibs/ocamlcommon.cmxa compilerlibs/ocamlcommon.$(A) \ + $(INSTALL_DATA) \ + utils/*.cmx parsing/*.cmx typing/*.cmx bytecomp/*.cmx \ + driver/*.cmx asmcomp/*.cmx \ + "$(INSTALL_COMPLIBDIR)" + $(INSTALL_DATA) \ + compilerlibs/ocamlcommon.cmxa compilerlibs/ocamlcommon.$(A) \ compilerlibs/ocamlbytecomp.cmxa compilerlibs/ocamlbytecomp.$(A) \ - compilerlibs/ocamloptcomp.cmxa compilerlibs/ocamloptcomp.$(A) \ + compilerlibs/ocamloptcomp.cmxa compilerlibs/ocamloptcomp.$(A) \ $(BYTESTART:.cmo=.cmx) $(BYTESTART:.cmo=.$(O)) \ $(OPTSTART:.cmo=.cmx) $(OPTSTART:.cmo=.$(O)) \ "$(INSTALL_COMPLIBDIR)" if test -f ocamlnat$(EXE) ; then \ - cp ocamlnat$(EXE) "$(INSTALL_BINDIR)/ocamlnat$(EXE)"; \ - cp toplevel/opttopdirs.cmi "$(INSTALL_LIBDIR)"; \ - cp compilerlibs/ocamlopttoplevel.cmxa \ + $(INSTALL_PROG) \ + ocamlnat$(EXE) "$(INSTALL_BINDIR)/ocamlnat$(EXE)"; \ + $(INSTALL_DATA) \ + toplevel/opttopdirs.cmi \ + "$(INSTALL_LIBDIR)"; \ + $(INSTALL_DATA) \ + compilerlibs/ocamlopttoplevel.cmxa \ compilerlibs/ocamlopttoplevel.$(A) \ $(OPTTOPLEVELSTART:.cmo=.cmx) $(OPTTOPLEVELSTART:.cmo=.$(O)) \ "$(INSTALL_COMPLIBDIR)"; \ @@ -735,9 +739,11 @@ installoptopt: # Installation of the *.ml sources of compiler-libs .PHONY: install-compiler-sources install-compiler-sources: - cp utils/*.ml parsing/*.ml typing/*.ml bytecomp/*.ml driver/*.ml \ + $(INSTALL_DATA) \ + utils/*.ml parsing/*.ml typing/*.ml bytecomp/*.ml driver/*.ml \ toplevel/*.ml middle_end/*.ml middle_end/base_types/*.ml \ - asmcomp/*.ml $(INSTALL_COMPLIBDIR) + asmcomp/*.ml \ + "$(INSTALL_COMPLIBDIR)" # Run all tests @@ -819,22 +825,17 @@ partialclean:: .PHONY: runtop runtop: -ifeq "$(UNIX_OR_WIN32)" "unix" - $(MAKE) runtime - $(MAKE) coreall - $(MAKE) ocaml -else - $(MAKE) core + $(MAKE) coldstart + $(MAKE) ocamlc + $(MAKE) otherlibraries $(MAKE) ocaml -endif @rlwrap --help 2>/dev/null && $(EXTRAPATH) rlwrap $(RUNTOP) ||\ 
$(EXTRAPATH) $(RUNTOP) .PHONY: natruntop natruntop: - $(MAKE) runtime - $(MAKE) coreall - $(MAKE) opt.opt + $(MAKE) core + $(MAKE) opt $(MAKE) ocamlnat @rlwrap --help 2>/dev/null && $(EXTRAPATH) rlwrap $(NATRUNTOP) ||\ $(EXTRAPATH) $(NATRUNTOP) @@ -983,7 +984,8 @@ clean:: otherlibs_all := bigarray dynlink graph raw_spacetime_lib \ str systhreads threads unix win32graph win32unix subdirs := asmrun byterun debugger lex ocamldoc ocamltest stdlib tools \ - $(addprefix otherlibs/, $(otherlibs_all)) + $(addprefix otherlibs/, $(otherlibs_all)) \ + ocamldoc/stdlib_non_prefixed .PHONY: alldepend ifeq "$(TOOLCHAIN)" "msvc" @@ -1111,15 +1113,16 @@ ocamldebugger: ocamlc ocamlyacc ocamllex otherlibraries partialclean:: $(MAKE) -C debugger clean -# Check that the stack limit is reasonable. -ifeq "$(UNIX_OR_WIN32)" "unix" +# Check that the stack limit is reasonable (Unix-only) .PHONY: checkstack checkstack: +ifeq "$(UNIX_OR_WIN32)" "unix" if $(MKEXE) $(OUTPUTEXE)tools/checkstack$(EXE) tools/checkstack.c; \ then tools/checkstack$(EXE); \ - else :; \ fi rm -f tools/checkstack$(EXE) +else + @ endif # Lint @since and @deprecated annotations @@ -1137,19 +1140,6 @@ lintapidiff: clean:: cd testsuite; $(MAKE) clean -# Make MacOS X package -ifeq "$(UNIX_OR_WIN32)" "unix" -.PHONY: package-macosx -package-macosx: - sudo rm -rf package-macosx/root - $(MAKE) PREFIX="`pwd`"/package-macosx/root install - tools/make-package-macosx - sudo rm -rf package-macosx/root - -clean:: - rm -rf package-macosx/*.pkg package-macosx/*.dmg -endif - # The middle end (whose .cma library is currently only used for linking # the "ocamlobjinfo" program, since we cannot depend on the whole native code # compiler for "make world" and the list of dependencies for @@ -1205,11 +1195,15 @@ check_arch: .PHONY: check_all_arches check_all_arches: +ifneq ($(shell grep -E '^\#define ARCH_SIXTYFOUR$$' byterun/caml/m.h 2> /dev/null),) @STATUS=0; \ for i in $(ARCHES); do \ $(MAKE) --no-print-directory check_arch ARCH=$$i || 
STATUS=1; \ done; \ exit $$STATUS +else + @echo "Architecture tests are disabled on 32-bit platforms." +endif # Compiler Plugins @@ -1321,6 +1315,6 @@ distclean: clean rm -f config/Makefile byterun/caml/m.h byterun/caml/s.h rm -f tools/*.bak rm -f ocaml ocamlc - rm -f testsuite/_log + rm -f testsuite/_log* include .depend diff --git a/Makefile.common b/Makefile.common new file mode 100644 index 00000000..cd9f676b --- /dev/null +++ b/Makefile.common @@ -0,0 +1,30 @@ +#************************************************************************** +#* * +#* OCaml * +#* * +#* Gabriel Scherer, projet Parsifal, INRIA Saclay * +#* * +#* Copyright 2018 Institut National de Recherche en Informatique et * +#* en Automatique. * +#* * +#* All rights reserved. This file is distributed under the terms of * +#* the GNU Lesser General Public License version 2.1, with the * +#* special exception on linking described in the file LICENSE. * +#* * +#************************************************************************** + +# This makefile contains common definitions shared by other Makefiles +# We assume that config/Makefile has already been included + +INSTALL ?= install +INSTALL_DATA ?= $(INSTALL) -m u=rw,g=rw,o=r +INSTALL_PROG ?= $(INSTALL) -m u=rwx,g=rwx,o=rx + +# note: these are defined by lazy expansions +# as some parts of the makefiles change BINDIR, etc. +# and expect INSTALL_BINDIR, etc. 
to stay in synch +# (see `shellquote` in tools/Makefile) +INSTALL_BINDIR = $(DESTDIR)$(BINDIR) +INSTALL_LIBDIR = $(DESTDIR)$(LIBDIR) +INSTALL_STUBLIBDIR = $(DESTDIR)$(STUBLIBDIR) +INSTALL_MANDIR = $(DESTDIR)$(MANDIR) diff --git a/README.adoc b/README.adoc index 34ca38a0..74d1ec25 100644 --- a/README.adoc +++ b/README.adoc @@ -1,8 +1,10 @@ |===== -| Branch `trunk` | Branch `4.05` | Branch `4.04` +| Branch `trunk` | Branch `4.06` | Branch `4.05` | Branch `4.04` | image:https://travis-ci.org/ocaml/ocaml.svg?branch=trunk["TravisCI Build Status (trunk branch)",link="https://travis-ci.org/ocaml/ocaml"] image:https://ci.appveyor.com/api/projects/status/github/ocaml/ocaml?branch=trunk&svg=true["AppVeyor Build Status (trunk branch)",link="https://ci.appveyor.com/project/avsm/ocaml"] +| image:https://travis-ci.org/ocaml/ocaml.svg?branch=4.06["TravisCI Build Status (4.06 branch)",link="https://travis-ci.org/ocaml/ocaml"] + image:https://ci.appveyor.com/api/projects/status/github/ocaml/ocaml?branch=4.06&svg=true["AppVeyor Build Status (4.06 branch)",link="https://ci.appveyor.com/project/avsm/ocaml"] | image:https://travis-ci.org/ocaml/ocaml.svg?branch=4.05["TravisCI Build Status (4.05 branch)",link="https://travis-ci.org/ocaml/ocaml"] image:https://ci.appveyor.com/api/projects/status/github/ocaml/ocaml?branch=4.05&svg=true["AppVeyor Build Status (4.05 branch)",link="https://ci.appveyor.com/project/avsm/ocaml"] | image:https://travis-ci.org/ocaml/ocaml.svg?branch=4.04["TravisCI Build Status (4.04 branch)",link="https://travis-ci.org/ocaml/ocaml"] @@ -73,45 +75,37 @@ Windows, see link:README.win32.adoc[]. The OCaml manual is distributed in HTML, PDF, Postscript, DVI, and Emacs Info files. It is available at -http://caml.inria.fr/ - -The community also maintains the Web site http://ocaml.org, with tutorials -and other useful information for OCaml users. 
+http://caml.inria.fr/pub/docs/manual-ocaml/ == Availability The complete OCaml distribution can be accessed at -http://caml.inria.fr/ +http://ocaml.org/docs/install.html == Keeping in Touch with the Caml Community -There exists a mailing list of users of the OCaml implementations developed -at INRIA. The purpose of this list is to share experience, exchange ideas -(and even code), and report on applications of the OCaml language. Messages -can be written in English or in French. The list has more than 1000 -subscribers. - -Messages to the list should be sent to: +The OCaml mailing list is the longest-running forum for OCaml users. +You can email it at mailto:caml-list@inria.fr[] -You can subscribe to this list via the Web interface at +You can subscribe and access list archives via the Web interface at https://sympa.inria.fr/sympa/subscribe/caml-list -Archives of the list are available on the Web site above. +You can also access a newer discussion forum at -The Usenet news `groups comp.lang.ml` and `comp.lang.functional` also -contains discussions about the ML family of programming languages, including -OCaml. +https://discuss.ocaml.org/ -The IRC channel `#ocaml` on https://freenode.net/[Freenode] also has several -hundred users and welcomes questions. +There also exist other mailing lists, chat channels, and various other forums +around the internet for getting in touch with the OCaml and ML family language +community. These can be accessed at -The OCaml Community website is +http://ocaml.org/community/ -http://ocaml.org/ +In particular, the IRC channel `#ocaml` on https://freenode.net/[Freenode] has a +long history and welcomes questions. 
== Bug Reports and User Feedback diff --git a/README.win32.adoc b/README.win32.adoc index 57ef54fa..8d9d110b 100644 --- a/README.win32.adoc +++ b/README.win32.adoc @@ -188,7 +188,7 @@ the top-level of the OCaml distribution by running: eval $(tools/msvs-promote-path) -If you forget to do this, `make world` will fail relatively +If you forget to do this, `make world.opt` will fail relatively quickly as it will be unable to link `ocamlrun`. Now run: @@ -210,7 +210,8 @@ which indicates where to install everything. Finally, use `make` to build the system, e.g. - make world bootstrap opt opt.opt install + make world.opt + make install After installing, it is not necessary to keep the Cygwin installation (although you may require it to build additional third party libraries and tools). You @@ -281,7 +282,8 @@ which indicates where to install everything. Finally, use `make` to build the system, e.g. - make world bootstrap opt opt.opt install + make world.opt + make install After installing, you will need to ensure that `ocamlopt` (or `ocamlc -custom`) can access the C compiler. You can do this either by using OCaml from Cygwin's @@ -292,12 +294,6 @@ bash or by adding Cygwin's bin directory (e.g. `C:\cygwin\bin`) to your `PATH`. * The replay debugger is partially supported (no reverse execution). -* The default `config/Makefile.mingw` and `config/Makefile.mingw64` pass - `-static-libgcc` to the linker. 
For more information on this topic: - - - http://gcc.gnu.org/onlinedocs/gcc-4.9.1/gcc/Link-Options.html#Link-Options - - http://caml.inria.fr/mantis/view.php?id=6411 - [[seflexdll]] == FlexDLL Although the core of FlexDLL is necessarily written in C, the `flexlink` program @@ -327,7 +323,10 @@ done in one of three ways: OCaml is then compiled as normal for the port you require, except that before compiling `world`, you must compile `flexdll`, i.e.: - make flexdll world [bootstrap] opt opt.opt flexlink.opt install + make flexdll + make world.opt + make flexlink.opt + make install * You should ignore the error messages that say ocamlopt was not found. * `make install` will install FlexDLL by placing `flexlink.exe` diff --git a/VERSION b/VERSION index 33d444c9..6d514477 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ -4.06.1 +4.07.0 # The version string is the first line of this file. # It must be in the format described in stdlib/sys.mli diff --git a/appveyor.yml b/appveyor.yml index baa20dac..c07c60f6 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -35,10 +35,10 @@ cache: install: # This is a hangover from monitoring effects of MPR#7452 - wmic cpu get name - - call "%APPVEYOR_BUILD_FOLDER%\appveyor_build.cmd" install + - call "%APPVEYOR_BUILD_FOLDER%\tools\ci\appveyor\appveyor_build.cmd" install build_script: - - call "%APPVEYOR_BUILD_FOLDER%\appveyor_build.cmd" build + - call "%APPVEYOR_BUILD_FOLDER%\tools\ci\appveyor\appveyor_build.cmd" build test_script: - - call "%APPVEYOR_BUILD_FOLDER%\appveyor_build.cmd" test + - call "%APPVEYOR_BUILD_FOLDER%\tools\ci\appveyor\appveyor_build.cmd" test diff --git a/appveyor_build.cmd b/appveyor_build.cmd deleted file mode 100644 index 76093daf..00000000 --- a/appveyor_build.cmd +++ /dev/null @@ -1,113 +0,0 @@ -@rem *********************************************************************** -@rem * * -@rem * OCaml * -@rem * * -@rem * David Allsopp, OCaml Labs, Cambridge. * -@rem * * -@rem * Copyright 2017 MetaStack Solutions Ltd. 
* -@rem * * -@rem * All rights reserved. This file is distributed under the terms of * -@rem * the GNU Lesser General Public License version 2.1, with the * -@rem * special exception on linking described in the file LICENSE. * -@rem * * -@rem *********************************************************************** - -@rem BE CAREFUL ALTERING THIS FILE TO ENSURE THAT ERRORS PROPAGATE -@rem IF A COMMAND SHOULD FAIL IT PROBABLY NEEDS TO END WITH -@rem || exit /b 1 -@rem BASICALLY, DO THE TESTING IN BASH... - -@rem Do not call setlocal! -@echo off - -goto %1 - -goto :EOF - -:SaveVars -set OCAML_PREV_PATH=%PATH% -set OCAML_PREV_LIB=%LIB% -set OCAML_PREV_INCLUDE=%INCLUDE% -goto :EOF - -:RestoreVars -set PATH=%OCAML_PREV_PATH% -set LIB=%OCAML_PREV_LIB% -set INCLUDE=%OCAML_PREV_INCLUDE% -goto :EOF - -:CheckPackage -"%CYG_ROOT%\bin\bash.exe" -lc "cygcheck -dc %1" | findstr %1 > nul -if %ERRORLEVEL% equ 1 ( - echo Cygwin package %1 will be installed - set CYGWIN_INSTALL_PACKAGES=%CYGWIN_INSTALL_PACKAGES%,%1 -) -goto :EOF - -:UpgradeCygwin -if "%CYGWIN_INSTALL_PACKAGES%" neq "" "%CYG_ROOT%\setup-x86_64.exe" --quiet-mode --no-shortcuts --no-startmenu --no-desktop --only-site --root "%CYG_ROOT%" --site "%CYG_MIRROR%" --local-package-dir "%CYG_CACHE%" --packages %CYGWIN_INSTALL_PACKAGES:~1% > nul -for %%P in (%CYGWIN_COMMANDS%) do "%CYG_ROOT%\bin\%%P.exe" --version > nul || set CYGWIN_UPGRADE_REQUIRED=1 -"%CYG_ROOT%\bin\bash.exe" -lc "cygcheck -dc %CYGWIN_PACKAGES%" -if %CYGWIN_UPGRADE_REQUIRED% equ 1 ( - echo Cygwin package upgrade required - please go and drink coffee - "%CYG_ROOT%\setup-x86_64.exe" --quiet-mode --no-shortcuts --no-startmenu --no-desktop --only-site --root "%CYG_ROOT%" --site "%CYG_MIRROR%" --local-package-dir "%CYG_CACHE%" --upgrade-also > nul - "%CYG_ROOT%\bin\bash.exe" -lc "cygcheck -dc %CYGWIN_PACKAGES%" -) -goto :EOF - -:install -chcp 65001 > nul -rem This must be kept in sync with appveyor_build.sh -set BUILD_PREFIX=🐫реализация -git worktree add 
"..\%BUILD_PREFIX%-msvc64" -b appveyor-build-msvc64 -git worktree add "..\%BUILD_PREFIX%-mingw32" -b appveyor-build-mingw32 -git worktree add "..\%BUILD_PREFIX%-msvc32" -b appveyor-build-msvc32 -cd "..\%BUILD_PREFIX%-mingw32" -git submodule update --init flexdll - -cd "%APPVEYOR_BUILD_FOLDER%" -appveyor DownloadFile "https://github.com/alainfrisch/flexdll/archive/0.37.tar.gz" -FileName "flexdll.tar.gz" || exit /b 1 -appveyor DownloadFile "https://github.com/alainfrisch/flexdll/releases/download/0.37/flexdll-bin-0.37.zip" -FileName "flexdll.zip" || exit /b 1 -rem flexdll.zip is processed here, rather than in appveyor_build.sh because the -rem unzip command comes from MSYS2 (via Git for Windows) and it has to be -rem invoked via cmd /c in a bash script which is weird(er). -mkdir "%APPVEYOR_BUILD_FOLDER%\..\flexdll" -move flexdll.zip "%APPVEYOR_BUILD_FOLDER%\..\flexdll" -cd "%APPVEYOR_BUILD_FOLDER%\..\flexdll" && unzip -q flexdll.zip - -rem CYGWIN_PACKAGES is the list of required Cygwin packages (cygwin is included -rem in the list just so that the Cygwin version is always displayed on the log). -rem CYGWIN_COMMANDS is a corresponding command to run with --version to test -rem whether the package works. This is used to verify whether the installation -rem needs upgrading. 
-set CYGWIN_PACKAGES=cygwin make diffutils mingw64-i686-gcc-core -set CYGWIN_COMMANDS=cygcheck make diff i686-w64-mingw32-gcc - -set CYGWIN_INSTALL_PACKAGES= -set CYGWIN_UPGRADE_REQUIRED=0 - -for %%P in (%CYGWIN_PACKAGES%) do call :CheckPackage %%P -call :UpgradeCygwin - -"%CYG_ROOT%\bin\bash.exe" -lec "$APPVEYOR_BUILD_FOLDER/appveyor_build.sh install" || exit /b 1 - -call :SaveVars -goto :EOF - -:build -rem Run the msvc64 and mingw32 builds -call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvars64.bat" -"%CYG_ROOT%\bin\bash.exe" -lec "$APPVEYOR_BUILD_FOLDER/appveyor_build.sh" || exit /b 1 - -rem Reconfigure the environment and run the msvc32 partial build -call :RestoreVars -call "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x86 -"%CYG_ROOT%\bin\bash.exe" -lec "$APPVEYOR_BUILD_FOLDER/appveyor_build.sh msvc32-only" || exit /b 1 -goto :EOF - -:test -rem Reconfigure the environment for the msvc64 build -call :RestoreVars -call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvars64.bat" -"%CYG_ROOT%\bin\bash.exe" -lec "$APPVEYOR_BUILD_FOLDER/appveyor_build.sh test" || exit /b 1 -goto :EOF diff --git a/appveyor_build.sh b/appveyor_build.sh deleted file mode 100644 index 995553f1..00000000 --- a/appveyor_build.sh +++ /dev/null @@ -1,125 +0,0 @@ -#!/bin/bash -#************************************************************************** -#* * -#* OCaml * -#* * -#* Christophe Troestler * -#* * -#* Copyright 2015 Christophe Troestler * -#* * -#* All rights reserved. This file is distributed under the terms of * -#* the GNU Lesser General Public License version 2.1, with the * -#* special exception on linking described in the file LICENSE. * -#* * -#************************************************************************** - -BUILD_PID=0 - -function run { - NAME=$1 - shift - echo "-=-=- $NAME -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-" - $@ - CODE=$? - if [ $CODE -ne 0 ]; then - echo "-=-=- $NAME failed! 
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-" - if [ $BUILD_PID -ne 0 ] ; then - kill -KILL $BUILD_PID 2>/dev/null - wait $BUILD_PID 2>/dev/null - fi - exit $CODE - else - echo "-=-=- End of $NAME -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-" - fi -} - -function set_configuration { - cp config/m-nt.h byterun/caml/m.h - cp config/s-nt.h byterun/caml/s.h - - FILE=$(pwd | cygpath -f - -m)/config/Makefile - echo "Edit $FILE to set PREFIX=$2" - sed -e "/PREFIX=/s|=.*|=$2|" \ - -e "/^ *CFLAGS *=/s/\r\?$/ $3\0/" \ - config/Makefile.$1 > config/Makefile -# run "Content of $FILE" cat config/Makefile -} - -APPVEYOR_BUILD_FOLDER=$(echo $APPVEYOR_BUILD_FOLDER| cygpath -f -) -# These directory names are specified here, because getting UTF-8 correctly -# through appveyor.yml -> Command Script -> Bash is quite painful... -OCAMLROOT=$(echo $PROGRAMFILES/Бактріан🐫| cygpath -f - -m) - -# This must be kept in sync with appveyor_build.cmd -BUILD_PREFIX=🐫реализация - -export PATH=$(echo $OCAMLROOT| cygpath -f -)/bin/flexdll:$PATH - -case "$1" in - install) - mkdir -p "$OCAMLROOT/bin/flexdll" - cd $APPVEYOR_BUILD_FOLDER/../flexdll - # msvc64 objects need to be compiled with VS2015, so are copied later from - # a source build. 
- for f in flexdll.h flexlink.exe flexdll*_msvc.obj default*.manifest ; do - cp $f "$OCAMLROOT/bin/flexdll/" - done - echo 'eval $($APPVEYOR_BUILD_FOLDER/tools/msvs-promote-path)' >> ~/.bash_profile - ;; - msvc32-only) - cd $APPVEYOR_BUILD_FOLDER/../$BUILD_PREFIX-msvc32 - - set_configuration msvc "$OCAMLROOT-msvc32" -WX - - run "make world" make world - run "make runtimeopt" make runtimeopt - run "make -C otherlibs/systhreads libthreadsnat.lib" \ - make -C otherlibs/systhreads libthreadsnat.lib - - exit 0 - ;; - test) - FULL_BUILD_PREFIX=$APPVEYOR_BUILD_FOLDER/../$BUILD_PREFIX - run "ocamlc.opt -version" $FULL_BUILD_PREFIX-msvc64/ocamlc.opt -version - run "test msvc64" make -C $FULL_BUILD_PREFIX-msvc64 tests - run "test mingw32" make -C $FULL_BUILD_PREFIX-mingw32 tests - run "install msvc64" make -C $FULL_BUILD_PREFIX-msvc64 install - run "install mingw32" make -C $FULL_BUILD_PREFIX-mingw32 install - ;; - *) - cd $APPVEYOR_BUILD_FOLDER/../$BUILD_PREFIX-msvc64 - - tar -xzf $APPVEYOR_BUILD_FOLDER/flexdll.tar.gz - cd flexdll-$FLEXDLL_VERSION - make MSVC_DETECT=0 CHAINS=msvc64 support - cp flexdll*_msvc64.obj "$OCAMLROOT/bin/flexdll/" - cd .. - - set_configuration msvc64 "$OCAMLROOT" -WX - - cd ../$BUILD_PREFIX-mingw32 - - set_configuration mingw "$OCAMLROOT-mingw32" -Werror - - cd $APPVEYOR_BUILD_FOLDER/../$BUILD_PREFIX-msvc64 - - export TERM=ansi - script --quiet --return --command "make -C ../$BUILD_PREFIX-mingw32 flexdll world.opt" ../$BUILD_PREFIX-mingw32/build.log >/dev/null 2>/dev/null & - BUILD_PID=$! - - run "make world" make world - run "make bootstrap" make bootstrap - run "make opt" make opt - run "make opt.opt" make opt.opt - - set +e - - # For an explanation of the sed command, see https://github.com/appveyor/ci/issues/1824 - tail --pid=$BUILD_PID -n +1 -f ../$BUILD_PREFIX-mingw32/build.log | sed -e 's/\d027\[K//g' -e 's/\d027\[m/\d027[0m/g' -e 's/\d027\[01\([m;]\)/\d027[1\1/g' & - TAIL_PID=$! - wait $BUILD_PID - STATUS=$? 
- wait $TAIL_PID - exit $STATUS - ;; -esac diff --git a/asmcomp/CSEgen.ml b/asmcomp/CSEgen.ml index 7e585355..d0b6eb2a 100644 --- a/asmcomp/CSEgen.ml +++ b/asmcomp/CSEgen.ml @@ -362,6 +362,10 @@ method private cse n i = next = self#cse empty_numbering i.next} method fundecl f = - {f with fun_body = self#cse empty_numbering f.fun_body} + (* CSE can trigger bad register allocation behaviors, see MPR#7630 *) + if List.mem Cmm.No_CSE f.fun_codegen_options then + f + else + {f with fun_body = self#cse empty_numbering f.fun_body } end diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp index 36ec47ed..cf261a06 100644 --- a/asmcomp/amd64/emit.mlp +++ b/asmcomp/amd64/emit.mlp @@ -385,7 +385,7 @@ let output_test_zero arg = (* Output a floating-point compare and branch *) -let emit_float_test cmp neg i lbl = +let emit_float_test cmp i lbl = (* Effect of comisd on flags and conditional branches: ZF PF CF cond. branches taken unordered 1 1 1 je, jb, jbe, jp @@ -395,33 +395,41 @@ let emit_float_test cmp neg i lbl = If FP traps are on (they are off by default), comisd traps on QNaN and SNaN but ucomisd traps on SNaN only. *) - match (cmp, neg) with - | (Ceq, false) | (Cne, true) -> + match cmp with + | CFeq -> let next = new_label() in I.ucomisd (arg i 1) (arg i 0); I.jp (label next); (* skip if unordered *) I.je lbl; (* branch taken if x=y *) def_label next - | (Cne, false) | (Ceq, true) -> + | CFneq -> I.ucomisd (arg i 1) (arg i 0); I.jp lbl; (* branch taken if unordered *) I.jne lbl (* branch taken if x<y or x>y *) - | (Clt, _) -> + | CFlt -> I.comisd (arg i 0) (arg i 1); - if not neg then I.ja lbl (* branch taken if y>x i.e. x<y *) - else I.jbe lbl (* taken if unordered or y<=x i.e. !(x<y) *) - | (Cle, _) -> + I.ja lbl (* branch taken if y>x i.e. x<y *) + | CFnlt -> + I.comisd (arg i 0) (arg i 1); + I.jbe lbl (* taken if unordered or y<=x i.e. !(x<y) *) + | CFle -> + I.comisd (arg i 0) (arg i 1);(* swap compare *) + I.jae lbl (* branch taken if y>=x i.e. x<=y *) + | CFnle -> I.comisd (arg i 0) (arg i 1);(* swap compare *) - if not neg then I.jae lbl (* branch taken if y>=x i.e. 
x<=y *) - else I.jb lbl (* taken if unordered or y<x i.e. !(x<=y) *) - | (Cgt, _) -> + I.jb lbl (* taken if unordered or y<x i.e. !(x<=y) *) + | CFgt -> I.comisd (arg i 1) (arg i 0); - if not neg then I.ja lbl (* branch taken if x>y *) - else I.jbe lbl (* taken if unordered or x<=y i.e. !(x>y) *) - | (Cge, _) -> + I.ja lbl (* branch taken if x>y *) + | CFngt -> + I.comisd (arg i 1) (arg i 0); + I.jbe lbl (* taken if unordered or x<=y i.e. !(x>y) *) + | CFge -> + I.comisd (arg i 1) (arg i 0);(* swap compare *) + I.jae lbl (* branch taken if x>=y *) + | CFnge -> I.comisd (arg i 1) (arg i 0);(* swap compare *) - if not neg then I.jae lbl (* branch taken if x>=y *) - else I.jb lbl (* taken if unordered or x<y i.e. !(x>=y) *) + I.jb lbl (* taken if unordered or x<y i.e. !(x>=y) *) (* Deallocate the stack frame before a return or tail call *) @@ -770,8 +778,8 @@ let emit_instr fallthrough i = | Iinttest_imm(cmp, n) -> I.cmp (int n) (arg i 0); I.j (cond cmp) lbl - | Ifloattest(cmp, neg) -> - emit_float_test cmp neg i lbl + | Ifloattest cmp -> + emit_float_test cmp i lbl | Ioddtest -> I.test (int 1) (arg8 i 0); I.jne lbl diff --git a/asmcomp/amd64/reload.ml b/asmcomp/amd64/reload.ml index 690e0165..a4070b47 100644 --- a/asmcomp/amd64/reload.ml +++ b/asmcomp/amd64/reload.ml @@ -107,13 +107,13 @@ method! reload_test tst arg = if stackp arg.(0) && stackp arg.(1) then [| self#makereg arg.(0); arg.(1) |] else arg - | Ifloattest((Clt|Cle), _) -> + | Ifloattest (CFlt | CFnlt | CFle | CFnle) -> (* Cf. 
emit.mlp: we swap arguments in this case *) (* First argument can be on stack, second must be in register *) if stackp arg.(1) then [| arg.(0); self#makereg arg.(1) |] else arg - | Ifloattest((Ceq|Cne|Cgt|Cge), _) -> + | Ifloattest (CFeq | CFneq | CFgt | CFngt | CFge | CFnge) -> (* Second argument can be on stack, first must be in register *) if stackp arg.(0) then [| self#makereg arg.(0); arg.(1) |] diff --git a/asmcomp/arm/arch.ml b/asmcomp/arm/arch.ml index 2ec00e01..becfff38 100644 --- a/asmcomp/arm/arch.ml +++ b/asmcomp/arm/arch.ml @@ -19,7 +19,7 @@ open Format type abi = EABI | EABI_HF -type arch = ARMv4 | ARMv5 | ARMv5TE | ARMv6 | ARMv6T2 | ARMv7 +type arch = ARMv4 | ARMv5 | ARMv5TE | ARMv6 | ARMv6T2 | ARMv7 | ARMv8 type fpu = Soft | VFPv2 | VFPv3_D16 | VFPv3 let abi = @@ -35,6 +35,7 @@ let string_of_arch = function | ARMv6 -> "armv6" | ARMv6T2 -> "armv6t2" | ARMv7 -> "armv7" + | ARMv8 -> "armv8" let string_of_fpu = function Soft -> "soft" @@ -53,8 +54,10 @@ let (arch, fpu, thumb) = | EABI, "armv6" -> ARMv6, Soft, false | EABI, "armv6t2" -> ARMv6T2, Soft, false | EABI, "armv7" -> ARMv7, Soft, false + | EABI, "armv8" -> ARMv8, Soft, false | EABI, _ -> ARMv4, Soft, false | EABI_HF, "armv6" -> ARMv6, VFPv2, false + | EABI_HF, "armv8" -> ARMv8, VFPv3, true | EABI_HF, _ -> ARMv7, VFPv3_D16, true end in (ref def_arch, ref def_fpu, ref def_thumb) @@ -67,6 +70,7 @@ let farch spec = | "armv6" -> ARMv6 | "armv6t2" -> ARMv6T2 | "armv7" -> ARMv7 + | "armv8" -> ARMv8 | spec -> raise (Arg.Bad ("wrong '-farch' option: " ^ spec)) end diff --git a/asmcomp/arm/emit.mlp b/asmcomp/arm/emit.mlp index 1531cb7a..182ccbdf 100644 --- a/asmcomp/arm/emit.mlp +++ b/asmcomp/arm/emit.mlp @@ -165,8 +165,8 @@ let emit_call_bound_error bd = (* Negate a comparison *) let negate_integer_comparison = function - Isigned cmp -> Isigned(negate_comparison cmp) - | Iunsigned cmp -> Iunsigned(negate_comparison cmp) + | Isigned cmp -> Isigned(negate_integer_comparison cmp) + | Iunsigned cmp -> 
Iunsigned(negate_integer_comparison cmp) (* Names of various instructions *) @@ -367,6 +367,35 @@ let emit_load_symbol_addr dst s = 1 end +(* Emit instructions that set [rd] to 1 if integer condition [cmp] holds + and set [rd] to 0 otherwise. *) + +let emit_set_condition cmp rd = + let compthen = name_for_comparison cmp in + let compelse = name_for_comparison (negate_integer_comparison cmp) in + if !arch < ARMv8 || not !thumb then begin + ` ite {emit_string compthen}\n`; + ` mov{emit_string compthen} {emit_reg rd}, #1\n`; + ` mov{emit_string compelse} {emit_reg rd}, #0\n`; + 3 + end else begin + (* T32 mode in ARMv8 deprecates general ITE blocks + and favors IT blocks containing only one 16-bit instruction. + mov <reg>, #<imm> is 16 bits if <reg> is R0...R7 + and <imm> fits in 8 bits. *) + let temp = + match rd.loc with + | Reg r when r < 8 -> rd (* can assign rd directly *) + | _ -> phys_reg 3 (* use r3 as temporary *) in + ` it {emit_string compthen}\n`; + ` mov{emit_string compthen} {emit_reg temp}, #1\n`; + ` it {emit_string compelse}\n`; + ` mov{emit_string compelse} {emit_reg temp}, #0\n`; + if temp.loc = rd.loc then 4 else begin + ` movs {emit_reg rd}, {emit_reg temp}\n`; 5 + end + end + (* Output the assembly code for an instruction *) let emit_instr i = @@ -572,19 +601,11 @@ let emit_instr i = 1 + ninstr end | Lop(Iintop(Icomp cmp)) -> - let compthen = name_for_comparison cmp in - let compelse = name_for_comparison (negate_integer_comparison cmp) in ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; - ` ite {emit_string compthen}\n`; - ` mov{emit_string compthen} {emit_reg i.res.(0)}, #1\n`; - ` mov{emit_string compelse} {emit_reg i.res.(0)}, #0\n`; 4 + 1 + emit_set_condition cmp i.res.(0) | Lop(Iintop_imm(Icomp cmp, n)) -> - let compthen = name_for_comparison cmp in - let compelse = name_for_comparison (negate_integer_comparison cmp) in ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`; - ` ite {emit_string compthen}\n`; - ` mov{emit_string compthen} {emit_reg i.res.(0)}, #1\n`; 
- ` mov{emit_string compelse} {emit_reg i.res.(0)}, #0\n`; 4 + 1 + emit_set_condition cmp i.res.(0) | Lop(Iintop (Icheckbound { label_after_error; } )) -> let lbl = bound_error_label ?label:label_after_error i.dbg in ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; @@ -705,18 +726,20 @@ let emit_instr i = ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`; let comp = name_for_comparison cmp in ` b{emit_string comp} {emit_label lbl}\n`; 2 - | Ifloattest(cmp, neg) -> - let comp = (match (cmp, neg) with - (Ceq, false) | (Cne, true) -> "eq" - | (Cne, false) | (Ceq, true) -> "ne" - | (Clt, false) -> "cc" - | (Clt, true) -> "cs" - | (Cle, false) -> "ls" - | (Cle, true) -> "hi" - | (Cgt, false) -> "gt" - | (Cgt, true) -> "le" - | (Cge, false) -> "ge" - | (Cge, true) -> "lt") in + | Ifloattest cmp -> + let comp = + match cmp with + | CFeq -> "eq" + | CFneq -> "ne" + | CFlt -> "cc" + | CFnlt -> "cs" + | CFle -> "ls" + | CFnle -> "hi" + | CFgt -> "gt" + | CFngt -> "le" + | CFge -> "ge" + | CFnge -> "lt" + in ` fcmpd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ` fmstat\n`; ` b{emit_string comp} {emit_label lbl}\n`; 3 @@ -929,6 +952,7 @@ let begin_assembly() = | ARMv6 -> ` .arch armv6\n` | ARMv6T2 -> ` .arch armv6t2\n` | ARMv7 -> ` .arch armv7-a\n` + | ARMv8 -> ` .arch armv8-a\n` end; begin match !fpu with Soft -> ` .fpu softvfp\n` diff --git a/asmcomp/arm/proc.ml b/asmcomp/arm/proc.ml index df9aaeef..9e1bb648 100644 --- a/asmcomp/arm/proc.ml +++ b/asmcomp/arm/proc.ml @@ -265,6 +265,9 @@ let destroyed_at_oper = function [| phys_reg 3; phys_reg 8 |] (* r3 and r12 destroyed *) | Iop(Iintop Imulh) when !arch < ARMv6 -> [| phys_reg 8 |] (* r12 destroyed *) + | Iop(Iintop (Icomp _) | Iintop_imm(Icomp _, _)) + when !arch >= ARMv8 && !thumb -> + [| phys_reg 3 |] (* r3 destroyed *) | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _, _)) -> [| phys_reg 107 |] (* d7 (s14-s15) destroyed *) | _ -> [||] diff --git a/asmcomp/arm/selection.ml b/asmcomp/arm/selection.ml 
index 3a3a5c61..747e86a2 100644 --- a/asmcomp/arm/selection.ml +++ b/asmcomp/arm/selection.ml @@ -23,10 +23,10 @@ open Mach let is_offset chunk n = match chunk with - (* VFPv{2,3} load/store have -1020 to 1020 *) - Single | Double | Double_u + (* VFPv{2,3} load/store have -1020 to 1020. Offset must be multiple of 4 *) + | Single | Double | Double_u when !fpu >= VFPv2 -> - n >= -1020 && n <= 1020 + n >= -1020 && n <= 1020 && n mod 4 = 0 (* ARM load/store byte/word have -4095 to 4095 *) | Byte_unsigned | Byte_signed | Thirtytwo_unsigned | Thirtytwo_signed @@ -240,16 +240,19 @@ method private select_operation_softfp op args dbg = | (Cfloatofint, args) -> (self#iextcall("__aeabi_i2d", false), args) | (Cintoffloat, args) -> (self#iextcall("__aeabi_d2iz", false), args) | (Ccmpf comp, args) -> - let func = (match comp with - Cne (* there's no __aeabi_dcmpne *) - | Ceq -> "__aeabi_dcmpeq" - | Clt -> "__aeabi_dcmplt" - | Cle -> "__aeabi_dcmple" - | Cgt -> "__aeabi_dcmpgt" - | Cge -> "__aeabi_dcmpge") in - let comp = (match comp with - Cne -> Ceq (* eq 0 => false *) - | _ -> Cne (* ne 0 => true *)) in + let comp, func = + match comp with + | CFeq -> Cne, "__aeabi_dcmpeq" + | CFneq -> Ceq, "__aeabi_dcmpeq" + | CFlt -> Cne, "__aeabi_dcmplt" + | CFnlt -> Ceq, "__aeabi_dcmplt" + | CFle -> Cne, "__aeabi_dcmple" + | CFnle -> Ceq, "__aeabi_dcmple" + | CFgt -> Cne, "__aeabi_dcmpgt" + | CFngt -> Ceq, "__aeabi_dcmpgt" + | CFge -> Cne, "__aeabi_dcmpge" + | CFnge -> Ceq, "__aeabi_dcmpge" + in (Iintop_imm(Icomp(Iunsigned comp), 0), [Cop(Cextcall(func, typ_int, false, None), args, dbg)]) (* Add coercions around loads and stores of 32-bit floats *) diff --git a/asmcomp/arm64/emit.mlp b/asmcomp/arm64/emit.mlp index ba97d813..de15b744 100644 --- a/asmcomp/arm64/emit.mlp +++ b/asmcomp/arm64/emit.mlp @@ -792,18 +792,20 @@ let emit_instr i = ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`; let comp = name_for_comparison cmp in ` b.{emit_string comp} {emit_label lbl}\n` - | Ifloattest(cmp, neg) -> 
- let comp = (match (cmp, neg) with - | (Ceq, false) | (Cne, true) -> "eq" - | (Cne, false) | (Ceq, true) -> "ne" - | (Clt, false) -> "cc" - | (Clt, true) -> "cs" - | (Cle, false) -> "ls" - | (Cle, true) -> "hi" - | (Cgt, false) -> "gt" - | (Cgt, true) -> "le" - | (Cge, false) -> "ge" - | (Cge, true) -> "lt") in + | Ifloattest cmp -> + let comp = + match cmp with + | CFeq -> "eq" + | CFneq -> "ne" + | CFlt -> "cc" + | CFnlt -> "cs" + | CFle -> "ls" + | CFnle -> "hi" + | CFgt -> "gt" + | CFngt -> "le" + | CFge -> "ge" + | CFnge -> "lt" + in ` fcmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ` b.{emit_string comp} {emit_label lbl}\n` | Ioddtest -> diff --git a/asmcomp/asmgen.ml b/asmcomp/asmgen.ml index 2d8dc3f8..c3bd1cb5 100644 --- a/asmcomp/asmgen.ml +++ b/asmcomp/asmgen.ml @@ -198,7 +198,7 @@ let end_gen_implementation ?toplevel ppf let flambda_gen_implementation ?toplevel ~backend ppf (program:Flambda.program) = - let export = Build_export_info.build_export_info ~backend program in + let export = Build_export_info.build_transient ~backend program in let (clambda, preallocated, constants) = Profile.record_call "backend" (fun () -> (program, export) @@ -230,7 +230,7 @@ let lambda_gen_implementation ?toplevel ppf symbol = Compilenv.make_symbol None; exported = true; tag = 0; - size = lambda.main_module_block_size; + fields = List.init lambda.main_module_block_size (fun _ -> None); } in let clambda_and_constants = diff --git a/asmcomp/asmlink.ml b/asmcomp/asmlink.ml index fe93e5e1..90617dc8 100644 --- a/asmcomp/asmlink.ml +++ b/asmcomp/asmlink.ml @@ -119,7 +119,7 @@ let object_file_name name = try find_in_path !load_path name with Not_found -> - fatal_error "Asmlink.object_file_name: not found" in + fatal_errorf "Asmlink.object_file_name: %s not found" name in if Filename.check_suffix file_name ".cmx" then Filename.chop_suffix file_name ".cmx" ^ ext_obj else if Filename.check_suffix file_name ".cmxa" then @@ -203,6 +203,9 @@ let scan_file obj_name tolink = match 
read_file obj_name with (* Second pass: generate the startup file and link it with everything else *) +let force_linking_of_startup ppf = + Asmgen.compile_phrase ppf (Cmm.Cdata ([Cmm.Csymbol_address "caml_startup"])) + let make_startup_file ppf units_list = let compile_phrase p = Asmgen.compile_phrase ppf p in Location.input_name := "caml_startup"; (* set name of "current" input *) @@ -238,6 +241,8 @@ let make_startup_file ppf units_list = if Config.spacetime then begin compile_phrase (Cmmgen.spacetime_shapes all_names); end; + if !Clflags.output_complete_object then + force_linking_of_startup ppf; Emit.end_assembly () let make_shared_startup_file ppf units = @@ -251,6 +256,8 @@ let make_shared_startup_file ppf units = compile_phrase (Cmmgen.global_table (List.map (fun (ui,_) -> ui.ui_symbol) units)); + if !Clflags.output_complete_object then + force_linking_of_startup ppf; (* this is to force a reference to all units, otherwise the linker might drop some of them (in case of libraries) *) Emit.end_assembly () @@ -422,4 +429,6 @@ let reset () = implementations_defined := []; cmx_required := []; interfaces := []; - implementations := [] + implementations := []; + lib_ccobjs := []; + lib_ccopts := [] diff --git a/asmcomp/build_export_info.ml b/asmcomp/build_export_info.ml index 80f97f05..bf37e609 100644 --- a/asmcomp/build_export_info.ml +++ b/asmcomp/build_export_info.ml @@ -20,7 +20,14 @@ module Env : sig type t val new_descr : t -> Export_info.descr -> Export_id.t + val record_descr : t -> Export_id.t -> Export_info.descr -> unit + val new_value_closure_descr + : t + -> closure_id:Closure_id.t + -> set_of_closures: Export_info.value_set_of_closures + -> Export_id.t + val get_descr : t -> Export_info.approx -> Export_info.descr option val add_approx : t -> Variable.t -> Export_info.approx -> t @@ -56,11 +63,13 @@ end = struct (* Note that [ex_table]s themselves are shared (hence [ref] and not [mutable]). 
*) ex_table : Export_info.descr Export_id.Map.t ref; + closure_table : Export_id.t Closure_id.Map.t ref; } let create_empty () = { sym = Symbol.Map.empty; ex_table = ref Export_id.Map.empty; + closure_table = ref Closure_id.Map.empty; } let add_symbol t sym export_id = @@ -85,12 +94,14 @@ end = struct { var : Export_info.approx Variable.Map.t; sym : Export_id.t Symbol.Map.t; ex_table : Export_info.descr Export_id.Map.t ref; + closure_table: Export_id.t Closure_id.Map.t ref; } let empty_of_global (env : Global.t) = { var = Variable.Map.empty; sym = env.sym; ex_table = env.ex_table; + closure_table = env.closure_table; } let extern_id_descr export_id = @@ -102,13 +113,17 @@ end = struct if Compilenv.is_predefined_exception sym then None else - let export = Compilenv.approx_for_global (Symbol.compilation_unit sym) in - try - let id = Symbol.Map.find sym export.symbol_id in - let descr = Export_info.find_description export id in - Some descr + match + Compilenv.approx_for_global (Symbol.compilation_unit sym) with - | Not_found -> None + | None -> None + | Some export -> + try + let id = Symbol.Map.find sym export.symbol_id in + let descr = Export_info.find_description export id in + Some descr + with + | Not_found -> None let get_id_descr t export_id = try Some (Export_id.Map.find export_id !(t.ex_table)) @@ -140,6 +155,17 @@ end = struct record_descr t id descr; id + let new_value_closure_descr t ~closure_id ~set_of_closures = + match Closure_id.Map.find closure_id !(t.closure_table) with + | exception Not_found -> + let export_id = + new_descr t (Value_closure { closure_id; set_of_closures }) + in + t.closure_table := + Closure_id.Map.add closure_id export_id !(t.closure_table); + export_id + | export_id -> export_id + let new_unit_descr t = new_descr t (Value_constptr 0) @@ -278,10 +304,9 @@ and descr_of_named (env : Env.t) (named : Flambda.named) [Project_closure]: closure ID %a not in set of closures" Closure_id.print closure_id end; - let descr : 
Export_info.descr = - Value_closure { closure_id = closure_id; set_of_closures; } - in - Value_id (Env.new_descr env descr) + Value_id ( + Env.new_value_closure_descr env ~closure_id ~set_of_closures + ) | _ -> (* It would be nice if this were [assert false], but owing to the fact that this pass may propagate less information than for example @@ -292,10 +317,9 @@ and descr_of_named (env : Env.t) (named : Flambda.named) begin match Env.get_descr env (Env.find_approx env closure) with | Some (Value_closure { set_of_closures; closure_id; }) -> assert (Closure_id.equal closure_id start_from); - let descr : Export_info.descr = - Value_closure { closure_id = move_to; set_of_closures; } - in - Value_id (Env.new_descr env descr) + Value_id ( + Env.new_value_closure_descr env ~closure_id:move_to ~set_of_closures + ) | _ -> Value_unknown end | Project_var { closure; closure_id = closure_id'; var; } -> @@ -343,6 +367,7 @@ and describe_set_of_closures env (set : Flambda.set_of_closures) { Export_info. 
set_of_closures_id = set.function_decls.set_of_closures_id; bound_vars = Var_within_closure.wrap_map bound_vars_approx; + free_vars = set.free_vars; results = Closure_id.wrap_map (Variable.Map.map (fun _ -> Export_info.Value_unknown) @@ -351,13 +376,12 @@ and describe_set_of_closures env (set : Flambda.set_of_closures) } in Variable.Map.mapi (fun fun_var _function_decl -> - let descr : Export_info.descr = - Value_closure - { closure_id = Closure_id.wrap fun_var; - set_of_closures = initial_value_set_of_closures; - } + let export_id = + let closure_id = Closure_id.wrap fun_var in + let set_of_closures = initial_value_set_of_closures in + Env.new_value_closure_descr env ~closure_id ~set_of_closures in - Export_info.Value_id (Env.new_descr env descr)) + Export_info.Value_id export_id) set.function_decls.funs in let closure_env = @@ -372,6 +396,7 @@ and describe_set_of_closures env (set : Flambda.set_of_closures) in { set_of_closures_id = set.function_decls.set_of_closures_id; bound_vars = Var_within_closure.wrap_map bound_vars_approx; + free_vars = set.free_vars; results = Closure_id.wrap_map results; aliased_symbol = None; } @@ -489,10 +514,13 @@ let describe_program (env : Env.Global.t) (program : Flambda.program) = in loop env program.program_body -let build_export_info ~(backend : (module Backend_intf.S)) - (program : Flambda.program) : Export_info.t = + +let build_transient ~(backend : (module Backend_intf.S)) + (program : Flambda.program) : Export_info.transient = if !Clflags.opaque then - Export_info.empty + let compilation_unit = Compilenv.current_unit () in + let root_symbol = Compilenv.current_unit_symbol () in + Export_info.opaque_transient ~root_symbol ~compilation_unit else (* CR-soon pchambart: Should probably use that instead of the ident of the module as global identifier. 
@@ -502,27 +530,44 @@ let build_export_info ~(backend : (module Backend_intf.S)) let _global_symbol, env = describe_program (Env.Global.create_empty ()) program in - let sets_of_closures = - Flambda_utils.all_function_decls_indexed_by_set_of_closures_id program + let sets_of_closures_map = + Flambda_utils.all_sets_of_closures_map program in - let closures = - Flambda_utils.all_function_decls_indexed_by_closure_id program - in - let invariant_params = - Set_of_closures_id.Map.map - (fun { Flambda. function_decls; _ } -> - Invariant_params.invariant_params_in_recursion - ~backend function_decls) - (Flambda_utils.all_sets_of_closures_map program) + let function_declarations_map = + let set_of_closures_approx { Flambda. function_decls; _ } = + let recursive = + lazy + (Find_recursive_functions.in_function_declarations + function_decls ~backend) + in + let keep_body = + Inline_and_simplify_aux.keep_body_check + ~is_classic_mode:function_decls.is_classic_mode ~recursive + in + Simple_value_approx.function_declarations_approx + ~keep_body function_decls + in + Set_of_closures_id.Map.map set_of_closures_approx sets_of_closures_map in let unnested_values = Env.Global.export_id_to_descr_map env in let invariant_params = + let invariant_params = + Set_of_closures_id.Map.map + (fun { Flambda. 
function_decls; _ } -> + if function_decls.is_classic_mode then begin + Variable.Map.empty + end else begin + Invariant_params.invariant_params_in_recursion + ~backend function_decls + end) + (Flambda_utils.all_sets_of_closures_map program) + in let export = Compilenv.approx_env () in - Export_id.Map.fold (fun _eid (descr:Export_info.descr) - (invariant_params) -> - match descr with + Export_id.Map.fold + (fun _eid (descr:Export_info.descr) invariant_params -> + match (descr : Export_info.descr) with | Value_closure { set_of_closures } | Value_set_of_closures set_of_closures -> let { Export_info.set_of_closures_id } = set_of_closures in @@ -532,20 +577,137 @@ let build_export_info ~(backend : (module Backend_intf.S)) with | exception Not_found -> invariant_params - | (set:Variable.Set.t Variable.Map.t) -> - Set_of_closures_id.Map.add set_of_closures_id set invariant_params + | (set : Variable.Set.t Variable.Map.t) -> + Set_of_closures_id.Map.add + set_of_closures_id set invariant_params end - | _ -> + | Export_info.Value_boxed_int (_, _) + | Value_block _ + | Value_mutable_block _ + | Value_int _ + | Value_char _ + | Value_constptr _ + | Value_float _ + | Value_float_array _ + | Value_string _ + | Value_unknown_descr -> invariant_params) unnested_values invariant_params in + let recursive = + let recursive = + Set_of_closures_id.Map.map + (fun { Flambda. 
function_decls; _ } -> + if function_decls.is_classic_mode then begin + Variable.Set.empty + end else begin + Find_recursive_functions.in_function_declarations + ~backend function_decls + end) + (Flambda_utils.all_sets_of_closures_map program) + in + let export = Compilenv.approx_env () in + Export_id.Map.fold + (fun _eid (descr:Export_info.descr) recursive -> + match (descr : Export_info.descr) with + | Value_closure { set_of_closures } + | Value_set_of_closures set_of_closures -> + let { Export_info.set_of_closures_id } = set_of_closures in + begin match + Set_of_closures_id.Map.find set_of_closures_id + export.recursive + with + | exception Not_found -> + recursive + | (set : Variable.Set.t) -> + Set_of_closures_id.Map.add + set_of_closures_id set recursive + end + | Export_info.Value_boxed_int (_, _) + | Value_block _ + | Value_mutable_block _ + | Value_int _ + | Value_char _ + | Value_constptr _ + | Value_float _ + | Value_float_array _ + | Value_string _ + | Value_unknown_descr -> + recursive) + unnested_values recursive + in + let values = Export_info.nest_eid_map unnested_values in + let symbol_id = Env.Global.symbol_to_export_id_map env in + let { Traverse_for_exported_symbols. 
+ set_of_closure_ids = relevant_set_of_closures; + symbols = relevant_symbols; + export_ids = relevant_export_ids; + set_of_closure_ids_keep_declaration = + relevant_set_of_closures_declaration_only; + relevant_local_closure_ids; + relevant_imported_closure_ids; + relevant_local_vars_within_closure; + relevant_imported_vars_within_closure; + } = + let closure_id_to_set_of_closures_id = + Set_of_closures_id.Map.fold + (fun set_of_closure_id + (function_declarations : Simple_value_approx.function_declarations) acc -> + Variable.Map.fold + (fun fun_var _ acc -> + let closure_id = Closure_id.wrap fun_var in + Closure_id.Map.add closure_id set_of_closure_id acc) + function_declarations.funs + acc) + function_declarations_map + Closure_id.Map.empty + in + Traverse_for_exported_symbols.traverse + ~sets_of_closures_map + ~closure_id_to_set_of_closures_id + ~function_declarations_map + ~values:(Compilation_unit.Map.find (Compilenv.current_unit ()) values) + ~symbol_id + ~root_symbol:(Compilenv.current_unit_symbol ()) + in + let sets_of_closures = + Set_of_closures_id.Map.filter_map + function_declarations_map + ~f:(fun key (fun_decls : Simple_value_approx.function_declarations) -> + if Set_of_closures_id.Set.mem key relevant_set_of_closures then + Some fun_decls + else if begin + Set_of_closures_id.Set.mem key + relevant_set_of_closures_declaration_only + end then begin + if fun_decls.is_classic_mode then + Some (Simple_value_approx.clear_function_bodies fun_decls) + else + Some fun_decls + end else begin + None + end) + in + let values = - Export_info.nest_eid_map unnested_values + Compilation_unit.Map.map (fun map -> + Export_id.Map.filter (fun key _ -> + Export_id.Set.mem key relevant_export_ids) + map) + values in - Export_info.create ~values - ~symbol_id:(Env.Global.symbol_to_export_id_map env) - ~offset_fun:Closure_id.Map.empty - ~offset_fv:Var_within_closure.Map.empty - ~sets_of_closures ~closures - ~constant_sets_of_closures:Set_of_closures_id.Set.empty + let 
symbol_id = + Symbol.Map.filter + (fun key _ -> Symbol.Set.mem key relevant_symbols) + symbol_id + in + Export_info.create_transient ~values + ~symbol_id + ~sets_of_closures ~invariant_params + ~recursive + ~relevant_local_closure_ids + ~relevant_imported_closure_ids + ~relevant_local_vars_within_closure + ~relevant_imported_vars_within_closure + diff --git a/asmcomp/build_export_info.mli b/asmcomp/build_export_info.mli index 2a824ea3..0380604b 100644 --- a/asmcomp/build_export_info.mli +++ b/asmcomp/build_export_info.mli @@ -19,7 +19,7 @@ (** Construct export information, for emission into .cmx files, from an Flambda program. *) -val build_export_info : +val build_transient : backend:(module Backend_intf.S) -> Flambda.program -> - Export_info.t + Export_info.transient diff --git a/asmcomp/clambda.ml b/asmcomp/clambda.ml index b0fa607d..dc0d5fd3 100644 --- a/asmcomp/clambda.ml +++ b/asmcomp/clambda.ml @@ -95,11 +95,15 @@ type value_approximation = (* Preallocated globals *) +type uconstant_block_field = + | Uconst_field_ref of string + | Uconst_field_int of int + type preallocated_block = { symbol : string; exported : bool; tag : int; - size : int; + fields : uconstant_block_field option list; } type preallocated_constant = { diff --git a/asmcomp/clambda.mli b/asmcomp/clambda.mli index c2e5d137..e4db85a0 100644 --- a/asmcomp/clambda.mli +++ b/asmcomp/clambda.mli @@ -100,11 +100,15 @@ val compare_structured_constants: val compare_constants: uconstant -> uconstant -> int +type uconstant_block_field = + | Uconst_field_ref of string + | Uconst_field_int of int + type preallocated_block = { symbol : string; exported : bool; tag : int; - size : int; + fields : uconstant_block_field option list; } type preallocated_constant = { diff --git a/asmcomp/closure.ml b/asmcomp/closure.ml index 4fdb3778..a8a22253 100644 --- a/asmcomp/closure.ml +++ b/asmcomp/closure.ml @@ -131,7 +131,6 @@ let prim_size prim args = | Parraysetu kind -> if kind = Pgenarray then 16 else 4 | 
Parrayrefs kind -> if kind = Pgenarray then 18 else 8 | Parraysets kind -> if kind = Pgenarray then 22 else 10 - | Pbittest -> 3 | Pbigarrayref(_, ndims, _, _) -> 4 + ndims * 6 | Pbigarrayset(_, ndims, _, _) -> 4 + ndims * 6 | _ -> 2 (* arithmetic and comparisons *) @@ -226,15 +225,31 @@ let make_const_ref c = let make_const_int n = make_const (Uconst_int n) let make_const_ptr n = make_const (Uconst_ptr n) let make_const_bool b = make_const_ptr(if b then 1 else 0) -let make_comparison cmp x y = + +let make_integer_comparison cmp x y = make_const_bool (match cmp with Ceq -> x = y - | Cneq -> x <> y + | Cne -> x <> y | Clt -> x < y | Cgt -> x > y | Cle -> x <= y | Cge -> x >= y) + +let make_float_comparison cmp x y = + make_const_bool + (match cmp with + | CFeq -> x = y + | CFneq -> not (x = y) + | CFlt -> x < y + | CFnlt -> not (x < y) + | CFgt -> x > y + | CFngt -> not (x > y) + | CFle -> x <= y + | CFnle -> not (x <= y) + | CFge -> x >= y + | CFnge -> not (x >= y)) + let make_const_float n = make_const_ref (Uconst_float n) let make_const_natint n = make_const_ref (Uconst_nativeint n) let make_const_int32 n = make_const_ref (Uconst_int32 n) @@ -280,7 +295,7 @@ let simplif_arith_prim_pure fpc p (args, approxs) dbg = make_const_int (n1 lsr n2) | Pasrint when 0 <= n2 && n2 < 8 * Arch.size_int -> make_const_int (n1 asr n2) - | Pintcomp c -> make_comparison c n1 n2 + | Pintcomp c -> make_integer_comparison c n1 n2 | _ -> default end (* float *) @@ -299,7 +314,7 @@ let simplif_arith_prim_pure fpc p (args, approxs) dbg = | Psubfloat -> make_const_float (n1 -. n2) | Pmulfloat -> make_const_float (n1 *. n2) | Pdivfloat -> make_const_float (n1 /. 
n2) - | Pfloatcomp c -> make_comparison c n1 n2 + | Pfloatcomp c -> make_float_comparison c n1 n2 | _ -> default end (* nativeint *) @@ -325,7 +340,7 @@ let simplif_arith_prim_pure fpc p (args, approxs) dbg = | Pandbint Pnativeint -> make_const_natint (Nativeint.logand n1 n2) | Porbint Pnativeint -> make_const_natint (Nativeint.logor n1 n2) | Pxorbint Pnativeint -> make_const_natint (Nativeint.logxor n1 n2) - | Pbintcomp(Pnativeint, c) -> make_comparison c n1 n2 + | Pbintcomp(Pnativeint, c) -> make_integer_comparison c n1 n2 | _ -> default end (* nativeint, int *) @@ -363,7 +378,7 @@ let simplif_arith_prim_pure fpc p (args, approxs) dbg = | Pandbint Pint32 -> make_const_int32 (Int32.logand n1 n2) | Porbint Pint32 -> make_const_int32 (Int32.logor n1 n2) | Pxorbint Pint32 -> make_const_int32 (Int32.logxor n1 n2) - | Pbintcomp(Pint32, c) -> make_comparison c n1 n2 + | Pbintcomp(Pint32, c) -> make_integer_comparison c n1 n2 | _ -> default end (* int32, int *) @@ -401,7 +416,7 @@ let simplif_arith_prim_pure fpc p (args, approxs) dbg = | Pandbint Pint64 -> make_const_int64 (Int64.logand n1 n2) | Porbint Pint64 -> make_const_int64 (Int64.logor n1 n2) | Pxorbint Pint64 -> make_const_int64 (Int64.logxor n1 n2) - | Pbintcomp(Pint64, c) -> make_comparison c n1 n2 + | Pbintcomp(Pint64, c) -> make_integer_comparison c n1 n2 | _ -> default end (* int64, int *) @@ -1141,7 +1156,7 @@ and close_functions fenv cenv fun_defs = !function_nesting_depth < excessive_function_nesting_depth in (* Determine the free variables of the functions *) let fv = - IdentSet.elements (free_variables (Lletrec(fun_defs, lambda_unit))) in + Ident.Set.elements (free_variables (Lletrec(fun_defs, lambda_unit))) in (* Build the function descriptors for the functions. Initially all functions are assumed not to need their environment parameter. 
*) @@ -1274,13 +1289,13 @@ and close_switch fenv cenv cases num_keys default = (* First default case *) begin match default with | Some def when ncases < num_keys -> - assert (store.act_store def = 0) + assert (store.act_store () def = 0) | _ -> () end ; (* Then all other cases *) List.iter (fun (key,lam) -> - index.(key) <- store.act_store lam) + index.(key) <- store.act_store () lam) cases ; (* Explicit sharing with catch/exit, as switcher compilation may diff --git a/asmcomp/closure_offsets.ml b/asmcomp/closure_offsets.ml index 94eb4a1f..51a09f02 100644 --- a/asmcomp/closure_offsets.ml +++ b/asmcomp/closure_offsets.ml @@ -87,52 +87,3 @@ let compute (program:Flambda.program) = init (Flambda_utils.all_sets_of_closures program) in r - -let compute_reexported_offsets program - ~current_unit_offset_fun ~current_unit_offset_fv - ~imported_units_offset_fun ~imported_units_offset_fv = - let offset_fun = ref current_unit_offset_fun in - let offset_fv = ref current_unit_offset_fv in - let used_closure_id closure_id = - match Closure_id.Map.find closure_id imported_units_offset_fun with - | offset -> - assert (not (Closure_id.Map.mem closure_id current_unit_offset_fun)); - begin match Closure_id.Map.find closure_id !offset_fun with - | exception Not_found -> - offset_fun := Closure_id.Map.add closure_id offset !offset_fun - | offset' -> assert (offset = offset') - end - | exception Not_found -> - assert (Closure_id.Map.mem closure_id current_unit_offset_fun) - in - let used_var_within_closure var = - match Var_within_closure.Map.find var imported_units_offset_fv with - | offset -> - assert (not (Var_within_closure.Map.mem var current_unit_offset_fv)); - begin match Var_within_closure.Map.find var !offset_fv with - | exception Not_found -> - offset_fv := Var_within_closure.Map.add var offset !offset_fv - | offset' -> assert (offset = offset') - end - | exception Not_found -> - assert (Var_within_closure.Map.mem var current_unit_offset_fv) - in - 
Flambda_iterators.iter_named_of_program program - ~f:(fun (named : Flambda.named) -> - match named with - | Project_closure { closure_id; _ } -> - used_closure_id closure_id - | Move_within_set_of_closures { start_from; move_to; _ } -> - used_closure_id start_from; - used_closure_id move_to - | Project_var { closure_id; var; _ } -> - used_closure_id closure_id; - used_var_within_closure var - | Symbol _ | Const _ | Allocated_const _ | Read_mutable _ - | Read_symbol_field _ | Set_of_closures _ | Prim _ | Expr _ -> ()); - Flambda_iterators.iter_constant_defining_values_on_program program - ~f:(fun (const : Flambda.constant_defining_value) -> - match const with - | Project_closure (_, closure_id) -> used_closure_id closure_id - | Allocated_const _ | Block _ | Set_of_closures _ -> ()); - !offset_fun, !offset_fv diff --git a/asmcomp/closure_offsets.mli b/asmcomp/closure_offsets.mli index 7acb8449..7ecf9c27 100644 --- a/asmcomp/closure_offsets.mli +++ b/asmcomp/closure_offsets.mli @@ -25,20 +25,3 @@ type result = private { } val compute : Flambda.program -> result - -(** If compilation unit [C] references [B], which contains functions inlined - from another compilation unit [A], then we may need to know the layout of - closures inside (or constructed by code inside) a.cmx in order to - compile c.cmx. Unfortunately a.cmx is permitted to be absent during such - compilation; c.cmx will be compiled using just b.cmx. As such, when - building the .cmx export information for a given compilation unit, we - also include information about the layout of any closures that it depends - on from other compilation units. This means that when situations as just - describe arise, we always have access to the necessary closure offsets. 
*) -val compute_reexported_offsets - : Flambda.program - -> current_unit_offset_fun:int Closure_id.Map.t - -> current_unit_offset_fv:int Var_within_closure.Map.t - -> imported_units_offset_fun:int Closure_id.Map.t - -> imported_units_offset_fv:int Var_within_closure.Map.t - -> int Closure_id.Map.t * int Var_within_closure.Map.t diff --git a/asmcomp/cmm.ml b/asmcomp/cmm.ml index 5b2fd6b8..939298dd 100644 --- a/asmcomp/cmm.ml +++ b/asmcomp/cmm.ml @@ -89,24 +89,21 @@ let size_machtype mty = done; !size -type comparison = - Ceq - | Cne - | Clt - | Cle - | Cgt - | Cge - -let negate_comparison = function - Ceq -> Cne | Cne -> Ceq - | Clt -> Cge | Cle -> Cgt - | Cgt -> Cle | Cge -> Clt - -let swap_comparison = function - Ceq -> Ceq | Cne -> Cne - | Clt -> Cgt | Cle -> Cge - | Cgt -> Clt | Cge -> Cle +type integer_comparison = Lambda.integer_comparison = + | Ceq | Cne | Clt | Cgt | Cle | Cge +let negate_integer_comparison = Lambda.negate_integer_comparison + +let swap_integer_comparison = Lambda.swap_integer_comparison + +(* With floats [not (x < y)] is not the same as [x >= y] due to NaNs, + so we provide additional comparisons to represent the negations.*) +type float_comparison = Lambda.float_comparison = + | CFeq | CFneq | CFlt | CFnlt | CFgt | CFngt | CFle | CFnle | CFge | CFnge + +let negate_float_comparison = Lambda.negate_float_comparison + +let swap_float_comparison = Lambda.swap_float_comparison type label = int let label_counter = ref 99 @@ -142,13 +139,13 @@ and operation = | Cstore of memory_chunk * Lambda.initialization_or_assignment | Caddi | Csubi | Cmuli | Cmulhi | Cdivi | Cmodi | Cand | Cor | Cxor | Clsl | Clsr | Casr - | Ccmpi of comparison + | Ccmpi of integer_comparison | Caddv | Cadda - | Ccmpa of comparison + | Ccmpa of integer_comparison | Cnegf | Cabsf | Caddf | Csubf | Cmulf | Cdivf | Cfloatofint | Cintoffloat - | Ccmpf of comparison + | Ccmpf of float_comparison | Craise of raise_kind | Ccheckbound @@ -173,11 +170,15 @@ type expression = | Cexit 
of int * expression list | Ctrywith of expression * Ident.t * expression +type codegen_option = + | Reduce_code_size + | No_CSE + type fundecl = { fun_name: string; fun_args: (Ident.t * machtype) list; fun_body: expression; - fun_fast: bool; + fun_codegen_options : codegen_option list; fun_dbg : Debuginfo.t; } diff --git a/asmcomp/cmm.mli b/asmcomp/cmm.mli index a62578f6..01fe0c93 100644 --- a/asmcomp/cmm.mli +++ b/asmcomp/cmm.mli @@ -72,16 +72,17 @@ val ge_component val size_machtype: machtype -> int -type comparison = - Ceq - | Cne - | Clt - | Cle - | Cgt - | Cge +type integer_comparison = Lambda.integer_comparison = + | Ceq | Cne | Clt | Cgt | Cle | Cge -val negate_comparison: comparison -> comparison -val swap_comparison: comparison -> comparison +val negate_integer_comparison: integer_comparison -> integer_comparison +val swap_integer_comparison: integer_comparison -> integer_comparison + +type float_comparison = Lambda.float_comparison = + | CFeq | CFneq | CFlt | CFnlt | CFgt | CFngt | CFle | CFnle | CFge | CFnge + +val negate_float_comparison: float_comparison -> float_comparison +val swap_float_comparison: float_comparison -> float_comparison type label = int val new_label: unit -> label @@ -113,14 +114,14 @@ and operation = | Cstore of memory_chunk * Lambda.initialization_or_assignment | Caddi | Csubi | Cmuli | Cmulhi | Cdivi | Cmodi | Cand | Cor | Cxor | Clsl | Clsr | Casr - | Ccmpi of comparison + | Ccmpi of integer_comparison | Caddv (* pointer addition that produces a [Val] (well-formed Caml value) *) | Cadda (* pointer addition that produces a [Addr] (derived heap pointer) *) - | Ccmpa of comparison + | Ccmpa of integer_comparison | Cnegf | Cabsf | Caddf | Csubf | Cmulf | Cdivf | Cfloatofint | Cintoffloat - | Ccmpf of comparison + | Ccmpf of float_comparison | Craise of raise_kind | Ccheckbound @@ -149,11 +150,15 @@ and expression = | Cexit of int * expression list | Ctrywith of expression * Ident.t * expression +type codegen_option = + | 
Reduce_code_size + | No_CSE + type fundecl = { fun_name: string; fun_args: (Ident.t * machtype) list; fun_body: expression; - fun_fast: bool; + fun_codegen_options : codegen_option list; fun_dbg : Debuginfo.t; } diff --git a/asmcomp/cmmgen.ml b/asmcomp/cmmgen.ml index 483123f4..4509f58b 100644 --- a/asmcomp/cmmgen.ml +++ b/asmcomp/cmmgen.ml @@ -289,9 +289,14 @@ let mk_not dbg cmm = | Cop(Caddi, [Cop(Clsl, [c; Cconst_int 1], _); Cconst_int 1], dbg') -> begin match c with | Cop(Ccmpi cmp, [c1; c2], dbg'') -> - tag_int (Cop(Ccmpi (negate_comparison cmp), [c1; c2], dbg'')) dbg' + tag_int + (Cop(Ccmpi (negate_integer_comparison cmp), [c1; c2], dbg'')) dbg' | Cop(Ccmpa cmp, [c1; c2], dbg'') -> - tag_int (Cop(Ccmpa (negate_comparison cmp), [c1; c2], dbg'')) dbg' + tag_int + (Cop(Ccmpa (negate_integer_comparison cmp), [c1; c2], dbg'')) dbg' + | Cop(Ccmpf cmp, [c1; c2], dbg'') -> + tag_int + (Cop(Ccmpf (negate_float_comparison cmp), [c1; c2], dbg'')) dbg' | _ -> (* 0 -> 3, 1 -> 1 *) Cop(Csubi, [Cconst_int 3; Cop(Clsl, [c; Cconst_int 1], dbg)], dbg) @@ -606,7 +611,10 @@ let get_field env ptr n dbg = let set_field ptr n newval init dbg = Cop(Cstore (Word_val, init), [field_address ptr n dbg; newval], dbg) -let non_profinfo_mask = (1 lsl (64 - Config.profinfo_width)) - 1 +let non_profinfo_mask = + if Config.profinfo + then (1 lsl (64 - Config.profinfo_width)) - 1 + else 0 (* [non_profinfo_mask] is unused in this case *) let get_header ptr dbg = (* We cannot deem this as [Immutable] due to the presence of [Obj.truncate] @@ -868,13 +876,9 @@ let curry_function n = (* Comparisons *) -let transl_comparison = function - Lambda.Ceq -> Ceq - | Lambda.Cneq -> Cne - | Lambda.Cge -> Cge - | Lambda.Cgt -> Cgt - | Lambda.Cle -> Cle - | Lambda.Clt -> Clt +let transl_int_comparison cmp = cmp + +let transl_float_comparison cmp = cmp (* Translate structured constants *) @@ -1077,7 +1081,7 @@ let bigarray_indexing unsafe elt_kind layout b args dbg = and elt_size = bigarray_elt_size elt_kind in 
(* [array_indexing] can simplify the given expressions *) - array_indexing ~typ:Int (log2 elt_size) + array_indexing ~typ:Addr (log2 elt_size) (Cop(Cload (Word_int, Mutable), [field_address b 1 dbg], dbg)) offset dbg @@ -1102,12 +1106,13 @@ let bigarray_get unsafe elt_kind layout b args dbg = Pbigarray_complex32 | Pbigarray_complex64 -> let kind = bigarray_word_kind elt_kind in let sz = bigarray_elt_size elt_kind / 2 in - bind "addr" (bigarray_indexing unsafe elt_kind layout b args dbg) - (fun addr -> - box_complex dbg - (Cop(Cload (kind, Mutable), [addr], dbg)) - (Cop(Cload (kind, Mutable), - [Cop(Cadda, [addr; Cconst_int sz], dbg)], dbg))) + bind "addr" (bigarray_indexing unsafe elt_kind layout b args dbg) (fun addr -> + bind "reval" + (Cop(Cload (kind, Mutable), [addr], dbg)) (fun reval -> + bind "imval" + (Cop(Cload (kind, Mutable), + [Cop(Cadda, [addr; Cconst_int sz], dbg)], dbg)) (fun imval -> + box_complex dbg reval imval))) | _ -> Cop(Cload (bigarray_word_kind elt_kind, Mutable), [bigarray_indexing unsafe elt_kind layout b args dbg], @@ -1363,7 +1368,7 @@ let simplif_primitive_32bits = function | Plsrbint Pint64 -> Pccall (default_prim "caml_int64_shift_right_unsigned") | Pasrbint Pint64 -> Pccall (default_prim "caml_int64_shift_right") | Pbintcomp(Pint64, Lambda.Ceq) -> Pccall (default_prim "caml_equal") - | Pbintcomp(Pint64, Lambda.Cneq) -> Pccall (default_prim "caml_notequal") + | Pbintcomp(Pint64, Lambda.Cne) -> Pccall (default_prim "caml_notequal") | Pbintcomp(Pint64, Lambda.Clt) -> Pccall (default_prim "caml_lessthan") | Pbintcomp(Pint64, Lambda.Cgt) -> Pccall (default_prim "caml_greaterthan") | Pbintcomp(Pint64, Lambda.Cle) -> Pccall (default_prim "caml_lessequal") @@ -1373,7 +1378,8 @@ let simplif_primitive_32bits = function | Pbigarrayset(_unsafe, n, Pbigarray_int64, _layout) -> Pccall (default_prim ("caml_ba_set_" ^ string_of_int n)) | Pstring_load_64(_) -> Pccall (default_prim "caml_string_get64") - | Pstring_set_64(_) -> Pccall (default_prim 
"caml_string_set64") + | Pbytes_load_64(_) -> Pccall (default_prim "caml_bytes_get64") + | Pbytes_set_64(_) -> Pccall (default_prim "caml_bytes_set64") | Pbigstring_load_64(_) -> Pccall (default_prim "caml_ba_uint8_get64") | Pbigstring_set_64(_) -> Pccall (default_prim "caml_ba_uint8_set64") | Pbbswap Pint64 -> Pccall (default_prim "caml_int64_bswap") @@ -1473,6 +1479,30 @@ end (* cmm store, as sharing as normally been detected in previous phases, we only share exits *) +(* Some specific patterns can lead to switches where several cases + point to the same action, but this action is not an exit (see GPR#1370). + The addition of the index in the action array as context allows to + share them correctly without duplication. *) +module StoreExpForSwitch = + Switch.CtxStore + (struct + type t = expression + type key = int option * int + type context = int + let make_key index expr = + let continuation = + match expr with + | Cexit (i,[]) -> Some i + | _ -> None + in + Some (continuation, index) + let compare_key (cont, index) (cont', index') = + match cont, cont' with + | Some i, Some i' when i = i' -> 0 + | _, _ -> Pervasives.compare index index' + end) + +(* For string switches, we can use a generic store *) module StoreExp = Switch.Store (struct @@ -1493,10 +1523,10 @@ let transl_int_switch loc arg low high cases default = match cases with | [] -> assert false | _::_ -> let store = StoreExp.mk_store () in - assert (store.Switch.act_store default = 0) ; + assert (store.Switch.act_store () default = 0) ; let cases = List.map - (fun (i,act) -> i,store.Switch.act_store act) + (fun (i,act) -> i,store.Switch.act_store () act) cases in let rec inters plow phigh pact = function | [] -> @@ -1622,8 +1652,10 @@ let rec is_unboxed_number ~strict env e = Boxed (Boxed_integer (Pint64, dbg), false) | Pbigarrayref(_, _, Pbigarray_native_int,_) -> Boxed (Boxed_integer (Pnativeint, dbg), false) - | Pstring_load_32(_) -> Boxed (Boxed_integer (Pint32, dbg), false) - | Pstring_load_64(_) 
-> Boxed (Boxed_integer (Pint64, dbg), false) + | Pstring_load_32(_) | Pbytes_load_32(_) -> + Boxed (Boxed_integer (Pint32, dbg), false) + | Pstring_load_64(_) | Pbytes_load_64(_) -> + Boxed (Boxed_integer (Pint64, dbg), false) | Pbigstring_load_32(_) -> Boxed (Boxed_integer (Pint32, dbg), false) | Pbigstring_load_64(_) -> Boxed (Boxed_integer (Pint64, dbg), false) | Praise _ -> No_result @@ -2174,7 +2206,7 @@ and transl_prim_2 env p arg1 arg2 dbg = Cop(Cor, [asr_int (transl env arg1) (untag_int(transl env arg2) dbg) dbg; Cconst_int 1], dbg) | Pintcomp cmp -> - tag_int(Cop(Ccmpi(transl_comparison cmp), + tag_int(Cop(Ccmpi(transl_int_comparison cmp), [transl env arg1; transl env arg2], dbg)) dbg | Pisout -> transl_isout (transl env arg1) (transl env arg2) dbg @@ -2196,7 +2228,7 @@ and transl_prim_2 env p arg1 arg2 dbg = [transl_unbox_float dbg env arg1; transl_unbox_float dbg env arg2], dbg)) | Pfloatcomp cmp -> - tag_int(Cop(Ccmpf(transl_comparison cmp), + tag_int(Cop(Ccmpf(transl_float_comparison cmp), [transl_unbox_float dbg env arg1; transl_unbox_float dbg env arg2], dbg)) dbg @@ -2215,7 +2247,7 @@ and transl_prim_2 env p arg1 arg2 dbg = Cop(Cload (Byte_unsigned, Mutable), [add_int str idx dbg], dbg))))) dbg - | Pstring_load_16(unsafe) -> + | Pstring_load_16(unsafe) | Pbytes_load_16(unsafe) -> tag_int (bind "str" (transl env arg1) (fun str -> bind "index" (untag_int (transl env arg2) dbg) (fun idx -> @@ -2235,7 +2267,7 @@ and transl_prim_2 env p arg1 arg2 dbg = (Cconst_int 1) dbg) idx (unaligned_load_16 ba_data idx dbg))))) dbg - | Pstring_load_32(unsafe) -> + | Pstring_load_32(unsafe) | Pbytes_load_32(unsafe) -> box_int dbg Pint32 (bind "str" (transl env arg1) (fun str -> bind "index" (untag_int (transl env arg2) dbg) (fun idx -> @@ -2255,7 +2287,7 @@ and transl_prim_2 env p arg1 arg2 dbg = (Cconst_int 3) dbg) idx (unaligned_load_32 ba_data idx dbg))))) - | Pstring_load_64(unsafe) -> + | Pstring_load_64(unsafe) | Pbytes_load_64(unsafe) -> box_int dbg Pint64 
(bind "str" (transl env arg1) (fun str -> bind "index" (untag_int (transl env arg2) dbg) (fun idx -> @@ -2331,18 +2363,6 @@ and transl_prim_2 env p arg1 arg2 dbg = unboxed_float_array_ref arr idx dbg)))) end - (* Operations on bitvects *) - | Pbittest -> - bind "index" (untag_int(transl env arg2) dbg) (fun idx -> - tag_int( - Cop(Cand, [Cop(Clsr, [Cop(Cload (Byte_unsigned, Mutable), - [add_int (transl env arg1) - (Cop(Clsr, [idx; Cconst_int 3], dbg)) - dbg], - dbg); - Cop(Cand, [idx; Cconst_int 7], dbg)], dbg); - Cconst_int 1], dbg)) dbg) - (* Boxed integers *) | Paddbint bi -> box_int dbg bi (Cop(Caddi, @@ -2391,7 +2411,7 @@ and transl_prim_2 env p arg1 arg2 dbg = [transl_unbox_int dbg env bi arg1; untag_int(transl env arg2) dbg], dbg)) | Pbintcomp(bi, cmp) -> - tag_int (Cop(Ccmpi(transl_comparison cmp), + tag_int (Cop(Ccmpi(transl_int_comparison cmp), [transl_unbox_int dbg env bi arg1; transl_unbox_int dbg env bi arg2], dbg)) dbg | prim -> @@ -2498,7 +2518,7 @@ and transl_prim_3 env p arg1 arg2 arg3 dbg = float_array_set arr idx newval dbg)))) end) - | Pstring_set_16(unsafe) -> + | Pbytes_set_16(unsafe) -> return_unit (bind "str" (transl env arg1) (fun str -> bind "index" (untag_int (transl env arg2) dbg) (fun idx -> @@ -2521,7 +2541,7 @@ and transl_prim_3 env p arg1 arg2 arg3 dbg = dbg) idx (unaligned_set_16 ba_data idx newval dbg)))))) - | Pstring_set_32(unsafe) -> + | Pbytes_set_32(unsafe) -> return_unit (bind "str" (transl env arg1) (fun str -> bind "index" (untag_int (transl env arg2) dbg) (fun idx -> @@ -2544,7 +2564,7 @@ and transl_prim_3 env p arg1 arg2 arg3 dbg = dbg) idx (unaligned_set_32 ba_data idx newval dbg)))))) - | Pstring_set_64(unsafe) -> + | Pbytes_set_64(unsafe) -> return_unit (bind "str" (transl env arg1) (fun str -> bind "index" (untag_int (transl env arg2) dbg) (fun idx -> @@ -2718,10 +2738,10 @@ and transl_switch loc env arg index cases = match Array.length cases with | 1 -> transl env cases.(0) | _ -> let cases = Array.map (transl env) 
cases in - let store = StoreExp.mk_store () in + let store = StoreExpForSwitch.mk_store () in let index = Array.map - (fun j -> store.Switch.act_store cases.(j)) + (fun j -> store.Switch.act_store j cases.(j)) index in let n_index = Array.length index in let inters = ref [] @@ -2801,10 +2821,16 @@ let transl_function f = Afl_instrument.instrument_function (transl env body) else transl env body in + let fun_codegen_options = + if !Clflags.optimize_for_speed then + [] + else + [ Reduce_code_size ] + in Cfunction {fun_name = f.label; fun_args = List.map (fun id -> (id, typ_val)) f.params; fun_body = cmm_body; - fun_fast = !Clflags.optimize_for_speed; + fun_codegen_options; fun_dbg = f.dbg} (* Translate all function definitions *) @@ -3019,18 +3045,24 @@ let emit_gc_roots_table ~symbols cont = (* Build preallocated blocks (used for Flambda [Initialize_symbol] constructs, and Clambda global module) *) -let preallocate_block cont { Clambda.symbol; exported; tag; size } = +let preallocate_block cont { Clambda.symbol; exported; tag; fields } = let space = (* These words will be registered as roots and as such must contain valid values, in case we are in no-naked-pointers mode. Likewise the block header must be black, below (see [caml_darken]), since the overall record may be referenced. 
*) - Array.to_list - (Array.init size (fun _index -> - Cint (Nativeint.of_int 1 (* Val_unit *)))) + List.map (fun field -> + match field with + | None -> + Cint (Nativeint.of_int 1 (* Val_unit *)) + | Some (Uconst_field_int n) -> + cint_const n + | Some (Uconst_field_ref label) -> + Csymbol_address label) + fields in let data = - Cint(black_block_header tag size) :: + Cint(black_block_header tag (List.length fields)) :: if exported then Cglobal_symbol symbol :: Cdefine_symbol symbol :: space @@ -3057,7 +3089,16 @@ let compunit (ulam, preallocated_blocks, constants) = transl empty_env ulam in let c1 = [Cfunction {fun_name = Compilenv.make_symbol (Some "entry"); fun_args = []; - fun_body = init_code; fun_fast = false; + fun_body = init_code; + (* This function is often large and run only once. + Compilation time matter more than runtime. + See MPR#7630 *) + fun_codegen_options = + if Config.flambda then [ + Reduce_code_size; + No_CSE; + ] + else [ Reduce_code_size ]; fun_dbg = Debuginfo.none }] in let c2 = emit_constants c1 constants in let c3 = transl_all_functions_and_emit_all_constants c2 in @@ -3198,7 +3239,7 @@ let send_function arity = {fun_name; fun_args = fun_args; fun_body = body; - fun_fast = true; + fun_codegen_options = []; fun_dbg = Debuginfo.none } let apply_function arity = @@ -3209,7 +3250,7 @@ let apply_function arity = {fun_name; fun_args = List.map (fun id -> (id, typ_val)) all_args; fun_body = body; - fun_fast = true; + fun_codegen_options = []; fun_dbg = Debuginfo.none; } @@ -3234,7 +3275,7 @@ let tuplify_function arity = Cop(Capply typ_val, get_field env (Cvar clos) 2 dbg :: access_components 0 @ [Cvar clos], dbg); - fun_fast = true; + fun_codegen_options = []; fun_dbg = Debuginfo.none; } @@ -3297,7 +3338,7 @@ let final_curry_function arity = "_" ^ string_of_int (arity-1); fun_args = [last_arg, typ_val; last_clos, typ_val]; fun_body = curry_fun [] last_clos (arity-1); - fun_fast = true; + fun_codegen_options = []; fun_dbg = Debuginfo.none } let 
rec intermediate_curry_functions arity num = @@ -3327,7 +3368,7 @@ let rec intermediate_curry_functions arity num = Cconst_symbol(name1 ^ "_" ^ string_of_int (num+1)); int_const 1; Cvar arg; Cvar clos], dbg); - fun_fast = true; + fun_codegen_options = []; fun_dbg = Debuginfo.none } :: (if arity <= max_arity_optimized && arity - num > 2 then @@ -3355,7 +3396,7 @@ let rec intermediate_curry_functions arity num = fun_args = direct_args @ [clos, typ_val]; fun_body = iter (num+1) (List.map (fun (arg,_) -> Cvar arg) direct_args) clos; - fun_fast = true; + fun_codegen_options = []; fun_dbg = Debuginfo.none } in cf :: intermediate_curry_functions arity (num+1) @@ -3418,7 +3459,7 @@ let entry_point namelist = Cfunction {fun_name = "caml_program"; fun_args = []; fun_body = body; - fun_fast = false; + fun_codegen_options = [Reduce_code_size]; fun_dbg = Debuginfo.none } (* Generate the table of globals *) diff --git a/asmcomp/compilenv.ml b/asmcomp/compilenv.ml index a1332031..0eb7eab2 100644 --- a/asmcomp/compilenv.ml +++ b/asmcomp/compilenv.ml @@ -39,7 +39,8 @@ let export_infos_table = let imported_sets_of_closures_table = (Set_of_closures_id.Tbl.create 10 - : Flambda.function_declarations option Set_of_closures_id.Tbl.t) + : Simple_value_approx.function_declarations option + Set_of_closures_id.Tbl.t) module CstMap = Map.Make(struct @@ -145,12 +146,6 @@ let current_unit_infos () = let current_unit_name () = current_unit.ui_name -let make_symbol ?(unitname = current_unit.ui_symbol) idopt = - let prefix = "caml" ^ unitname in - match idopt with - | None -> prefix - | Some id -> prefix ^ "__" ^ id - let symbol_in_current_unit name = let prefix = "caml" ^ current_unit.ui_symbol in name = prefix || @@ -278,9 +273,9 @@ let is_predefined_exception sym = let symbol_for_global' id = let sym_label = Linkage_name.create (symbol_for_global id) in if Ident.is_predef_exn id then - Symbol.unsafe_create predefined_exception_compilation_unit sym_label + Symbol.of_global_linkage 
predefined_exception_compilation_unit sym_label else - Symbol.unsafe_create (unit_for_global id) sym_label + Symbol.of_global_linkage (unit_for_global id) sym_label let set_global_approx approx = assert(not Config.flambda); @@ -307,14 +302,16 @@ let approx_for_global comp_unit = || not (Ident.global id) then invalid_arg (Format.asprintf "approx_for_global %a" Ident.print id); let modname = Ident.name id in - try Hashtbl.find export_infos_table modname with - | Not_found -> - let exported = match get_global_info id with - | None -> Export_info.empty - | Some ui -> get_flambda_export_info ui in - Hashtbl.add export_infos_table modname exported; - merged_environment := Export_info.merge !merged_environment exported; - exported + match Hashtbl.find export_infos_table modname with + | otherwise -> Some otherwise + | exception Not_found -> + match get_global_info id with + | None -> None + | Some ui -> + let exported = get_flambda_export_info ui in + Hashtbl.add export_infos_table modname exported; + merged_environment := Export_info.merge !merged_environment exported; + Some exported let approx_env () = !merged_environment @@ -348,16 +345,13 @@ let save_unit_info filename = current_unit.ui_imports_cmi <- Env.imports(); write_unit_info current_unit filename -let current_unit_linkage_name () = - Linkage_name.create (make_symbol ~unitname:current_unit.ui_symbol None) - let current_unit () = match Compilation_unit.get_current () with | Some current_unit -> current_unit | None -> Misc.fatal_error "Compilenv.current_unit" let current_unit_symbol () = - Symbol.unsafe_create (current_unit ()) (current_unit_linkage_name ()) + Symbol.of_global_linkage (current_unit ()) (current_unit_linkage_name ()) let const_label = ref 0 @@ -414,7 +408,7 @@ let closure_symbol fv = let linkage_name = concat_symbol unitname ((Closure_id.unique_name fv) ^ "_closure") in - Symbol.unsafe_create compilation_unit (Linkage_name.create linkage_name) + Symbol.of_global_linkage compilation_unit 
(Linkage_name.create linkage_name) let function_label fv = let compilation_unit = Closure_id.get_compilation_unit fv in diff --git a/asmcomp/compilenv.mli b/asmcomp/compilenv.mli index 98d5f024..4fcd55e2 100644 --- a/asmcomp/compilenv.mli +++ b/asmcomp/compilenv.mli @@ -26,7 +26,7 @@ open Cmx_format improvement feature. *) val imported_sets_of_closures_table - : Flambda.function_declarations option Set_of_closures_id.Tbl.t + : Simple_value_approx.function_declarations option Set_of_closures_id.Tbl.t (* flambda-only *) val reset: ?packname:string -> string -> unit @@ -91,7 +91,7 @@ val set_export_info: Export_info.t -> unit val approx_env: unit -> Export_info.t (* Returns all the information loaded from external compilation units flambda-only *) -val approx_for_global: Compilation_unit.t -> Export_info.t +val approx_for_global: Compilation_unit.t -> Export_info.t option (* Loads the exported information declaring the compilation_unit flambda-only *) diff --git a/asmcomp/export_info.ml b/asmcomp/export_info.ml index 82123a92..9edc7c1b 100644 --- a/asmcomp/export_info.ml +++ b/asmcomp/export_info.ml @@ -16,6 +16,8 @@ [@@@ocaml.warning "+a-4-9-30-40-41-42"] +module A = Simple_value_approx + type value_string_contents = | Contents of string | Unknown_or_mutable @@ -42,10 +44,11 @@ type descr = | Value_constptr of int | Value_float of float | Value_float_array of value_float_array - | Value_boxed_int : 'a Simple_value_approx.boxed_int * 'a -> descr + | Value_boxed_int : 'a A.boxed_int * 'a -> descr | Value_string of value_string | Value_closure of value_closure | Value_set_of_closures of value_set_of_closures + | Value_unknown_descr and value_closure = { closure_id : Closure_id.t; @@ -55,6 +58,7 @@ and value_closure = { and value_set_of_closures = { set_of_closures_id : Set_of_closures_id.t; bound_vars : approx Var_within_closure.Map.t; + free_vars : Flambda.specialised_to Variable.Map.t; results : approx Closure_id.Map.t; aliased_symbol : Symbol.t option; } @@ -98,6 
+102,8 @@ let equal_set_of_closures (s1:value_set_of_closures) let equal_descr (d1:descr) (d2:descr) : bool = match d1, d2 with + | Value_unknown_descr, Value_unknown_descr -> + true | Value_block (t1, f1), Value_block (t2, f2) -> Tag.equal t1 t2 && equal_array equal_approx f1 f2 | Value_mutable_block (t1, s1), Value_mutable_block (t2, s2) -> @@ -114,7 +120,7 @@ let equal_descr (d1:descr) (d2:descr) : bool = | Value_float_array s1, Value_float_array s2 -> s1 = s2 | Value_boxed_int (t1, v1), Value_boxed_int (t2, v2) -> - Simple_value_approx.equal_boxed_int t1 v1 t2 v2 + A.equal_boxed_int t1 v1 t2 v2 | Value_string s1, Value_string s2 -> s1 = s2 | Value_closure c1, Value_closure c2 -> @@ -125,53 +131,135 @@ let equal_descr (d1:descr) (d2:descr) : bool = | ( Value_block (_, _) | Value_mutable_block (_, _) | Value_int _ | Value_char _ | Value_constptr _ | Value_float _ | Value_float_array _ | Value_boxed_int _ | Value_string _ | Value_closure _ - | Value_set_of_closures _ ), + | Value_set_of_closures _ + | Value_unknown_descr ), ( Value_block (_, _) | Value_mutable_block (_, _) | Value_int _ | Value_char _ | Value_constptr _ | Value_float _ | Value_float_array _ | Value_boxed_int _ | Value_string _ | Value_closure _ - | Value_set_of_closures _ ) -> + | Value_set_of_closures _ + | Value_unknown_descr ) -> false type t = { - sets_of_closures : Flambda.function_declarations Set_of_closures_id.Map.t; - closures : Flambda.function_declarations Closure_id.Map.t; + sets_of_closures : A.function_declarations Set_of_closures_id.Map.t; values : descr Export_id.Map.t Compilation_unit.Map.t; symbol_id : Export_id.t Symbol.Map.t; offset_fun : int Closure_id.Map.t; offset_fv : int Var_within_closure.Map.t; - constant_sets_of_closures : Set_of_closures_id.Set.t; + constant_closures : Closure_id.Set.t; + invariant_params : Variable.Set.t Variable.Map.t Set_of_closures_id.Map.t; + recursive : Variable.Set.t Set_of_closures_id.Map.t; +} + +type transient = { + sets_of_closures : 
A.function_declarations Set_of_closures_id.Map.t; + values : descr Export_id.Map.t Compilation_unit.Map.t; + symbol_id : Export_id.t Symbol.Map.t; invariant_params : Variable.Set.t Variable.Map.t Set_of_closures_id.Map.t; + recursive : Variable.Set.t Set_of_closures_id.Map.t; + relevant_local_closure_ids : Closure_id.Set.t; + relevant_imported_closure_ids : Closure_id.Set.t; + relevant_local_vars_within_closure : Var_within_closure.Set.t; + relevant_imported_vars_within_closure : Var_within_closure.Set.t; } let empty : t = { sets_of_closures = Set_of_closures_id.Map.empty; - closures = Closure_id.Map.empty; values = Compilation_unit.Map.empty; symbol_id = Symbol.Map.empty; offset_fun = Closure_id.Map.empty; offset_fv = Var_within_closure.Map.empty; - constant_sets_of_closures = Set_of_closures_id.Set.empty; + constant_closures = Closure_id.Set.empty; invariant_params = Set_of_closures_id.Map.empty; + recursive = Set_of_closures_id.Map.empty; } -let create ~sets_of_closures ~closures ~values ~symbol_id - ~offset_fun ~offset_fv ~constant_sets_of_closures - ~invariant_params = +let opaque_transient ~compilation_unit ~root_symbol : transient = + let export_id = Export_id.create compilation_unit in + let values = + let map = Export_id.Map.singleton export_id Value_unknown_descr in + Compilation_unit.Map.singleton compilation_unit map + in + let symbol_id = Symbol.Map.singleton root_symbol export_id in + { sets_of_closures = Set_of_closures_id.Map.empty; + values; + symbol_id; + invariant_params = Set_of_closures_id.Map.empty; + recursive = Set_of_closures_id.Map.empty; + relevant_local_closure_ids = Closure_id.Set.empty; + relevant_imported_closure_ids = Closure_id.Set.empty; + relevant_local_vars_within_closure = Var_within_closure.Set.empty; + relevant_imported_vars_within_closure = Var_within_closure.Set.empty; + } + +let create ~sets_of_closures ~values ~symbol_id + ~offset_fun ~offset_fv ~constant_closures + ~invariant_params ~recursive = { sets_of_closures; - 
closures; values; symbol_id; offset_fun; offset_fv; - constant_sets_of_closures; + constant_closures; invariant_params; + recursive; } -let add_clambda_info t ~offset_fun ~offset_fv ~constant_sets_of_closures = - assert (Closure_id.Map.cardinal t.offset_fun = 0); - assert (Var_within_closure.Map.cardinal t.offset_fv = 0); - assert (Set_of_closures_id.Set.cardinal t.constant_sets_of_closures = 0); - { t with offset_fun; offset_fv; constant_sets_of_closures; } +let create_transient + ~sets_of_closures ~values ~symbol_id ~invariant_params ~recursive + ~relevant_local_closure_ids ~relevant_imported_closure_ids + ~relevant_local_vars_within_closure + ~relevant_imported_vars_within_closure = + { sets_of_closures; + values; + symbol_id; + invariant_params; + recursive; + relevant_local_closure_ids; + relevant_imported_closure_ids; + relevant_local_vars_within_closure; + relevant_imported_vars_within_closure; + } + +let t_of_transient transient + ~program:_ + ~local_offset_fun ~local_offset_fv + ~imported_offset_fun ~imported_offset_fv + ~constant_closures = + let offset_fun = + let fold_map set = + Closure_id.Map.fold (fun key value unchanged -> + if Closure_id.Set.mem key set then + Closure_id.Map.add key value unchanged + else + unchanged) + in + Closure_id.Map.empty + |> fold_map transient.relevant_local_closure_ids local_offset_fun + |> fold_map transient.relevant_imported_closure_ids imported_offset_fun + in + let offset_fv = + let fold_map set = + Var_within_closure.Map.fold (fun key value unchanged -> + if Var_within_closure.Set.mem key set then + Var_within_closure.Map.add key value unchanged + else + unchanged) + in + Var_within_closure.Map.empty + |> fold_map transient.relevant_local_vars_within_closure local_offset_fv + |> fold_map transient.relevant_imported_vars_within_closure + imported_offset_fv + in + { sets_of_closures = transient.sets_of_closures; + values = transient.values; + symbol_id = transient.symbol_id; + invariant_params = 
transient.invariant_params; + recursive = transient.recursive; + offset_fun; + offset_fv; + constant_closures; + } let merge (t1 : t) (t2 : t) : t = let eidmap_disjoint_union ?eq map1 map2 = @@ -189,20 +277,23 @@ let merge (t1 : t) (t2 : t) : t = sets_of_closures = Set_of_closures_id.Map.disjoint_union t1.sets_of_closures t2.sets_of_closures; - closures = Closure_id.Map.disjoint_union t1.closures t2.closures; symbol_id = Symbol.Map.disjoint_union ~print:Export_id.print t1.symbol_id t2.symbol_id; offset_fun = Closure_id.Map.disjoint_union ~eq:int_eq t1.offset_fun t2.offset_fun; offset_fv = Var_within_closure.Map.disjoint_union ~eq:int_eq t1.offset_fv t2.offset_fv; - constant_sets_of_closures = - Set_of_closures_id.Set.union t1.constant_sets_of_closures - t2.constant_sets_of_closures; + constant_closures = + Closure_id.Set.union t1.constant_closures t2.constant_closures; invariant_params = Set_of_closures_id.Map.disjoint_union ~print:(Variable.Map.print Variable.Set.print) ~eq:(Variable.Map.equal Variable.Set.equal) t1.invariant_params t2.invariant_params; + recursive = + Set_of_closures_id.Map.disjoint_union + ~print:Variable.Set.print + ~eq:Variable.Set.equal + t1.recursive t2.recursive; } let find_value eid map = @@ -225,8 +316,103 @@ let nest_eid_map map = in Export_id.Map.fold add_map map Compilation_unit.Map.empty -let print_approx ppf ((t,root_symbols) : t * Symbol.t list) = - let values = t.values in +let print_raw_approx ppf approx = + let fprintf = Format.fprintf in + match approx with + | Value_unknown -> fprintf ppf "(Unknown)" + | Value_id export_id -> fprintf ppf "(Id %a)" Export_id.print export_id + | Value_symbol symbol -> fprintf ppf "(Symbol %a)" Symbol.print symbol + +let print_value_set_of_closures ppf (t : value_set_of_closures) = + let print_bound_vars ppf bound_vars = + Format.fprintf ppf "(%a)" + (Var_within_closure.Map.print print_raw_approx) + bound_vars + in + let print_free_vars ppf free_vars = + Format.fprintf ppf "(%a)" + 
(Variable.Map.print Flambda.print_specialised_to) + free_vars + in + let print_results ppf results = + Format.fprintf ppf "(%a)" (Closure_id.Map.print print_raw_approx) results + in + let print_aliased_symbol ppf aliased_symbol = + match aliased_symbol with + | None -> Format.fprintf ppf "" + | Some symbol -> Format.fprintf ppf "(%a)" Symbol.print symbol + in + Format.fprintf ppf + "((set_of_closures_id %a) \ + (bound_vars %a) \ + (free_vars %a) \ + (results %a) \ + (aliased_symbol %a))" + Set_of_closures_id.print t.set_of_closures_id + print_bound_vars t.bound_vars + print_free_vars t.free_vars + print_results t.results + print_aliased_symbol t.aliased_symbol + +let print_value_closure ppf (t : value_closure) = + Format.fprintf ppf "((closure_id %a) (set_of_closures %a))" + Closure_id.print t.closure_id + print_value_set_of_closures t.set_of_closures + +let print_value_float_array_contents + ppf (value : value_float_array_contents) = + match value with + | Unknown_or_mutable -> Format.fprintf ppf "(Unknown_or_mutable)" + | Contents _ -> Format.fprintf ppf "(Contents ...)" + +let print_value_float_array ppf (value : value_float_array) = + Format.fprintf ppf "((size %d) (contents %a))" + value.size + print_value_float_array_contents value.contents + +let print_value_string_contents ppf (value : value_string_contents) = + match value with + | Unknown_or_mutable -> Format.fprintf ppf "(Unknown_or_mutable)" + | Contents _ -> Format.fprintf ppf "(Contents ...)" + +let print_value_string ppf (value : value_string) = + Format.fprintf ppf "((size %d) (contents %a))" + value.size + print_value_string_contents value.contents + +let print_raw_descr ppf descr = + let fprintf = Format.fprintf in + let print_approx_array ppf arr = + Array.iter (fun approx -> fprintf ppf "%a " print_raw_approx approx) arr + in + match descr with + | Value_block (tag, approx_array) -> + fprintf ppf "(Value_block (%a %a))" + Tag.print tag + print_approx_array approx_array + | Value_mutable_block 
(tag, i) -> + fprintf ppf "(Value_mutable-block (%a %d))" Tag.print tag i + | Value_int i -> fprintf ppf "(Value_int %d)" i + | Value_char c -> fprintf ppf "(Value_char %c)" c + | Value_constptr p -> fprintf ppf "(Value_constptr %d)" p + | Value_float f -> fprintf ppf "(Value_float %.3f)" f + | Value_float_array value_float_array -> + fprintf ppf "(Value_float_array %a)" + print_value_float_array value_float_array + | Value_boxed_int _ -> + fprintf ppf "(Value_Boxed_int)" + | Value_string value_string -> + fprintf ppf "(Value_string %a)" print_value_string value_string + | Value_closure value_closure -> + fprintf ppf "(Value_closure %a)" + print_value_closure value_closure + | Value_set_of_closures value_set_of_closures -> + fprintf ppf "(Value_set_of_closures %a)" + print_value_set_of_closures value_set_of_closures + | Value_unknown_descr -> fprintf ppf "(Value_unknown_descr)" + +let print_approx_components ppf ~symbol_id ~values + (root_symbols : Symbol.t list) = let fprintf = Format.fprintf in let printed = ref Export_id.Set.empty in let recorded_symbol = ref Symbol.Set.empty in @@ -286,11 +472,12 @@ let print_approx ppf ((t,root_symbols) : t * Symbol.t list) = | Contents _ -> "_imm") float_array.size | Value_boxed_int (t, i) -> - let module A = Simple_value_approx in - match t with + begin match t with | A.Int32 -> Format.fprintf ppf "%li" i | A.Int64 -> Format.fprintf ppf "%Li" i | A.Nativeint -> Format.fprintf ppf "%ni" i + end + | Value_unknown_descr -> Format.fprintf ppf "?" 
and print_fields ppf fields = Array.iter (fun approx -> fprintf ppf "%a@ " print_approx approx) fields and print_set_of_closures ppf @@ -321,7 +508,7 @@ let print_approx ppf ((t,root_symbols) : t * Symbol.t list) = let rec print_recorded_symbols () = if not (Queue.is_empty symbols_to_print) then begin let sym = Queue.pop symbols_to_print in - begin match Symbol.Map.find sym t.symbol_id with + begin match Symbol.Map.find sym symbol_id with | exception Not_found -> () | id -> fprintf ppf "@[%a:@ %a@];@ " @@ -337,6 +524,11 @@ let print_approx ppf ((t,root_symbols) : t * Symbol.t list) = print_recorded_symbols (); fprintf ppf "@]" +let print_approx ppf ((t : t), symbols) = + let symbol_id = t.symbol_id in + let values = t.values in + print_approx_components ppf ~symbol_id ~values symbols + let print_offsets ppf (t : t) = Format.fprintf ppf "@[offset_fun:@ "; Closure_id.Map.iter (fun cid off -> @@ -349,7 +541,8 @@ let print_offsets ppf (t : t) = Format.fprintf ppf "@]@ " let print_functions ppf (t : t) = - Set_of_closures_id.Map.print Flambda.print_function_declarations ppf + Set_of_closures_id.Map.print + A.print_function_declarations ppf t.sets_of_closures let print_all ppf ((t, root_symbols) : t * Symbol.t list) = diff --git a/asmcomp/export_info.mli b/asmcomp/export_info.mli index d6fbd7ae..f93698be 100644 --- a/asmcomp/export_info.mli +++ b/asmcomp/export_info.mli @@ -19,6 +19,8 @@ (** Exported information (that is to say, information written into a .cmx file) about a compilation unit. 
*) +module A = Simple_value_approx + type value_string_contents = | Contents of string | Unknown_or_mutable @@ -45,10 +47,11 @@ type descr = | Value_constptr of int | Value_float of float | Value_float_array of value_float_array - | Value_boxed_int : 'a Simple_value_approx.boxed_int * 'a -> descr + | Value_boxed_int : 'a A.boxed_int * 'a -> descr | Value_string of value_string | Value_closure of value_closure | Value_set_of_closures of value_set_of_closures + | Value_unknown_descr and value_closure = { closure_id : Closure_id.t; @@ -58,6 +61,7 @@ and value_closure = { and value_set_of_closures = { set_of_closures_id : Set_of_closures_id.t; bound_vars : approx Var_within_closure.Map.t; + free_vars : Flambda.specialised_to Variable.Map.t; results : approx Closure_id.Map.t; aliased_symbol : Symbol.t option; } @@ -76,10 +80,8 @@ and approx = (** A structure that describes what a single compilation unit exports. *) type t = private { - sets_of_closures : Flambda.function_declarations Set_of_closures_id.Map.t; + sets_of_closures : A.function_declarations Set_of_closures_id.Map.t; (** Code of exported functions indexed by set of closures IDs. *) - closures : Flambda.function_declarations Closure_id.Map.t; - (** Code of exported functions indexed by closure IDs. *) values : descr Export_id.Map.t Compilation_unit.Map.t; (** Structure of exported values. *) symbol_id : Export_id.t Symbol.Map.t; @@ -88,28 +90,58 @@ type t = private { (** Positions of function pointers in their closures. *) offset_fv : int Var_within_closure.Map.t; (** Positions of value pointers in their closures. *) - constant_sets_of_closures : Set_of_closures_id.Set.t; + constant_closures : Closure_id.Set.t; (* CR-soon mshinwell for pchambart: Add comment *) invariant_params : Variable.Set.t Variable.Map.t Set_of_closures_id.Map.t; (* Function parameters known to be invariant (see [Invariant_params]) indexed by set of closures ID. 
*) + recursive : Variable.Set.t Set_of_closures_id.Map.t; +} + +type transient = private { + sets_of_closures : A.function_declarations Set_of_closures_id.Map.t; + values : descr Export_id.Map.t Compilation_unit.Map.t; + symbol_id : Export_id.t Symbol.Map.t; + invariant_params : Variable.Set.t Variable.Map.t Set_of_closures_id.Map.t; + recursive : Variable.Set.t Set_of_closures_id.Map.t; + relevant_local_closure_ids : Closure_id.Set.t; + relevant_imported_closure_ids : Closure_id.Set.t; + relevant_local_vars_within_closure : Var_within_closure.Set.t; + relevant_imported_vars_within_closure : Var_within_closure.Set.t; } (** Export information for a compilation unit that exports nothing. *) val empty : t +val opaque_transient + : compilation_unit:Compilation_unit.t + -> root_symbol:Symbol.t + -> transient + (** Create a new export information structure. *) val create - : sets_of_closures:Flambda.function_declarations Set_of_closures_id.Map.t - -> closures:Flambda.function_declarations Closure_id.Map.t + : sets_of_closures:(A.function_declarations Set_of_closures_id.Map.t) -> values:descr Export_id.Map.t Compilation_unit.Map.t -> symbol_id:Export_id.t Symbol.Map.t -> offset_fun:int Closure_id.Map.t -> offset_fv:int Var_within_closure.Map.t - -> constant_sets_of_closures:Set_of_closures_id.Set.t + -> constant_closures:Closure_id.Set.t -> invariant_params:Variable.Set.t Variable.Map.t Set_of_closures_id.Map.t + -> recursive:Variable.Set.t Set_of_closures_id.Map.t -> t +val create_transient + : sets_of_closures:(A.function_declarations Set_of_closures_id.Map.t) + -> values:descr Export_id.Map.t Compilation_unit.Map.t + -> symbol_id:Export_id.t Symbol.Map.t + -> invariant_params:Variable.Set.t Variable.Map.t Set_of_closures_id.Map.t + -> recursive:Variable.Set.t Set_of_closures_id.Map.t + -> relevant_local_closure_ids: Closure_id.Set.t + -> relevant_imported_closure_ids : Closure_id.Set.t + -> relevant_local_vars_within_closure : Var_within_closure.Set.t + -> 
relevant_imported_vars_within_closure : Var_within_closure.Set.t + -> transient + (* CR-someday pchambart: Should we separate [t] in 2 types: one created by the current [create] function, returned by [Build_export_info]. And another built using t and offset_informations returned by @@ -119,11 +151,14 @@ val create (** Record information about the layout of closures and which sets of closures are constant. These are all worked out during the [Flambda_to_clambda] pass. *) -val add_clambda_info - : t - -> offset_fun:int Closure_id.Map.t - -> offset_fv:int Var_within_closure.Map.t - -> constant_sets_of_closures:Set_of_closures_id.Set.t +val t_of_transient + : transient + -> program: Flambda.program + -> local_offset_fun:int Closure_id.Map.t + -> local_offset_fv:int Var_within_closure.Map.t + -> imported_offset_fun:int Closure_id.Map.t + -> imported_offset_fv:int Var_within_closure.Map.t + -> constant_closures:Closure_id.Set.t -> t (** Union of export information. Verifies that there are no identifier @@ -143,7 +178,18 @@ val nest_eid_map (**/**) (* Debug printing functions. 
*) +val print_approx_components + : Format.formatter + -> symbol_id: Export_id.t Symbol.Map.t + -> values: descr Export_id.Map.t Compilation_unit.Map.t + -> Symbol.t list + -> unit val print_approx : Format.formatter -> t * Symbol.t list -> unit val print_functions : Format.formatter -> t -> unit val print_offsets : Format.formatter -> t -> unit val print_all : Format.formatter -> t * Symbol.t list -> unit + +(** Prints approx and descr as it is, without recursively looking up + [Export_id.t] *) +val print_raw_approx : Format.formatter -> approx -> unit +val print_raw_descr : Format.formatter -> descr -> unit diff --git a/asmcomp/export_info_for_pack.ml b/asmcomp/export_info_for_pack.ml index da413408..42a81553 100644 --- a/asmcomp/export_info_for_pack.ml +++ b/asmcomp/export_info_for_pack.ml @@ -16,11 +16,13 @@ [@@@ocaml.warning "+a-4-9-30-40-41-42"] +module A = Simple_value_approx + let rename_id_state = Export_id.Tbl.create 100 let rename_set_of_closures_id_state = Set_of_closures_id.Tbl.create 10 let imported_function_declarations_table = (Set_of_closures_id.Tbl.create 10 - : Flambda.function_declarations Set_of_closures_id.Tbl.t) + : A.function_declarations Set_of_closures_id.Tbl.t) (* Rename export identifiers' compilation units to denote that they now live within a pack. 
*) @@ -82,6 +84,7 @@ let import_set_of_closures units pack bound_vars = Var_within_closure.Map.map (import_approx_for_pack units pack) set_of_closures.bound_vars; + free_vars = set_of_closures.free_vars; results = Closure_id.Map.map (import_approx_for_pack units pack) set_of_closures.results; @@ -111,6 +114,7 @@ let import_descr_for_pack units pack (descr : Export_info.descr) } | Value_set_of_closures set_of_closures -> Value_set_of_closures (import_set_of_closures units pack set_of_closures) + | Value_unknown_descr -> Value_unknown_descr let rec import_code_for_pack units pack expr = Flambda_iterators.map_named (function @@ -124,7 +128,7 @@ let rec import_code_for_pack units pack expr = ~specialised_args:set_of_closures.specialised_args ~direct_call_surrogates:set_of_closures.direct_call_surrogates ~function_decls: - (import_function_declarations_for_pack units pack + (import_function_declarations_for_pack_aux units pack set_of_closures.function_decls) in Set_of_closures set_of_closures @@ -134,13 +138,15 @@ let rec import_code_for_pack units pack expr = and import_function_declarations_for_pack_aux units pack (function_decls : Flambda.function_declarations) = let funs = - Variable.Map.map (fun (function_decl : Flambda.function_declaration) -> + Variable.Map.map + (fun (function_decl : Flambda.function_declaration) -> Flambda.create_function_declaration ~params:function_decl.params ~body:(import_code_for_pack units pack function_decl.body) ~stub:function_decl.stub ~dbg:function_decl.dbg ~inline:function_decl.inline ~specialise:function_decl.specialise - ~is_a_functor:function_decl.is_a_functor) + ~is_a_functor:function_decl.is_a_functor + ~closure_origin:function_decl.closure_origin) function_decls.funs in Flambda.import_function_declarations_for_pack @@ -148,8 +154,22 @@ and import_function_declarations_for_pack_aux units pack (import_set_of_closures_id_for_pack units pack) (import_set_of_closures_origin_for_pack units pack) -and 
import_function_declarations_for_pack units pack - (function_decls:Flambda.function_declarations) = +let import_function_declarations_for_pack_aux units pack + (function_decls : A.function_declarations) : A.function_declarations = + let funs = + Variable.Map.map + (fun (function_decl : A.function_declaration) -> + A.update_function_declaration_body function_decl + (fun body -> import_code_for_pack units pack body)) + function_decls.funs + in + A.import_function_declarations_for_pack + (A.update_function_declarations function_decls ~funs) + (import_set_of_closures_id_for_pack units pack) + (import_set_of_closures_origin_for_pack units pack) + +let import_function_declarations_approx_for_pack units pack + (function_decls: A.function_declarations) = let original_set_of_closures_id = function_decls.set_of_closures_id in try Set_of_closures_id.Tbl.find imported_function_declarations_table @@ -183,7 +203,7 @@ let import_for_pack ~pack_units ~pack (exp : Export_info.t) = import_set_of_closures_id_for_pack pack_units pack in let import_function_declarations = - import_function_declarations_for_pack pack_units pack + import_function_declarations_approx_for_pack pack_units pack in let sets_of_closures = Set_of_closures_id.Map.map_keys import_set_of_closures_id @@ -192,18 +212,18 @@ let import_for_pack ~pack_units ~pack (exp : Export_info.t) = exp.sets_of_closures) in Export_info.create ~sets_of_closures - ~closures:(Flambda_utils.make_closure_map' sets_of_closures) ~offset_fun:exp.offset_fun ~offset_fv:exp.offset_fv ~values:(import_eidmap import_descr exp.values) ~symbol_id:(Symbol.Map.map_keys import_sym (Symbol.Map.map import_eid exp.symbol_id)) - ~constant_sets_of_closures: - (Set_of_closures_id.Set.map import_set_of_closures_id - exp.constant_sets_of_closures) + ~constant_closures:exp.constant_closures ~invariant_params: (Set_of_closures_id.Map.map_keys import_set_of_closures_id exp.invariant_params) + ~recursive: + (Set_of_closures_id.Map.map_keys 
import_set_of_closures_id + exp.recursive) let clear_import_state () = Set_of_closures_id.Tbl.clear imported_function_declarations_table; diff --git a/asmcomp/flambda_to_clambda.ml b/asmcomp/flambda_to_clambda.ml index b9d2beb9..713e87bf 100644 --- a/asmcomp/flambda_to_clambda.ml +++ b/asmcomp/flambda_to_clambda.ml @@ -16,28 +16,25 @@ [@@@ocaml.warning "+a-4-9-30-40-41-42"] -type for_one_or_more_units = { +type 'a for_one_or_more_units = { fun_offset_table : int Closure_id.Map.t; fv_offset_table : int Var_within_closure.Map.t; - closures : Flambda.function_declarations Closure_id.Map.t; - constant_sets_of_closures : Set_of_closures_id.Set.t; + constant_closures : Closure_id.Set.t; + closures: Closure_id.Set.t; } type t = { - current_unit : for_one_or_more_units; - imported_units : for_one_or_more_units; + current_unit : Set_of_closures_id.t for_one_or_more_units; + imported_units : Simple_value_approx.function_declarations for_one_or_more_units; } -type ('a, 'b) declaration_position = - | Current_unit of 'a - | Imported_unit of 'b - | Not_declared - let get_fun_offset t closure_id = let fun_offset_table = if Closure_id.in_compilation_unit closure_id (Compilenv.current_unit ()) - then t.current_unit.fun_offset_table - else t.imported_units.fun_offset_table + then + t.current_unit.fun_offset_table + else + t.imported_units.fun_offset_table in try Closure_id.Map.find closure_id fun_offset_table with Not_found -> @@ -56,23 +53,12 @@ let get_fv_offset t var_within_closure = Misc.fatal_errorf "Flambda_to_clambda: missing offset for variable %a" Var_within_closure.print var_within_closure -let function_declaration_position t closure_id = - try - Current_unit (Closure_id.Map.find closure_id t.current_unit.closures) - with Not_found -> - try - Imported_unit (Closure_id.Map.find closure_id t.imported_units.closures) - with Not_found -> Not_declared - let is_function_constant t closure_id = - match function_declaration_position t closure_id with - | Current_unit { 
set_of_closures_id } -> - Set_of_closures_id.Set.mem set_of_closures_id - t.current_unit.constant_sets_of_closures - | Imported_unit { set_of_closures_id } -> - Set_of_closures_id.Set.mem set_of_closures_id - t.imported_units.constant_sets_of_closures - | Not_declared -> + if Closure_id.Set.mem closure_id t.current_unit.closures then + Closure_id.Set.mem closure_id t.current_unit.constant_closures + else if Closure_id.Set.mem closure_id t.imported_units.closures then + Closure_id.Set.mem closure_id t.imported_units.constant_closures + else Misc.fatal_errorf "Flambda_to_clambda: missing closure %a" Closure_id.print closure_id @@ -156,14 +142,14 @@ end = struct let ident_for_var_exn t id = Variable.Map.find id t.var let add_fresh_ident t var = - let id = Ident.create (Variable.unique_name var) in + let id = Ident.create (Variable.name var) in id, { t with var = Variable.Map.add var id t.var } let ident_for_mutable_var_exn t mut_var = Mutable_variable.Map.find mut_var t.mutable_var let add_fresh_mutable_ident t mut_var = - let id = Mutable_variable.unique_ident mut_var in + let id = Ident.create (Mutable_variable.name mut_var) in let mutable_var = Mutable_variable.Map.add mut_var id t.mutable_var in id, { t with mutable_var; } @@ -411,13 +397,28 @@ and to_clambda_switch t env cases num_keys default = if Numbers.Int.Set.cardinal num_keys = 0 then 0 else Numbers.Int.Set.max_elt num_keys + 1 in - let index = Array.make num_keys 0 in let store = Flambda_utils.Switch_storer.mk_store () in - begin match default with - | Some def when List.length cases < num_keys -> ignore (store.act_store def) - | _ -> () + let default_action = + match default with + | Some def when List.length cases < num_keys -> + store.act_store () def + | _ -> -1 + in + let index = Array.make num_keys default_action in + let smallest_key = ref num_keys in + List.iter + (fun (key, lam) -> + index.(key) <- store.act_store () lam; + smallest_key := min key !smallest_key + ) + cases; + if !smallest_key < 
num_keys then begin + let action = ref index.(!smallest_key) in + Array.iteri + (fun i act -> + if act >= 0 then action := act else index.(i) <- !action) + index end; - List.iter (fun (key, lam) -> index.(key) <- store.act_store lam) cases; let actions = Array.map (to_clambda t env) (store.act_get ()) in match actions with | [| |] -> [| |], [| |] (* May happen when [default] is [None]. *) @@ -569,7 +570,7 @@ and to_clambda_closed_set_of_closures t env symbol let to_clambda_initialize_symbol t env symbol fields : Clambda.ulambda = let fields = - List.mapi (fun index expr -> index, to_clambda t env expr) fields + List.map (fun (index, expr) -> index, to_clambda t env expr) fields in let build_setfield (index, field) : Clambda.ulambda = (* Note that this will never cause a write barrier hit, owing to @@ -602,7 +603,9 @@ let accumulate_structured_constants t env symbol let to_clambda_program t env constants (program : Flambda.program) = let rec loop env constants (program : Flambda.program_body) - : Clambda.ulambda * Clambda.ustructured_constant Symbol.Map.t = + : Clambda.ulambda * + Clambda.ustructured_constant Symbol.Map.t * + Clambda.preallocated_block list = match program with | Let_symbol (symbol, alloc, program) -> (* Useful only for unboxing. Since floats and boxed integers will @@ -624,19 +627,52 @@ let to_clambda_program t env constants (program : Flambda.program) = constants defs in loop env constants program - | Initialize_symbol (symbol, _tag, fields, program) -> - (* The tag is ignored here: It is used separately to generate the - preallocated block. Only the initialisation code is generated - here. 
*) - let e1 = to_clambda_initialize_symbol t env symbol fields in - let e2, constants = loop env constants program in - Usequence (e1, e2), constants + | Initialize_symbol (symbol, tag, fields, program) -> + let fields = + List.mapi (fun i field -> + i, field, + Initialize_symbol_to_let_symbol.constant_field field) + fields + in + let init_fields = + Misc.Stdlib.List.filter_map (function + | (i, field, None) -> Some (i, field) + | (_, _, Some _) -> None) + fields + in + let constant_fields = + List.map (fun (_, _, constant_field) -> + match constant_field with + | None -> None + | Some (Flambda.Const const) -> + let n = + match const with + | Int i -> i + | Char c -> Char.code c + | Const_pointer i -> i + in + Some (Clambda.Uconst_field_int n) + | Some (Flambda.Symbol sym) -> + let lbl = Linkage_name.to_string (Symbol.label sym) in + Some (Clambda.Uconst_field_ref lbl)) + fields + in + let e1 = to_clambda_initialize_symbol t env symbol init_fields in + let preallocated_block : Clambda.preallocated_block = + { symbol = Linkage_name.to_string (Symbol.label symbol); + exported = true; + tag = Tag.to_int tag; + fields = constant_fields; + } + in + let e2, constants, preallocated_blocks = loop env constants program in + Usequence (e1, e2), constants, preallocated_block :: preallocated_blocks | Effect (expr, program) -> let e1 = to_clambda t env expr in - let e2, constants = loop env constants program in - Usequence (e1, e2), constants + let e2, constants, preallocated_blocks = loop env constants program in + Usequence (e1, e2), constants, preallocated_blocks | End _ -> - Uconst (Uconst_ptr 0), constants + Uconst (Uconst_ptr 0), constants, [] in loop env constants program.program_body @@ -647,49 +683,52 @@ type result = { exported : Export_info.t; } -let convert (program, exported) : result = +let convert (program, exported_transient) : result = let current_unit = + let closures = + Closure_id.Map.keys (Flambda_utils.make_closure_map program) + in + let constant_closures 
= + Flambda_utils.all_lifted_constant_closures program + in let offsets = Closure_offsets.compute program in { fun_offset_table = offsets.function_offsets; fv_offset_table = offsets.free_variable_offsets; - closures = Flambda_utils.make_closure_map program; - constant_sets_of_closures = - Flambda_utils.all_lifted_constant_sets_of_closures program; + constant_closures; + closures; } in let imported_units = let imported = Compilenv.approx_env () in + let closures = + Set_of_closures_id.Map.fold + (fun (_ : Set_of_closures_id.t) fun_decls acc -> + Variable.Map.fold + (fun var (_ : Simple_value_approx.function_declaration) acc -> + let closure_id = Closure_id.wrap var in + Closure_id.Set.add closure_id acc) + fun_decls.Simple_value_approx.funs + acc) + imported.sets_of_closures + Closure_id.Set.empty + in { fun_offset_table = imported.offset_fun; fv_offset_table = imported.offset_fv; - closures = imported.closures; - constant_sets_of_closures = imported.constant_sets_of_closures; + constant_closures = imported.constant_closures; + closures; } in let t = { current_unit; imported_units; } in - let preallocated_blocks = - List.map (fun (symbol, tag, fields) -> - { Clambda. 
- symbol = Linkage_name.to_string (Symbol.label symbol); - exported = true; - tag = Tag.to_int tag; - size = List.length fields; - }) - (Flambda_utils.initialize_symbols program) - in - let expr, structured_constants = + let expr, structured_constants, preallocated_blocks = to_clambda_program t Env.empty Symbol.Map.empty program in - let offset_fun, offset_fv = - Closure_offsets.compute_reexported_offsets program - ~current_unit_offset_fun:current_unit.fun_offset_table - ~current_unit_offset_fv:current_unit.fv_offset_table - ~imported_units_offset_fun:imported_units.fun_offset_table - ~imported_units_offset_fv:imported_units.fv_offset_table - in let exported = - Export_info.add_clambda_info exported - ~offset_fun - ~offset_fv - ~constant_sets_of_closures:current_unit.constant_sets_of_closures + Export_info.t_of_transient exported_transient + ~program + ~local_offset_fun:current_unit.fun_offset_table + ~local_offset_fv:current_unit.fv_offset_table + ~imported_offset_fun:imported_units.fun_offset_table + ~imported_offset_fv:imported_units.fv_offset_table + ~constant_closures:current_unit.constant_closures in { expr; preallocated_blocks; structured_constants; exported; } diff --git a/asmcomp/flambda_to_clambda.mli b/asmcomp/flambda_to_clambda.mli index 39cbc40f..8c493d40 100644 --- a/asmcomp/flambda_to_clambda.mli +++ b/asmcomp/flambda_to_clambda.mli @@ -35,4 +35,4 @@ type result = { For direct calls, the hidden closure parameter is added. Switch tables are also built. 
*) -val convert : Flambda.program * Export_info.t -> result +val convert : Flambda.program * Export_info.transient -> result diff --git a/asmcomp/i386/emit.mlp b/asmcomp/i386/emit.mlp index 0d984d5d..2d633f55 100644 --- a/asmcomp/i386/emit.mlp +++ b/asmcomp/i386/emit.mlp @@ -356,7 +356,7 @@ let is_tos = function { loc = Reg _; typ = Float } -> true | _ -> false (* Emit the code for a floating-point comparison *) -let emit_float_test cmp neg arg lbl = +let emit_float_test cmp arg lbl = let actual_cmp = match (is_tos arg.(0), is_tos arg.(1)) with | (true, true) -> @@ -370,7 +370,7 @@ let emit_float_test cmp neg arg lbl = | (false, true) -> (* second arg on top of FP stack *) I.fcomp (reg arg.(0)); - Cmm.swap_comparison cmp + Cmm.swap_float_comparison cmp | (false, false) -> I.fld (reg arg.(0)); I.fcomp (reg arg.(1)); @@ -378,49 +378,44 @@ let emit_float_test cmp neg arg lbl = in I.fnstsw ax; match actual_cmp with - | Ceq -> - if neg then begin - I.and_ (int 68) ah; - I.xor (int 64) ah; - I.jne lbl - end else begin - I.and_ (int 69) ah; - I.cmp (int 64) ah; - I.je lbl - end - | Cne -> - if neg then begin - I.and_ (int 69) ah; - I.cmp (int 64) ah; - I.je lbl - end else begin - I.and_ (int 68) ah; - I.xor (int 64) ah; - I.jne lbl - end - | Cle -> + | CFeq -> + I.and_ (int 69) ah; + I.cmp (int 64) ah; + I.je lbl + | CFneq -> + I.and_ (int 68) ah; + I.xor (int 64) ah; + I.jne lbl + | CFle -> I.and_ (int 69) ah; I.dec ah; I.cmp (int 64) ah; - if neg - then I.jae lbl - else I.jb lbl - | Cge -> + I.jb lbl + | CFnle -> + I.and_ (int 69) ah; + I.dec ah; + I.cmp (int 64) ah; + I.jae lbl + | CFge -> + I.and_ (int 5) ah; + I.je lbl + | CFnge -> I.and_ (int 5) ah; - if neg - then I.jne lbl - else I.je lbl - | Clt -> + I.jne lbl + | CFlt -> I.and_ (int 69) ah; I.cmp (int 1) ah; - if neg - then I.jne lbl - else I.je lbl - | Cgt -> + I.je lbl + | CFnlt -> + I.and_ (int 69) ah; + I.cmp (int 1) ah; + I.jne lbl + | CFgt -> + I.and_ (int 69) ah; + I.je lbl + | CFngt -> I.and_ (int 69) 
ah; - if neg - then I.jne lbl - else I.je lbl + I.jne lbl (* Emit a Ifloatspecial instruction *) @@ -825,8 +820,8 @@ let emit_instr fallthrough i = | Iinttest_imm(cmp, n) -> I.cmp (int n) (reg i.arg.(0)); I.j (cond cmp) lbl - | Ifloattest(cmp, neg) -> - emit_float_test cmp neg i.arg lbl + | Ifloattest cmp -> + emit_float_test cmp i.arg lbl | Ioddtest -> I.test (int 1) (reg i.arg.(0)); I.jne lbl diff --git a/asmcomp/i386/proc.ml b/asmcomp/i386/proc.ml index 9350fc96..7e883fc7 100644 --- a/asmcomp/i386/proc.ml +++ b/asmcomp/i386/proc.ml @@ -191,7 +191,7 @@ let destroyed_at_oper = function | Iop(Ialloc _ | Iintop Imulh) -> [| eax |] | Iop(Iintop(Icomp _) | Iintop_imm(Icomp _, _)) -> [| eax |] | Iop(Iintoffloat) -> [| eax |] - | Iifthenelse(Ifloattest(_, _), _, _) -> [| eax |] + | Iifthenelse(Ifloattest _, _, _) -> [| eax |] | _ -> [||] let destroyed_at_raise = all_phys_regs diff --git a/asmcomp/import_approx.ml b/asmcomp/import_approx.ml index 0ab09ca0..64fbbb8b 100644 --- a/asmcomp/import_approx.ml +++ b/asmcomp/import_approx.ml @@ -19,11 +19,11 @@ module A = Simple_value_approx let import_set_of_closures = - let import_function_declarations (clos : Flambda.function_declarations) - : Flambda.function_declarations = + let import_function_declarations (clos : A.function_declarations) + : A.function_declarations = (* CR-soon mshinwell for pchambart: Do we still need to do this rewriting? I'm wondering if maybe we don't have to any more. 
*) - let sym_to_fun_var_map (clos : Flambda.function_declarations) = + let sym_to_fun_var_map (clos : A.function_declarations) = Variable.Map.fold (fun fun_var _ acc -> let closure_id = Closure_id.wrap fun_var in let sym = Compilenv.closure_symbol closure_id in @@ -40,118 +40,144 @@ let import_set_of_closures = | named -> named in let funs = - Variable.Map.map (fun (function_decl : Flambda.function_declaration) -> - let body = - Flambda_iterators.map_toplevel_named f_named function_decl.body - in - Flambda.create_function_declaration ~params:function_decl.params - ~body ~stub:function_decl.stub ~dbg:function_decl.dbg - ~inline:function_decl.inline - ~specialise:function_decl.specialise - ~is_a_functor:function_decl.is_a_functor) + Variable.Map.map (fun (function_decl : A.function_declaration) -> + A.update_function_declaration_body function_decl + (Flambda_iterators.map_toplevel_named f_named)) clos.funs in - Flambda.update_function_declarations clos ~funs + A.update_function_declarations clos ~funs in let aux set_of_closures_id = - ignore (Compilenv.approx_for_global - (Set_of_closures_id.get_compilation_unit set_of_closures_id)); - let ex_info = Compilenv.approx_env () in - let function_declarations = + match + Compilenv.approx_for_global + (Set_of_closures_id.get_compilation_unit set_of_closures_id) + with + | None -> None + | Some ex_info -> try - Some (Set_of_closures_id.Map.find set_of_closures_id - ex_info.sets_of_closures) + let function_declarations = + Set_of_closures_id.Map.find set_of_closures_id + ex_info.sets_of_closures + in + Some (import_function_declarations function_declarations) with Not_found -> - None - in - match function_declarations with - | None -> None - | Some function_declarations -> - Some (import_function_declarations function_declarations) + Misc.fatal_error "Cannot find set of closures" in Set_of_closures_id.Tbl.memoize Compilenv.imported_sets_of_closures_table aux let rec import_ex ex = - ignore (Compilenv.approx_for_global 
(Export_id.get_compilation_unit ex)); - let ex_info = Compilenv.approx_env () in - let import_value_set_of_closures ~set_of_closures_id ~bound_vars + let import_value_set_of_closures ~set_of_closures_id ~bound_vars ~free_vars ~(ex_info : Export_info.t) ~what : A.value_set_of_closures option = let bound_vars = Var_within_closure.Map.map import_approx bound_vars in - match - Set_of_closures_id.Map.find set_of_closures_id ex_info.invariant_params - with - | exception Not_found -> - Misc.fatal_errorf "Set of closures ID %a not found in invariant_params \ - (when importing [%a: %s])" - Set_of_closures_id.print set_of_closures_id - Export_id.print ex - what - | invariant_params -> - match import_set_of_closures set_of_closures_id with - | None -> None - | Some function_decls -> - Some (A.create_value_set_of_closures - ~function_decls - ~bound_vars - ~invariant_params:(lazy invariant_params) - ~specialised_args:Variable.Map.empty - ~freshening:Freshening.Project_var.empty - ~direct_call_surrogates:Closure_id.Map.empty) + match import_set_of_closures set_of_closures_id with + | None -> None + | Some function_decls -> + (* CR-someday xclerc: add a test to the test suite to ensure that + classic mode behaves as expected. 
*) + let is_classic_mode = function_decls.is_classic_mode in + let invariant_params = + match + Set_of_closures_id.Map.find set_of_closures_id + ex_info.invariant_params + with + | exception Not_found -> + if is_classic_mode then + Variable.Map.empty + else + Misc.fatal_errorf "Set of closures ID %a not found in \ + invariant_params (when importing [%a: %s])" + Set_of_closures_id.print set_of_closures_id + Export_id.print ex + what + | found -> found + in + let recursive = + match + Set_of_closures_id.Map.find set_of_closures_id ex_info.recursive + with + | exception Not_found -> + if is_classic_mode then + Variable.Set.empty + else + Misc.fatal_errorf "Set of closures ID %a not found in \ + recursive (when importing [%a: %s])" + Set_of_closures_id.print set_of_closures_id + Export_id.print ex + what + | found -> found + in + Some (A.create_value_set_of_closures + ~function_decls + ~bound_vars + ~free_vars + ~invariant_params:(lazy invariant_params) + ~recursive:(lazy recursive) + ~specialised_args:Variable.Map.empty + ~freshening:Freshening.Project_var.empty + ~direct_call_surrogates:Closure_id.Map.empty) in - match Export_info.find_description ex_info ex with - | exception Not_found -> A.value_unknown Other - | Value_int i -> A.value_int i - | Value_char c -> A.value_char c - | Value_constptr i -> A.value_constptr i - | Value_float f -> A.value_float f - | Value_float_array float_array -> - begin match float_array.contents with - | Unknown_or_mutable -> - A.value_mutable_float_array ~size:float_array.size - | Contents contents -> - A.value_immutable_float_array - (Array.map (function - | None -> A.value_any_float - | Some f -> A.value_float f) - contents) - end - | Export_info.Value_boxed_int (t, i) -> A.value_boxed_int t i - | Value_string { size; contents } -> - let contents = - match contents with - | Unknown_or_mutable -> None - | Contents contents -> Some contents - in - A.value_string size contents - | Value_mutable_block _ -> A.value_unknown Other - | 
Value_block (tag, fields) -> - A.value_block tag (Array.map import_approx fields) - | Value_closure { closure_id; - set_of_closures = - { set_of_closures_id; bound_vars; aliased_symbol } } -> - let value_set_of_closures = - import_value_set_of_closures ~set_of_closures_id ~bound_vars ~ex_info - ~what:(Format.asprintf "Value_closure %a" Closure_id.print closure_id) - in - begin match value_set_of_closures with - | None -> A.value_unresolved (Set_of_closures_id set_of_closures_id) - | Some value_set_of_closures -> - A.value_closure ?set_of_closures_symbol:aliased_symbol - value_set_of_closures closure_id - end - | Value_set_of_closures { set_of_closures_id; bound_vars; aliased_symbol } -> - let value_set_of_closures = - import_value_set_of_closures ~set_of_closures_id ~bound_vars ~ex_info - ~what:"Value_set_of_closures" - in - match value_set_of_closures with - | None -> - A.value_unresolved (Set_of_closures_id set_of_closures_id) - | Some value_set_of_closures -> - let approx = A.value_set_of_closures value_set_of_closures in - match aliased_symbol with - | None -> approx - | Some symbol -> A.augment_with_symbol approx symbol + let compilation_unit = Export_id.get_compilation_unit ex in + match Compilenv.approx_for_global compilation_unit with + | None -> A.value_unknown Other + | Some ex_info -> + match Export_info.find_description ex_info ex with + | exception Not_found -> + Misc.fatal_errorf "Cannot find export id %a" Export_id.print ex + | Value_unknown_descr -> A.value_unknown Other + | Value_int i -> A.value_int i + | Value_char c -> A.value_char c + | Value_constptr i -> A.value_constptr i + | Value_float f -> A.value_float f + | Value_float_array float_array -> + begin match float_array.contents with + | Unknown_or_mutable -> + A.value_mutable_float_array ~size:float_array.size + | Contents contents -> + A.value_immutable_float_array + (Array.map (function + | None -> A.value_any_float + | Some f -> A.value_float f) + contents) + end + | 
Export_info.Value_boxed_int (t, i) -> A.value_boxed_int t i + | Value_string { size; contents } -> + let contents = + match contents with + | Unknown_or_mutable -> None + | Contents contents -> Some contents + in + A.value_string size contents + | Value_mutable_block _ -> A.value_unknown Other + | Value_block (tag, fields) -> + A.value_block tag (Array.map import_approx fields) + | Value_closure { closure_id; + set_of_closures = + { set_of_closures_id; bound_vars; free_vars; aliased_symbol } } -> + let value_set_of_closures = + import_value_set_of_closures + ~set_of_closures_id ~bound_vars ~free_vars ~ex_info + ~what:(Format.asprintf "Value_closure %a" Closure_id.print closure_id) + in + begin match value_set_of_closures with + | None -> A.value_unresolved (Set_of_closures_id set_of_closures_id) + | Some value_set_of_closures -> + A.value_closure ?set_of_closures_symbol:aliased_symbol + value_set_of_closures closure_id + end + | Value_set_of_closures + { set_of_closures_id; bound_vars; free_vars; aliased_symbol } -> + let value_set_of_closures = + import_value_set_of_closures ~set_of_closures_id + ~bound_vars ~free_vars ~ex_info ~what:"Value_set_of_closures" + in + match value_set_of_closures with + | None -> + A.value_unresolved (Set_of_closures_id set_of_closures_id) + | Some value_set_of_closures -> + let approx = A.value_set_of_closures value_set_of_closures in + match aliased_symbol with + | None -> approx + | Some symbol -> A.augment_with_symbol approx symbol and import_approx (ap : Export_info.approx) = match ap with @@ -162,15 +188,19 @@ and import_approx (ap : Export_info.approx) = let import_symbol sym = if Compilenv.is_predefined_exception sym then A.value_unknown Other - else - let symbol_id_map = - let global = Symbol.compilation_unit sym in - (Compilenv.approx_for_global global).symbol_id - in - match Symbol.Map.find sym symbol_id_map with - | approx -> A.augment_with_symbol (import_ex approx) sym - | exception Not_found -> - A.value_unresolved 
(Symbol sym) + else begin + let compilation_unit = Symbol.compilation_unit sym in + match Compilenv.approx_for_global compilation_unit with + | None -> A.value_unresolved (Symbol sym) + | Some export_info -> + match Symbol.Map.find sym export_info.symbol_id with + | approx -> A.augment_with_symbol (import_ex approx) sym + | exception Not_found -> + Misc.fatal_errorf + "Compilation unit = %a Cannot find symbol %a" + Compilation_unit.print compilation_unit + Symbol.print sym + end (* Note for code reviewers: Observe that [really_import] iterates until the approximation description is fully resolved (or a necessary .cmx diff --git a/asmcomp/linearize.ml b/asmcomp/linearize.ml index 1aa5d90f..e915bb5e 100644 --- a/asmcomp/linearize.ml +++ b/asmcomp/linearize.ml @@ -59,15 +59,15 @@ type fundecl = (* Invert a test *) let invert_integer_test = function - Isigned cmp -> Isigned(Cmm.negate_comparison cmp) - | Iunsigned cmp -> Iunsigned(Cmm.negate_comparison cmp) + Isigned cmp -> Isigned(Cmm.negate_integer_comparison cmp) + | Iunsigned cmp -> Iunsigned(Cmm.negate_integer_comparison cmp) let invert_test = function Itruetest -> Ifalsetest | Ifalsetest -> Itruetest | Iinttest(cmp) -> Iinttest(invert_integer_test cmp) | Iinttest_imm(cmp, n) -> Iinttest_imm(invert_integer_test cmp, n) - | Ifloattest(cmp, neg) -> Ifloattest(cmp, not neg) + | Ifloattest(cmp) -> Ifloattest(Cmm.negate_float_comparison cmp) | Ieventest -> Ioddtest | Ioddtest -> Ieventest @@ -311,7 +311,7 @@ let rec linear i n = let fundecl f = { fun_name = f.Mach.fun_name; fun_body = linear f.Mach.fun_body end_instr; - fun_fast = f.Mach.fun_fast; + fun_fast = not (List.mem Cmm.Reduce_code_size f.Mach.fun_codegen_options); fun_dbg = f.Mach.fun_dbg; fun_spacetime_shape = f.Mach.fun_spacetime_shape; } diff --git a/asmcomp/mach.ml b/asmcomp/mach.ml index cfed6373..756ec61a 100644 --- a/asmcomp/mach.ml +++ b/asmcomp/mach.ml @@ -18,8 +18,8 @@ type label = Cmm.label type integer_comparison = - Isigned of Cmm.comparison - | 
Iunsigned of Cmm.comparison + Isigned of Cmm.integer_comparison + | Iunsigned of Cmm.integer_comparison type integer_operation = Iadd | Isub | Imul | Imulh | Idiv | Imod @@ -28,12 +28,14 @@ type integer_operation = | Icheckbound of { label_after_error : label option; spacetime_index : int; } +type float_comparison = Cmm.float_comparison + type test = Itruetest | Ifalsetest | Iinttest of integer_comparison | Iinttest_imm of integer_comparison * int - | Ifloattest of Cmm.comparison * bool + | Ifloattest of float_comparison | Ioddtest | Ieventest @@ -96,7 +98,7 @@ type fundecl = { fun_name: string; fun_args: Reg.t array; fun_body: instruction; - fun_fast: bool; + fun_codegen_options : Cmm.codegen_option list; fun_dbg : Debuginfo.t; fun_spacetime_shape : spacetime_shape option; } diff --git a/asmcomp/mach.mli b/asmcomp/mach.mli index 784bba62..be17ba2b 100644 --- a/asmcomp/mach.mli +++ b/asmcomp/mach.mli @@ -22,8 +22,8 @@ type label = Cmm.label type integer_comparison = - Isigned of Cmm.comparison - | Iunsigned of Cmm.comparison + Isigned of Cmm.integer_comparison + | Iunsigned of Cmm.integer_comparison type integer_operation = Iadd | Isub | Imul | Imulh | Idiv | Imod @@ -35,12 +35,14 @@ type integer_operation = second being the pointer to the trie node for the current function (and the first being as per non-Spacetime mode). 
*) +type float_comparison = Cmm.float_comparison + type test = Itruetest | Ifalsetest | Iinttest of integer_comparison | Iinttest_imm of integer_comparison * int - | Ifloattest of Cmm.comparison * bool + | Ifloattest of float_comparison | Ioddtest | Ieventest @@ -118,7 +120,7 @@ type fundecl = { fun_name: string; fun_args: Reg.t array; fun_body: instruction; - fun_fast: bool; + fun_codegen_options : Cmm.codegen_option list; fun_dbg : Debuginfo.t; fun_spacetime_shape : spacetime_shape option; } diff --git a/asmcomp/power/emit.mlp b/asmcomp/power/emit.mlp index 0ab36376..bd0c7134 100644 --- a/asmcomp/power/emit.mlp +++ b/asmcomp/power/emit.mlp @@ -847,22 +847,27 @@ let emit_instr i = let (comp, branch) = name_for_int_comparison cmp in ` {emit_string comp}i {emit_reg i.arg.(0)}, {emit_int n}\n`; ` {emit_string branch} {emit_label lbl}\n` - | Ifloattest(cmp, neg) -> + | Ifloattest cmp -> begin ` fcmpu 0, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; (* bit 0 = lt, bit 1 = gt, bit 2 = eq *) - let (bitnum, negtst) = + let bitnum = match cmp with - Ceq -> (2, neg) - | Cne -> (2, not neg) - | Cle -> ` cror 3, 0, 2\n`; (* lt or eq *) - (3, neg) - | Cgt -> (1, neg) - | Cge -> ` cror 3, 1, 2\n`; (* gt or eq *) - (3, neg) - | Clt -> (0, neg) in - if negtst - then ` bf {emit_int bitnum}, {emit_label lbl}\n` - else ` bt {emit_int bitnum}, {emit_label lbl}\n` + | CFeq | CFneq -> 2 + | CFle | CFnle -> + ` cror 3, 0, 2\n`; (* lt or eq *) + 3 + | CFgt | CFngt -> 1 + | CFge | CFnge -> + ` cror 3, 1, 2\n`; (* gt or eq *) + 3 + | CFlt | CFnlt -> 0 + in + match cmp with + | CFneq | CFngt | CFnge | CFnlt | CFnle -> + ` bf {emit_int bitnum}, {emit_label lbl}\n` + | CFeq | CFgt | CFge | CFlt | CFle -> + ` bt {emit_int bitnum}, {emit_label lbl}\n` + end | Ioddtest -> ` andi. 
0, {emit_reg i.arg.(0)}, 1\n`; ` bne {emit_label lbl}\n` diff --git a/asmcomp/printcmm.ml b/asmcomp/printcmm.ml index 697ebca8..51682690 100644 --- a/asmcomp/printcmm.ml +++ b/asmcomp/printcmm.ml @@ -36,7 +36,7 @@ let machtype ppf mty = fprintf ppf "*%a" machtype_component mty.(i) done -let comparison = function +let integer_comparison = function | Ceq -> "==" | Cne -> "!=" | Clt -> "<" @@ -44,6 +44,18 @@ let comparison = function | Cgt -> ">" | Cge -> ">=" +let float_comparison = function + | CFeq -> "==" + | CFneq -> "!=" + | CFlt -> "<" + | CFnlt -> "!<" + | CFle -> "<=" + | CFnle -> "!<=" + | CFgt -> ">" + | CFngt -> "!>" + | CFge -> ">=" + | CFnge -> "!>=" + let chunk = function | Byte_unsigned -> "unsigned int8" | Byte_signed -> "signed int8" @@ -88,10 +100,10 @@ let operation d = function | Clsl -> "<<" | Clsr -> ">>u" | Casr -> ">>s" - | Ccmpi c -> comparison c + | Ccmpi c -> integer_comparison c | Caddv -> "+v" | Cadda -> "+a" - | Ccmpa c -> Printf.sprintf "%sa" (comparison c) + | Ccmpa c -> Printf.sprintf "%sa" (integer_comparison c) | Cnegf -> "~f" | Cabsf -> "absf" | Caddf -> "+f" @@ -100,7 +112,7 @@ let operation d = function | Cdivf -> "/f" | Cfloatofint -> "floatofint" | Cintoffloat -> "intoffloat" - | Ccmpf c -> Printf.sprintf "%sf" (comparison c) + | Ccmpf c -> Printf.sprintf "%sf" (float_comparison c) | Craise k -> Format.asprintf "%a%s" raise_kind k (Debuginfo.to_string d) | Ccheckbound -> "checkbound" ^ Debuginfo.to_string d diff --git a/asmcomp/printcmm.mli b/asmcomp/printcmm.mli index bd4739b2..0a631d3d 100644 --- a/asmcomp/printcmm.mli +++ b/asmcomp/printcmm.mli @@ -20,7 +20,8 @@ open Format val rec_flag : formatter -> Cmm.rec_flag -> unit val machtype_component : formatter -> Cmm.machtype_component -> unit val machtype : formatter -> Cmm.machtype_component array -> unit -val comparison : Cmm.comparison -> string +val integer_comparison : Cmm.integer_comparison -> string +val float_comparison : Cmm.float_comparison -> string val chunk : 
Cmm.memory_chunk -> string val operation : Debuginfo.t -> Cmm.operation -> string val expression : formatter -> Cmm.expression -> unit diff --git a/asmcomp/printmach.ml b/asmcomp/printmach.ml index cd7e8a77..57b111ce 100644 --- a/asmcomp/printmach.ml +++ b/asmcomp/printmach.ml @@ -68,11 +68,11 @@ let regsetaddr ppf s = s let intcomp = function - | Isigned c -> Printf.sprintf " %ss " (Printcmm.comparison c) - | Iunsigned c -> Printf.sprintf " %su " (Printcmm.comparison c) + | Isigned c -> Printf.sprintf " %ss " (Printcmm.integer_comparison c) + | Iunsigned c -> Printf.sprintf " %su " (Printcmm.integer_comparison c) let floatcomp c = - Printf.sprintf " %sf " (Printcmm.comparison c) + Printf.sprintf " %sf " (Printcmm.float_comparison c) let intop = function | Iadd -> " + " @@ -105,9 +105,8 @@ let test tst ppf arg = | Ifalsetest -> fprintf ppf "not %a" reg arg.(0) | Iinttest cmp -> fprintf ppf "%a%s%a" reg arg.(0) (intcomp cmp) reg arg.(1) | Iinttest_imm(cmp, n) -> fprintf ppf "%a%s%i" reg arg.(0) (intcomp cmp) n - | Ifloattest(cmp, neg) -> - fprintf ppf "%s%a%s%a" - (if neg then "not " else "") + | Ifloattest cmp -> + fprintf ppf "%a%s%a" reg arg.(0) (floatcomp cmp) reg arg.(1) | Ieventest -> fprintf ppf "%a & 1 == 0" reg arg.(0) | Ioddtest -> fprintf ppf "%a & 1 == 1" reg arg.(0) diff --git a/asmcomp/reloadgen.ml b/asmcomp/reloadgen.ml index 3c0b9873..d2bf9150 100644 --- a/asmcomp/reloadgen.ml +++ b/asmcomp/reloadgen.ml @@ -129,7 +129,7 @@ method fundecl f = redo_regalloc <- false; let new_body = self#reload f.fun_body in ({fun_name = f.fun_name; fun_args = f.fun_args; - fun_body = new_body; fun_fast = f.fun_fast; + fun_body = new_body; fun_codegen_options = f.fun_codegen_options; fun_dbg = f.fun_dbg; fun_spacetime_shape = f.fun_spacetime_shape}, redo_regalloc) end diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp index 3487005e..cef1022a 100644 --- a/asmcomp/s390x/emit.mlp +++ b/asmcomp/s390x/emit.mlp @@ -238,10 +238,11 @@ let int_literals = ref ([] : 
(nativeint * int) list) (* Masks for conditional branches after comparisons *) +(* bit 0 = eq, bit 1 = lt, bit 2 = gt, bit 3 = overflow*) let branch_for_comparison = function - Ceq -> 8 | Cne -> 7 - | Cle -> 12 | Cgt -> 2 - | Cge -> 10 | Clt -> 4 + | Ceq -> 0b1000 | Cne -> 0b0111 (* BRNEL is 0111 rather than 0110 *) + | Cle -> 0b1100 | Cgt -> 0b0010 + | Cge -> 0b1010 | Clt -> 0b0100 let name_for_int_comparison = function Isigned cmp -> ("cgr", branch_for_comparison cmp) @@ -252,14 +253,21 @@ let name_for_int_comparison_imm = function | Iunsigned cmp -> ("clgfi", branch_for_comparison cmp) (* bit 0 = eq, bit 1 = lt, bit 2 = gt, bit 3 = unordered*) -let branch_for_float_comparison cmp neg = - match cmp with - Ceq -> if neg then 7 else 8 - | Cne -> if neg then 8 else 7 - | Cle -> if neg then 3 else 12 - | Cgt -> if neg then 13 else 2 - | Cge -> if neg then 5 else 10 - | Clt -> if neg then 11 else 4 +let branch_for_float_comparison = function + | CFeq -> 0b1000 + | CFneq -> 0b0111 + + | CFle -> 0b1100 + | CFnle -> 0b0011 + + | CFgt -> 0b0010 + | CFngt -> 0b1101 + + | CFge -> 0b1010 + | CFnge -> 0b0101 + + | CFlt -> 0b0100 + | CFnlt -> 0b1011 (* Names for various instructions *) @@ -554,9 +562,9 @@ let emit_instr i = let (comp, mask) = name_for_int_comparison_imm cmp in ` {emit_string comp} {emit_reg i.arg.(0)}, {emit_int n}\n`; ` brcl {emit_int mask}, {emit_label lbl}\n` - | Ifloattest(cmp, neg) -> + | Ifloattest cmp -> ` cdbr {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; - let mask = branch_for_float_comparison cmp neg in + let mask = branch_for_float_comparison cmp in ` brcl {emit_int mask}, {emit_label lbl}\n` | Ioddtest -> ` tmll {emit_reg i.arg.(0)}, 1\n`; diff --git a/asmcomp/selectgen.ml b/asmcomp/selectgen.ml index 1158fc0d..d0520ba9 100644 --- a/asmcomp/selectgen.ml +++ b/asmcomp/selectgen.ml @@ -105,8 +105,8 @@ let size_expr (env:environment) exp = (* Swap the two arguments of an integer comparison *) let swap_intcomp = function - Isigned cmp -> 
Isigned(swap_comparison cmp) - | Iunsigned cmp -> Iunsigned(swap_comparison cmp) + Isigned cmp -> Isigned(swap_integer_comparison cmp) + | Iunsigned cmp -> Iunsigned(swap_integer_comparison cmp) (* Naming of registers *) @@ -511,11 +511,11 @@ method select_condition = function Cop(Ccmpi cmp, [arg1; Cconst_int n], _) when self#is_immediate n -> (Iinttest_imm(Isigned cmp, n), arg1) | Cop(Ccmpi cmp, [Cconst_int n; arg2], _) when self#is_immediate n -> - (Iinttest_imm(Isigned(swap_comparison cmp), n), arg2) + (Iinttest_imm(Isigned(swap_integer_comparison cmp), n), arg2) | Cop(Ccmpi cmp, [arg1; Cconst_pointer n], _) when self#is_immediate n -> (Iinttest_imm(Isigned cmp, n), arg1) | Cop(Ccmpi cmp, [Cconst_pointer n; arg2], _) when self#is_immediate n -> - (Iinttest_imm(Isigned(swap_comparison cmp), n), arg2) + (Iinttest_imm(Isigned(swap_integer_comparison cmp), n), arg2) | Cop(Ccmpi cmp, args, _) -> (Iinttest(Isigned cmp), Ctuple args) | Cop(Ccmpa cmp, [arg1; Cconst_pointer n], _) when self#is_immediate n -> @@ -523,13 +523,13 @@ method select_condition = function | Cop(Ccmpa cmp, [arg1; Cconst_int n], _) when self#is_immediate n -> (Iinttest_imm(Iunsigned cmp, n), arg1) | Cop(Ccmpa cmp, [Cconst_pointer n; arg2], _) when self#is_immediate n -> - (Iinttest_imm(Iunsigned(swap_comparison cmp), n), arg2) + (Iinttest_imm(Iunsigned(swap_integer_comparison cmp), n), arg2) | Cop(Ccmpa cmp, [Cconst_int n; arg2], _) when self#is_immediate n -> - (Iinttest_imm(Iunsigned(swap_comparison cmp), n), arg2) + (Iinttest_imm(Iunsigned(swap_integer_comparison cmp), n), arg2) | Cop(Ccmpa cmp, args, _) -> (Iinttest(Iunsigned cmp), Ctuple args) | Cop(Ccmpf cmp, args, _) -> - (Ifloattest(cmp, false), Ctuple args) + (Ifloattest cmp, Ctuple args) | Cop(Cand, [arg; Cconst_int 1], _) -> (Ioddtest, arg) | arg -> @@ -1215,7 +1215,7 @@ method emit_fundecl f = { fun_name = f.Cmm.fun_name; fun_args = loc_arg; fun_body = body; - fun_fast = f.Cmm.fun_fast; + fun_codegen_options = 
f.Cmm.fun_codegen_options; fun_dbg = f.Cmm.fun_dbg; fun_spacetime_shape; } diff --git a/asmcomp/spill.ml b/asmcomp/spill.ml index a02b0c36..7e3a3188 100644 --- a/asmcomp/spill.ml +++ b/asmcomp/spill.ml @@ -471,7 +471,7 @@ let fundecl f = { fun_name = f.fun_name; fun_args = f.fun_args; fun_body = new_body; - fun_fast = f.fun_fast; + fun_codegen_options = f.fun_codegen_options; fun_dbg = f.fun_dbg; fun_spacetime_shape = f.fun_spacetime_shape; } diff --git a/asmcomp/split.ml b/asmcomp/split.ml index ec1a52de..59826284 100644 --- a/asmcomp/split.ml +++ b/asmcomp/split.ml @@ -221,7 +221,7 @@ let fundecl f = { fun_name = f.fun_name; fun_args = new_args; fun_body = new_body; - fun_fast = f.fun_fast; + fun_codegen_options = f.fun_codegen_options; fun_dbg = f.fun_dbg; fun_spacetime_shape = f.fun_spacetime_shape; } diff --git a/asmcomp/traverse_for_exported_symbols.ml b/asmcomp/traverse_for_exported_symbols.ml new file mode 100644 index 00000000..1b7ce57f --- /dev/null +++ b/asmcomp/traverse_for_exported_symbols.ml @@ -0,0 +1,267 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* Fu Yong Quah, Jane Street Europe *) +(* *) +(* Copyright 2017 Jane Street Group LLC *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. 
*) +(* *) +(**************************************************************************) + +[@@@ocaml.warning "+a-4-9-30-40-41-42"] + +module A = Simple_value_approx + +type queue_elem = + | Q_symbol of Symbol.t + | Q_set_of_closures_id of Set_of_closures_id.t + | Q_export_id of Export_id.t + +type symbols_to_export = + { symbols : Symbol.Set.t; + export_ids : Export_id.Set.t; + set_of_closure_ids : Set_of_closures_id.Set.t; + set_of_closure_ids_keep_declaration : Set_of_closures_id.Set.t; + relevant_imported_closure_ids : Closure_id.Set.t; + relevant_local_closure_ids : Closure_id.Set.t; + relevant_imported_vars_within_closure : Var_within_closure.Set.t; + relevant_local_vars_within_closure : Var_within_closure.Set.t; + } + +let traverse + ~(sets_of_closures_map : + Flambda.set_of_closures Set_of_closures_id.Map.t) + ~(closure_id_to_set_of_closures_id : + Set_of_closures_id.t Closure_id.Map.t) + ~(function_declarations_map : + A.function_declarations Set_of_closures_id.Map.t) + ~(values : Export_info.descr Export_id.Map.t) + ~(symbol_id : Export_id.t Symbol.Map.t) + ~(root_symbol: Symbol.t) = + let relevant_set_of_closures_declaration_only = + ref Set_of_closures_id.Set.empty + in + let relevant_symbols = ref (Symbol.Set.singleton root_symbol) in + let relevant_set_of_closures = ref Set_of_closures_id.Set.empty in + let relevant_export_ids = ref Export_id.Set.empty in + let relevant_imported_closure_ids = ref Closure_id.Set.empty in + let relevant_local_closure_ids = ref Closure_id.Set.empty in + let relevant_imported_vars_within_closure = + ref Var_within_closure.Set.empty + in + let relevant_local_vars_with_closure = ref Var_within_closure.Set.empty in + let (queue : queue_elem Queue.t) = Queue.create () in + let conditionally_add_symbol symbol = + if not (Symbol.Set.mem symbol !relevant_symbols) then begin + relevant_symbols := + Symbol.Set.add symbol !relevant_symbols; + Queue.add (Q_symbol symbol) queue + end + in + let conditionally_add_set_of_closures_id 
set_of_closures_id = + if not (Set_of_closures_id.Set.mem + set_of_closures_id !relevant_set_of_closures) then begin + relevant_set_of_closures := + Set_of_closures_id.Set.add set_of_closures_id + !relevant_set_of_closures; + Queue.add (Q_set_of_closures_id set_of_closures_id) queue + end + in + let conditionally_add_export_id export_id = + if not (Export_id.Set.mem export_id !relevant_export_ids) then begin + relevant_export_ids := + Export_id.Set.add export_id !relevant_export_ids; + Queue.add (Q_export_id export_id) queue + end + in + let process_approx (approx : Export_info.approx) = + match approx with + | Value_id export_id -> + conditionally_add_export_id export_id + | Value_symbol symbol -> + conditionally_add_symbol symbol + | Value_unknown -> () + in + let process_value_set_of_closures + (soc : Export_info.value_set_of_closures) = + conditionally_add_set_of_closures_id soc.set_of_closures_id; + Var_within_closure.Map.iter + (fun _ value -> process_approx value) soc.bound_vars; + Closure_id.Map.iter + (fun _ value -> process_approx value) soc.results; + begin match soc.aliased_symbol with + | None -> () + | Some symbol -> conditionally_add_symbol symbol + end + in + let process_function_body (function_body : A.function_body) = + Flambda_iterators.iter + (fun (term : Flambda.t) -> + match term with + | Flambda.Apply { kind ; _ } -> + begin match kind with + | Indirect -> () + | Direct closure_id -> + begin match + Closure_id.Map.find + closure_id + closure_id_to_set_of_closures_id + with + | exception Not_found -> + relevant_imported_closure_ids := + Closure_id.Set.add closure_id + !relevant_imported_closure_ids + | set_of_closures_id -> + relevant_local_closure_ids := + Closure_id.Set.add closure_id + !relevant_local_closure_ids; + conditionally_add_set_of_closures_id + set_of_closures_id + end + end + | _ -> ()) + (fun (named : Flambda.named) -> + let process_closure_id closure_id = + match + Closure_id.Map.find closure_id closure_id_to_set_of_closures_id 
+ with + | exception Not_found -> + relevant_imported_closure_ids := + Closure_id.Set.add closure_id !relevant_imported_closure_ids + | set_of_closure_id -> + relevant_local_closure_ids := + Closure_id.Set.add closure_id !relevant_local_closure_ids; + relevant_set_of_closures_declaration_only := + Set_of_closures_id.Set.add + set_of_closure_id + !relevant_set_of_closures_declaration_only + in + match named with + | Symbol symbol + | Read_symbol_field (symbol, _) -> + conditionally_add_symbol symbol + | Set_of_closures soc -> + conditionally_add_set_of_closures_id + soc.function_decls.set_of_closures_id + | Project_closure { closure_id; _ } -> + process_closure_id closure_id + | Move_within_set_of_closures { start_from; move_to; _ } -> + process_closure_id start_from; + process_closure_id move_to + | Project_var { closure_id ; var; _ } -> + begin match + Closure_id.Map.find + closure_id closure_id_to_set_of_closures_id + with + | exception Not_found -> + relevant_imported_closure_ids := + Closure_id.Set.add closure_id + !relevant_imported_closure_ids; + relevant_imported_vars_within_closure := + Var_within_closure.Set.add var + !relevant_imported_vars_within_closure + | set_of_closure_id -> + relevant_local_closure_ids := + Closure_id.Set.add closure_id + !relevant_local_closure_ids; + relevant_local_vars_with_closure := + Var_within_closure.Set.add var + !relevant_local_vars_with_closure; + relevant_set_of_closures_declaration_only := + Set_of_closures_id.Set.add + set_of_closure_id + !relevant_set_of_closures_declaration_only + end + | Prim _ + | Expr _ + | Const _ + | Allocated_const _ + | Read_mutable _ -> ()) + function_body.body + in + let rec loop () = + if Queue.is_empty queue then + () + else begin + begin match Queue.pop queue with + | Q_export_id export_id -> + begin match Export_id.Map.find export_id values with + | exception Not_found -> () + | Value_block (_, approxes) -> + Array.iter process_approx approxes + | Value_closure value_closure -> + 
process_value_set_of_closures value_closure.set_of_closures + | Value_set_of_closures soc -> + process_value_set_of_closures soc + | _ -> () + end + | Q_symbol symbol -> + let compilation_unit = Symbol.compilation_unit symbol in + if Compilation_unit.is_current compilation_unit then begin + match Symbol.Map.find symbol symbol_id with + | exception Not_found -> + Misc.fatal_errorf "cannot find symbol's export id %a\n" + Symbol.print symbol + | export_id -> + conditionally_add_export_id export_id + end + | Q_set_of_closures_id set_of_closures_id -> + begin match + Set_of_closures_id.Map.find + set_of_closures_id function_declarations_map + with + | exception Not_found -> () + | function_declarations -> + Variable.Map.iter + (fun (_ : Variable.t) (fun_decl : A.function_declaration) -> + match fun_decl.function_body with + | None -> () + | Some function_body -> process_function_body function_body) + function_declarations.funs + end + end; + loop () + end + in + Queue.add (Q_symbol root_symbol) queue; + loop (); + + Closure_id.Map.iter (fun closure_id set_of_closure_id -> + if Set_of_closures_id.Set.mem + set_of_closure_id !relevant_set_of_closures + then begin + relevant_local_closure_ids := + Closure_id.Set.add closure_id !relevant_local_closure_ids + end) + closure_id_to_set_of_closures_id; + + Set_of_closures_id.Set.iter (fun set_of_closures_id -> + match + Set_of_closures_id.Map.find set_of_closures_id sets_of_closures_map + with + | exception Not_found -> () + | set_of_closures -> + Variable.Map.iter (fun var _ -> + relevant_local_vars_with_closure := + Var_within_closure.Set.add + (Var_within_closure.wrap var) + !relevant_local_vars_with_closure) + set_of_closures.free_vars) + !relevant_set_of_closures; + + { symbols = !relevant_symbols; + export_ids = !relevant_export_ids; + set_of_closure_ids = !relevant_set_of_closures; + set_of_closure_ids_keep_declaration = + !relevant_set_of_closures_declaration_only; + relevant_imported_closure_ids = 
!relevant_imported_closure_ids; + relevant_local_closure_ids = !relevant_local_closure_ids; + relevant_imported_vars_within_closure = + !relevant_imported_vars_within_closure; + relevant_local_vars_within_closure = + !relevant_local_vars_with_closure; + } diff --git a/asmcomp/traverse_for_exported_symbols.mli b/asmcomp/traverse_for_exported_symbols.mli new file mode 100644 index 00000000..2825a386 --- /dev/null +++ b/asmcomp/traverse_for_exported_symbols.mli @@ -0,0 +1,41 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* Fu Yong Quah, Jane Street Europe *) +(* *) +(* Copyright 2017 Jane Street Group LLC *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +[@@@ocaml.warning "+a-4-9-30-40-41-42"] + +type symbols_to_export = + { symbols : Symbol.Set.t; + export_ids : Export_id.Set.t; + set_of_closure_ids : Set_of_closures_id.Set.t; + set_of_closure_ids_keep_declaration : Set_of_closures_id.Set.t; + relevant_imported_closure_ids : Closure_id.Set.t; + relevant_local_closure_ids : Closure_id.Set.t; + relevant_imported_vars_within_closure : Var_within_closure.Set.t; + relevant_local_vars_within_closure : Var_within_closure.Set.t; + } + +(** Computes the transitive closure in [Symbol.t], [Closure_id.t] and + [Set_of_closures_id.t] and determines which ones of those should be + exported (i.e: included in the cmx files). 
+**) +val traverse + : sets_of_closures_map: Flambda.set_of_closures Set_of_closures_id.Map.t + -> closure_id_to_set_of_closures_id: + Set_of_closures_id.t Closure_id.Map.t + -> function_declarations_map: + Simple_value_approx.function_declarations Set_of_closures_id.Map.t + -> values: Export_info.descr Export_id.Map.t + -> symbol_id: Export_id.t Symbol.Map.t + -> root_symbol: Symbol.t + -> symbols_to_export diff --git a/asmrun/.depend b/asmrun/.depend index 2a253ebe..f80abd3f 100644 --- a/asmrun/.depend +++ b/asmrun/.depend @@ -193,9 +193,6 @@ gc_ctrl.$(O): gc_ctrl.c ../byterun/caml/alloc.h ../byterun/caml/misc.h \ ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h \ ../byterun/caml/gc_ctrl.h ../byterun/caml/signals.h \ ../byterun/caml/stack.h ../byterun/caml/startup_aux.h -terminfo.$(O): terminfo.c ../byterun/caml/config.h ../byterun/caml/m.h \ - ../byterun/caml/s.h ../byterun/caml/alloc.h ../byterun/caml/misc.h \ - ../byterun/caml/mlvalues.h ../byterun/caml/fail.h ../byterun/caml/io.h md5.$(O): md5.c ../byterun/caml/alloc.h ../byterun/caml/misc.h \ ../byterun/caml/config.h ../byterun/caml/m.h ../byterun/caml/s.h \ ../byterun/caml/mlvalues.h ../byterun/caml/fail.h \ @@ -269,7 +266,8 @@ custom.$(O): custom.c ../byterun/caml/alloc.h ../byterun/caml/misc.h \ ../byterun/caml/mlvalues.h ../byterun/caml/custom.h \ ../byterun/caml/fail.h ../byterun/caml/memory.h ../byterun/caml/gc.h \ ../byterun/caml/major_gc.h ../byterun/caml/freelist.h \ - ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h + ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h \ + ../byterun/caml/signals.h globroots.$(O): globroots.c ../byterun/caml/memory.h \ ../byterun/caml/config.h ../byterun/caml/m.h ../byterun/caml/s.h \ ../byterun/caml/gc.h ../byterun/caml/mlvalues.h ../byterun/caml/misc.h \ @@ -351,15 +349,6 @@ spacetime_snapshot.$(O): spacetime_snapshot.c ../byterun/caml/alloc.h \ ../byterun/caml/address_class.h ../byterun/caml/roots.h \ ../byterun/caml/signals.h 
../byterun/caml/stack.h \ ../byterun/caml/sys.h ../byterun/caml/spacetime.h -spacetime_offline.$(O): spacetime_offline.c ../byterun/caml/alloc.h \ - ../byterun/caml/misc.h ../byterun/caml/config.h ../byterun/caml/m.h \ - ../byterun/caml/s.h ../byterun/caml/mlvalues.h ../byterun/caml/fail.h \ - ../byterun/caml/gc.h ../byterun/caml/intext.h ../byterun/caml/io.h \ - ../byterun/caml/major_gc.h ../byterun/caml/freelist.h \ - ../byterun/caml/memory.h ../byterun/caml/minor_gc.h \ - ../byterun/caml/address_class.h ../byterun/caml/roots.h \ - ../byterun/caml/signals.h ../byterun/caml/stack.h \ - ../byterun/caml/sys.h ../byterun/caml/spacetime.h afl.$(O): afl.c ../byterun/caml/config.h ../byterun/caml/m.h \ ../byterun/caml/s.h ../byterun/caml/misc.h ../byterun/caml/mlvalues.h \ ../byterun/caml/osdeps.h ../byterun/caml/memory.h ../byterun/caml/gc.h \ @@ -372,7 +361,8 @@ bigarray.$(O): bigarray.c ../byterun/caml/alloc.h ../byterun/caml/misc.h \ ../byterun/caml/intext.h ../byterun/caml/io.h ../byterun/caml/hash.h \ ../byterun/caml/memory.h ../byterun/caml/gc.h \ ../byterun/caml/major_gc.h ../byterun/caml/freelist.h \ - ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h + ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h \ + ../byterun/caml/signals.h startup_aux.p.$(O): startup_aux.c ../byterun/caml/backtrace.h \ ../byterun/caml/mlvalues.h ../byterun/caml/config.h \ ../byterun/caml/m.h ../byterun/caml/s.h ../byterun/caml/misc.h \ @@ -568,9 +558,6 @@ gc_ctrl.p.$(O): gc_ctrl.c ../byterun/caml/alloc.h ../byterun/caml/misc.h \ ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h \ ../byterun/caml/gc_ctrl.h ../byterun/caml/signals.h \ ../byterun/caml/stack.h ../byterun/caml/startup_aux.h -terminfo.p.$(O): terminfo.c ../byterun/caml/config.h ../byterun/caml/m.h \ - ../byterun/caml/s.h ../byterun/caml/alloc.h ../byterun/caml/misc.h \ - ../byterun/caml/mlvalues.h ../byterun/caml/fail.h ../byterun/caml/io.h md5.p.$(O): md5.c ../byterun/caml/alloc.h 
../byterun/caml/misc.h \ ../byterun/caml/config.h ../byterun/caml/m.h ../byterun/caml/s.h \ ../byterun/caml/mlvalues.h ../byterun/caml/fail.h \ @@ -644,7 +631,8 @@ custom.p.$(O): custom.c ../byterun/caml/alloc.h ../byterun/caml/misc.h \ ../byterun/caml/mlvalues.h ../byterun/caml/custom.h \ ../byterun/caml/fail.h ../byterun/caml/memory.h ../byterun/caml/gc.h \ ../byterun/caml/major_gc.h ../byterun/caml/freelist.h \ - ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h + ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h \ + ../byterun/caml/signals.h globroots.p.$(O): globroots.c ../byterun/caml/memory.h \ ../byterun/caml/config.h ../byterun/caml/m.h ../byterun/caml/s.h \ ../byterun/caml/gc.h ../byterun/caml/mlvalues.h ../byterun/caml/misc.h \ @@ -726,15 +714,6 @@ spacetime_snapshot.p.$(O): spacetime_snapshot.c ../byterun/caml/alloc.h \ ../byterun/caml/address_class.h ../byterun/caml/roots.h \ ../byterun/caml/signals.h ../byterun/caml/stack.h \ ../byterun/caml/sys.h ../byterun/caml/spacetime.h -spacetime_offline.p.$(O): spacetime_offline.c ../byterun/caml/alloc.h \ - ../byterun/caml/misc.h ../byterun/caml/config.h ../byterun/caml/m.h \ - ../byterun/caml/s.h ../byterun/caml/mlvalues.h ../byterun/caml/fail.h \ - ../byterun/caml/gc.h ../byterun/caml/intext.h ../byterun/caml/io.h \ - ../byterun/caml/major_gc.h ../byterun/caml/freelist.h \ - ../byterun/caml/memory.h ../byterun/caml/minor_gc.h \ - ../byterun/caml/address_class.h ../byterun/caml/roots.h \ - ../byterun/caml/signals.h ../byterun/caml/stack.h \ - ../byterun/caml/sys.h ../byterun/caml/spacetime.h afl.p.$(O): afl.c ../byterun/caml/config.h ../byterun/caml/m.h \ ../byterun/caml/s.h ../byterun/caml/misc.h ../byterun/caml/mlvalues.h \ ../byterun/caml/osdeps.h ../byterun/caml/memory.h ../byterun/caml/gc.h \ @@ -747,7 +726,8 @@ bigarray.p.$(O): bigarray.c ../byterun/caml/alloc.h ../byterun/caml/misc.h \ ../byterun/caml/intext.h ../byterun/caml/io.h ../byterun/caml/hash.h \ ../byterun/caml/memory.h 
../byterun/caml/gc.h \ ../byterun/caml/major_gc.h ../byterun/caml/freelist.h \ - ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h + ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h \ + ../byterun/caml/signals.h startup_aux.d.$(O): startup_aux.c ../byterun/caml/backtrace.h \ ../byterun/caml/mlvalues.h ../byterun/caml/config.h \ ../byterun/caml/m.h ../byterun/caml/s.h ../byterun/caml/misc.h \ @@ -943,9 +923,6 @@ gc_ctrl.d.$(O): gc_ctrl.c ../byterun/caml/alloc.h ../byterun/caml/misc.h \ ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h \ ../byterun/caml/gc_ctrl.h ../byterun/caml/signals.h \ ../byterun/caml/stack.h ../byterun/caml/startup_aux.h -terminfo.d.$(O): terminfo.c ../byterun/caml/config.h ../byterun/caml/m.h \ - ../byterun/caml/s.h ../byterun/caml/alloc.h ../byterun/caml/misc.h \ - ../byterun/caml/mlvalues.h ../byterun/caml/fail.h ../byterun/caml/io.h md5.d.$(O): md5.c ../byterun/caml/alloc.h ../byterun/caml/misc.h \ ../byterun/caml/config.h ../byterun/caml/m.h ../byterun/caml/s.h \ ../byterun/caml/mlvalues.h ../byterun/caml/fail.h \ @@ -1019,7 +996,8 @@ custom.d.$(O): custom.c ../byterun/caml/alloc.h ../byterun/caml/misc.h \ ../byterun/caml/mlvalues.h ../byterun/caml/custom.h \ ../byterun/caml/fail.h ../byterun/caml/memory.h ../byterun/caml/gc.h \ ../byterun/caml/major_gc.h ../byterun/caml/freelist.h \ - ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h + ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h \ + ../byterun/caml/signals.h globroots.d.$(O): globroots.c ../byterun/caml/memory.h \ ../byterun/caml/config.h ../byterun/caml/m.h ../byterun/caml/s.h \ ../byterun/caml/gc.h ../byterun/caml/mlvalues.h ../byterun/caml/misc.h \ @@ -1101,15 +1079,6 @@ spacetime_snapshot.d.$(O): spacetime_snapshot.c ../byterun/caml/alloc.h \ ../byterun/caml/address_class.h ../byterun/caml/roots.h \ ../byterun/caml/signals.h ../byterun/caml/stack.h \ ../byterun/caml/sys.h ../byterun/caml/spacetime.h -spacetime_offline.d.$(O): 
spacetime_offline.c ../byterun/caml/alloc.h \ - ../byterun/caml/misc.h ../byterun/caml/config.h ../byterun/caml/m.h \ - ../byterun/caml/s.h ../byterun/caml/mlvalues.h ../byterun/caml/fail.h \ - ../byterun/caml/gc.h ../byterun/caml/intext.h ../byterun/caml/io.h \ - ../byterun/caml/major_gc.h ../byterun/caml/freelist.h \ - ../byterun/caml/memory.h ../byterun/caml/minor_gc.h \ - ../byterun/caml/address_class.h ../byterun/caml/roots.h \ - ../byterun/caml/signals.h ../byterun/caml/stack.h \ - ../byterun/caml/sys.h ../byterun/caml/spacetime.h afl.d.$(O): afl.c ../byterun/caml/config.h ../byterun/caml/m.h \ ../byterun/caml/s.h ../byterun/caml/misc.h ../byterun/caml/mlvalues.h \ ../byterun/caml/osdeps.h ../byterun/caml/memory.h ../byterun/caml/gc.h \ @@ -1122,7 +1091,8 @@ bigarray.d.$(O): bigarray.c ../byterun/caml/alloc.h ../byterun/caml/misc.h \ ../byterun/caml/intext.h ../byterun/caml/io.h ../byterun/caml/hash.h \ ../byterun/caml/memory.h ../byterun/caml/gc.h \ ../byterun/caml/major_gc.h ../byterun/caml/freelist.h \ - ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h + ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h \ + ../byterun/caml/signals.h startup_aux.i.$(O): startup_aux.c ../byterun/caml/backtrace.h \ ../byterun/caml/mlvalues.h ../byterun/caml/config.h \ ../byterun/caml/m.h ../byterun/caml/s.h ../byterun/caml/misc.h \ @@ -1318,9 +1288,6 @@ gc_ctrl.i.$(O): gc_ctrl.c ../byterun/caml/alloc.h ../byterun/caml/misc.h \ ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h \ ../byterun/caml/gc_ctrl.h ../byterun/caml/signals.h \ ../byterun/caml/stack.h ../byterun/caml/startup_aux.h -terminfo.i.$(O): terminfo.c ../byterun/caml/config.h ../byterun/caml/m.h \ - ../byterun/caml/s.h ../byterun/caml/alloc.h ../byterun/caml/misc.h \ - ../byterun/caml/mlvalues.h ../byterun/caml/fail.h ../byterun/caml/io.h md5.i.$(O): md5.c ../byterun/caml/alloc.h ../byterun/caml/misc.h \ ../byterun/caml/config.h ../byterun/caml/m.h ../byterun/caml/s.h \ 
../byterun/caml/mlvalues.h ../byterun/caml/fail.h \ @@ -1394,7 +1361,8 @@ custom.i.$(O): custom.c ../byterun/caml/alloc.h ../byterun/caml/misc.h \ ../byterun/caml/mlvalues.h ../byterun/caml/custom.h \ ../byterun/caml/fail.h ../byterun/caml/memory.h ../byterun/caml/gc.h \ ../byterun/caml/major_gc.h ../byterun/caml/freelist.h \ - ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h + ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h \ + ../byterun/caml/signals.h globroots.i.$(O): globroots.c ../byterun/caml/memory.h \ ../byterun/caml/config.h ../byterun/caml/m.h ../byterun/caml/s.h \ ../byterun/caml/gc.h ../byterun/caml/mlvalues.h ../byterun/caml/misc.h \ @@ -1476,15 +1444,6 @@ spacetime_snapshot.i.$(O): spacetime_snapshot.c ../byterun/caml/alloc.h \ ../byterun/caml/address_class.h ../byterun/caml/roots.h \ ../byterun/caml/signals.h ../byterun/caml/stack.h \ ../byterun/caml/sys.h ../byterun/caml/spacetime.h -spacetime_offline.i.$(O): spacetime_offline.c ../byterun/caml/alloc.h \ - ../byterun/caml/misc.h ../byterun/caml/config.h ../byterun/caml/m.h \ - ../byterun/caml/s.h ../byterun/caml/mlvalues.h ../byterun/caml/fail.h \ - ../byterun/caml/gc.h ../byterun/caml/intext.h ../byterun/caml/io.h \ - ../byterun/caml/major_gc.h ../byterun/caml/freelist.h \ - ../byterun/caml/memory.h ../byterun/caml/minor_gc.h \ - ../byterun/caml/address_class.h ../byterun/caml/roots.h \ - ../byterun/caml/signals.h ../byterun/caml/stack.h \ - ../byterun/caml/sys.h ../byterun/caml/spacetime.h afl.i.$(O): afl.c ../byterun/caml/config.h ../byterun/caml/m.h \ ../byterun/caml/s.h ../byterun/caml/misc.h ../byterun/caml/mlvalues.h \ ../byterun/caml/osdeps.h ../byterun/caml/memory.h ../byterun/caml/gc.h \ @@ -1497,4 +1456,5 @@ bigarray.i.$(O): bigarray.c ../byterun/caml/alloc.h ../byterun/caml/misc.h \ ../byterun/caml/intext.h ../byterun/caml/io.h ../byterun/caml/hash.h \ ../byterun/caml/memory.h ../byterun/caml/gc.h \ ../byterun/caml/major_gc.h ../byterun/caml/freelist.h \ - 
../byterun/caml/minor_gc.h ../byterun/caml/address_class.h + ../byterun/caml/minor_gc.h ../byterun/caml/address_class.h \ + ../byterun/caml/signals.h diff --git a/asmrun/Makefile b/asmrun/Makefile index 62608a46..f275e2a0 100644 --- a/asmrun/Makefile +++ b/asmrun/Makefile @@ -14,10 +14,11 @@ #************************************************************************** include ../config/Makefile +include ../Makefile.common LINKEDFILES=misc.c freelist.c major_gc.c minor_gc.c memory.c alloc.c array.c \ compare.c ints.c floats.c str.c io.c extern.c intern.c hash.c sys.c \ - parsing.c gc_ctrl.c terminfo.c md5.c obj.c lexing.c printexc.c callback.c \ + parsing.c gc_ctrl.c md5.c obj.c lexing.c printexc.c callback.c \ weak.c compact.c finalise.c meta.c custom.c main.c globroots.c \ $(UNIX_OR_WIN32).c dynlink.c signals.c debugger.c startup_aux.c \ backtrace.c afl.c bigarray.c @@ -27,8 +28,6 @@ LINKEDFILES=misc.c freelist.c major_gc.c minor_gc.c memory.c alloc.c array.c \ # compiled on the platform where make depend is run sources := $(LINKEDFILES) -INSTALL_LIBDIR=$(DESTDIR)$(LIBDIR) - ifeq "$(UNIX_OR_WIN32)" "win32" LN = cp sources += ../byterun/unix.c @@ -53,6 +52,10 @@ ifeq "$(TOOLCHAIN)" "msvc" DFLAGS = $(CFLAGS) -DDEBUG PFLAGS=$(CFLAGS) -DPROFILING $(NATIVECCPROFOPTS) ASMOBJS=$(ARCH)nt.$(O) +ASMFLAGS= +ifeq ($(WITH_SPACETIME),true) +ASMFLAGS=/DWITH_SPACETIME +endif else DFLAGS = $(CFLAGS) -g -DDEBUG PFLAGS=$(CFLAGS) -pg -DPROFILING $(NATIVECCPROFOPTS) @@ -72,12 +75,12 @@ COBJS=startup_aux.$(O) startup.$(O) main.$(O) fail.$(O) \ major_gc.$(O) minor_gc.$(O) memory.$(O) alloc.$(O) compare.$(O) \ ints.$(O) floats.$(O) str.$(O) array.$(O) io.$(O) extern.$(O) \ intern.$(O) hash.$(O) sys.$(O) parsing.$(O) gc_ctrl.$(O) \ - terminfo.$(O) md5.$(O) obj.$(O) lexing.$(O) $(UNIX_OR_WIN32).$(O) \ + md5.$(O) obj.$(O) lexing.$(O) $(UNIX_OR_WIN32).$(O) \ printexc.$(O) callback.$(O) weak.$(O) compact.$(O) finalise.$(O) \ custom.$(O) globroots.$(O) backtrace_prim.$(O) backtrace.$(O) \ 
natdynlink.$(O) debugger.$(O) meta.$(O) dynlink.$(O) \ clambda_checks.$(O) spacetime.$(O) spacetime_snapshot.$(O) \ - spacetime_offline.$(O) afl.$(O) bigarray.$(O) + afl.$(O) bigarray.$(O) OBJS=$(COBJS) $(ASMOBJS) @@ -86,28 +89,30 @@ IOBJS=$(COBJS:.$(O)=.i.$(O)) $(ASMOBJS) POBJS=$(COBJS:.$(O)=.p.$(O)) $(ASMOBJS:.$(O)=.p.$(O)) PICOBJS=$(COBJS:.$(O)=.pic.$(O)) $(ASMOBJS:.$(O)=.pic.$(O)) -TARGETS = libasmrun.$(A) +TARGETS_A = libasmrun.$(A) +TARGETS_SO= ifeq "$(RUNTIMED)" "true" -TARGETS += libasmrund.$(A) +TARGETS_A += libasmrund.$(A) endif ifeq "$(RUNTIMEI)" "true" -TARGETS += libasmruni.$(A) +TARGETS_A += libasmruni.$(A) endif ifeq "$(PROFILING)" "true" -TARGETS += libasmrunp.$(A) +TARGETS_A += libasmrunp.$(A) endif ifeq "$(UNIX_OR_WIN32)" "unix" ifeq "$(SUPPORTS_SHARED_LIBRARIES)" "true" -TARGETS += libasmrun_pic.$(A) libasmrun_shared.$(SO) +TARGETS_A += libasmrun_pic.$(A) +TARGETS_SO += libasmrun_shared.$(SO) endif endif .PHONY: all -all: $(TARGETS) +all: $(TARGETS_A) $(TARGETS_SO) libasmrun.$(A): $(OBJS) $(call MKLIB,$@, $^) @@ -129,7 +134,10 @@ libasmrun_shared.$(SO): $(PICOBJS) .PHONY: install install: - cp $(TARGETS) "$(INSTALL_LIBDIR)" + $(INSTALL_DATA) $(TARGETS_A) "$(INSTALL_LIBDIR)" + if test -n "$(TARGETS_SO)"; then \ + $(INSTALL_PROG) $(TARGETS_SO) "$(INSTALL_LIBDIR)"; \ + fi $(LINKEDFILES): %.c: ../byterun/%.c $(LN) $< $@ @@ -163,10 +171,10 @@ $(LINKEDFILES): %.c: ../byterun/%.c $(ASPP) $(ASPPFLAGS) $(SHAREDCCCOMPOPTS) -o $@ $< %.obj: %.asm - $(ASM)$@ $< + $(ASM)$@ $(ASMFLAGS) $< %.pic.obj: %.asm - $(ASM)$@ $< + $(ASM)$@ $(ASMFLAGS) $< .PHONY: clean clean: diff --git a/asmrun/amd64.S b/asmrun/amd64.S index 237510dd..3e3bf83e 100644 --- a/asmrun/amd64.S +++ b/asmrun/amd64.S @@ -266,6 +266,20 @@ # define PREPARE_FOR_C_CALL # define CLEANUP_AFTER_C_CALL # define STACK_PROBE_SIZE $32768 +#endif + +/* Registers holding arguments of C functions. 
*/ + +#if defined(SYS_mingw64) || defined(SYS_cygwin) +#define C_ARG_1 %rcx +#define C_ARG_2 %rdx +#define C_ARG_3 %r8 +#define C_ARG_4 %r9 +#else +#define C_ARG_1 %rdi +#define C_ARG_2 %rsi +#define C_ARG_3 %rdx +#define C_ARG_4 %rcx #endif .text @@ -508,8 +522,8 @@ LBL(caml_start_program): pushq %rdi; CFI_ADJUST (8) pushq %rsi; CFI_ADJUST (8) /* No need to push %r12: it's callee-save. */ - movq %r12, %rdi - LEA_VAR(caml_start_program, %rsi) + movq %r12, C_ARG_1 + LEA_VAR(caml_start_program, C_ARG_2) call GCALL(caml_spacetime_c_to_ocaml) popq %rsi; CFI_ADJUST (-8) popq %rdi; CFI_ADJUST (-8) @@ -557,20 +571,6 @@ LBL(108): jmp LBL(109) CFI_ENDPROC -/* Registers holding arguments of C functions. */ - -#if defined(SYS_mingw64) || defined(SYS_cygwin) -#define C_ARG_1 %rcx -#define C_ARG_2 %rdx -#define C_ARG_3 %r8 -#define C_ARG_4 %r9 -#else -#define C_ARG_1 %rdi -#define C_ARG_2 %rsi -#define C_ARG_3 %rdx -#define C_ARG_4 %rcx -#endif - /* Raise an exception from OCaml */ FUNCTION(G(caml_raise_exn)) diff --git a/asmrun/amd64nt.asm b/asmrun/amd64nt.asm index c4534ea0..7915c1e3 100644 --- a/asmrun/amd64nt.asm +++ b/asmrun/amd64nt.asm @@ -34,6 +34,10 @@ EXTRN caml_backtrace_pos: DWORD EXTRN caml_backtrace_active: DWORD EXTRN caml_stash_backtrace: NEAR +IFDEF WITH_SPACETIME + EXTRN caml_spacetime_trie_node_ptr: QWORD + EXTRN caml_spacetime_c_to_ocaml: NEAR +ENDIF .CODE @@ -61,6 +65,9 @@ L105: ; Save caml_young_ptr, caml_exception_pointer mov caml_young_ptr, r15 mov caml_exception_pointer, r14 +IFDEF WITH_SPACETIME + mov caml_spacetime_trie_node_ptr, r13 +ENDIF ; Build array of registers, save it into caml_gc_regs push rbp push r11 @@ -212,6 +219,11 @@ caml_c_call: pop r12 mov caml_last_return_address, r12 mov caml_bottom_of_stack, rsp +IFDEF WITH_SPACETIME + ; Record the trie node hole pointer that corresponds to + ; [caml_last_return_address] + mov caml_spacetime_trie_node_ptr, r13 +ENDIF ; Touch the stack to trigger a recoverable segfault ; if insufficient space remains 
sub rsp, 01000h @@ -258,10 +270,29 @@ caml_start_program: ; Common code for caml_start_program and caml_callback* L106: ; Build a callback link +IFDEF WITH_SPACETIME + push caml_spacetime_trie_node_ptr +ELSE sub rsp, 8 ; stack 16-aligned +ENDIF push caml_gc_regs push caml_last_return_address push caml_bottom_of_stack +IFDEF WITH_SPACETIME + ; Save arguments to caml_callback + push rax + push rbx + push rdi + push rsi + ; No need to push r12: it is callee-save. + mov rcx, r12 + lea rdx, caml_start_program + call caml_spacetime_c_to_ocaml + pop rsi + pop rdi + pop rbx + pop rax +ENDIF ; Setup alloc ptr and exception ptr mov r15, caml_young_ptr mov r14, caml_exception_pointer @@ -270,6 +301,9 @@ L106: push r13 push r14 mov r14, rsp +IFDEF WITH_SPACETIME + mov r13, caml_spacetime_trie_node_ptr +ENDIF ; Call the OCaml code call r12 L107: @@ -284,7 +318,11 @@ L109: pop caml_bottom_of_stack pop caml_last_return_address pop caml_gc_regs +IFDEF WITH_SPACETIME + pop caml_spacetime_trie_node_ptr +ELSE add rsp, 8 +ENDIF ; Restore callee-save registers. 
movapd xmm6, OWORD PTR [rsp + 0*16] movapd xmm7, OWORD PTR [rsp + 1*16] @@ -472,6 +510,19 @@ caml_system__frametable LABEL QWORD WORD 0 ; no roots here ALIGN 8 +IFDEF WITH_SPACETIME + .DATA + PUBLIC caml_system__spacetime_shapes + ALIGN 8 +caml_system__spacetime_shapes LABEL QWORD + QWORD caml_start_program + QWORD 2 ; indirect call point to OCaml code + QWORD L107 ; in caml_start_program / caml_callback* + QWORD 0 ; end of shapes in caml_start_program + QWORD 0 ; end of shape table + ALIGN 8 +ENDIF + PUBLIC caml_negf_mask ALIGN 16 caml_negf_mask LABEL QWORD diff --git a/asmrun/spacetime.c b/asmrun/spacetime.c index bf4b6f3c..f6fc5c30 100644 --- a/asmrun/spacetime.c +++ b/asmrun/spacetime.c @@ -27,6 +27,10 @@ #ifdef HAS_UNISTD #include <unistd.h> #endif +#ifdef _WIN32 +#include <process.h> /* for _getpid */ +#include <direct.h> /* for _wgetcwd */ +#endif #include "caml/alloc.h" #include "caml/backtrace_prim.h" @@ -49,7 +53,11 @@ /* We force "noinline" in certain places to be sure we know how many frames there will be on the stack.
*/ +#ifdef _MSC_VER +#define NOINLINE __declspec(noinline) +#else #define NOINLINE __attribute__((noinline)) +#endif #ifdef HAS_LIBUNWIND #define UNW_LOCAL_ONLY @@ -99,6 +107,14 @@ allocation_point* caml_all_allocation_points = NULL; static const uintnat chunk_size = 1024 * 1024; +#ifdef _WIN32 +#define strdup_os wcsdup +#define snprintf_os _snwprintf +#else +#define strdup_os strdup +#define snprintf_os snprintf +#endif + static void reinitialise_free_node_block(void) { size_t index; @@ -115,10 +131,6 @@ static void reinitialise_free_node_block(void) #define O_BINARY 0 #endif -#if defined (_WIN32) || defined (_WIN64) -extern value val_process_id; -#endif - enum { FEATURE_CALL_COUNTS = 1, } features; @@ -151,21 +163,23 @@ CAMLprim value caml_spacetime_write_magic_number(value v_channel) return Val_unit; } -static char* automatic_snapshot_dir; +static char_os* automatic_snapshot_dir; static void open_snapshot_channel(void) { int fd; - char filename[8192]; + char_os filename[8192]; int pid; -#if defined (_WIN32) || defined (_WIN64) - pid = Int_val(val_process_id); + int filename_len = sizeof(filename)/sizeof(char_os); +#ifdef _WIN32 + pid = _getpid(); #else pid = getpid(); #endif - snprintf(filename, 8192, "%s/spacetime-%d", automatic_snapshot_dir, pid); - filename[8191] = '\0'; - fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666); + snprintf_os(filename, filename_len, _T("%s/spacetime-%d"), + automatic_snapshot_dir, pid); + filename[filename_len-1] = _T('\0'); + fd = open_os(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666); if (fd == -1) { automatic_snapshots = 0; } @@ -187,8 +201,8 @@ static void maybe_reopen_snapshot_channel(void) was written during that time) and then open a new one. 
*/ int pid; -#if defined (_WIN32) || defined (_WIN64) - pid = Int_val(val_process_id); +#ifdef _WIN32 + pid = _getpid(); #else pid = getpid(); #endif @@ -205,40 +219,40 @@ void caml_spacetime_initialize(void) { /* Note that this is called very early (even prior to GC initialisation). */ - char *ap_interval; + char_os *ap_interval; reinitialise_free_node_block(); caml_spacetime_static_shape_tables = &caml_spacetime_shapes; - ap_interval = caml_secure_getenv ("OCAML_SPACETIME_INTERVAL"); + ap_interval = caml_secure_getenv (_T("OCAML_SPACETIME_INTERVAL")); if (ap_interval != NULL) { unsigned int interval = 0; - sscanf(ap_interval, "%u", &interval); + sscanf_os(ap_interval, _T("%u"), &interval); if (interval != 0) { double time; - char cwd[4096]; - char* user_specified_automatic_snapshot_dir; + char_os cwd[4096]; + char_os* user_specified_automatic_snapshot_dir; int dir_ok = 1; user_specified_automatic_snapshot_dir = - caml_secure_getenv("OCAML_SPACETIME_SNAPSHOT_DIR"); + caml_secure_getenv(_T("OCAML_SPACETIME_SNAPSHOT_DIR")); if (user_specified_automatic_snapshot_dir == NULL) { #if defined(HAS_GETCWD) - if (getcwd(cwd, sizeof(cwd)) == NULL) { + if (getcwd_os(cwd, sizeof(cwd)/sizeof(char_os)) == NULL) { dir_ok = 0; } #else dir_ok = 0; #endif if (dir_ok) { - automatic_snapshot_dir = strdup(cwd); + automatic_snapshot_dir = strdup_os(cwd); } } else { automatic_snapshot_dir = - strdup(user_specified_automatic_snapshot_dir); + strdup_os(user_specified_automatic_snapshot_dir); } if (dir_ok) { diff --git a/asmrun/spacetime_offline.c b/asmrun/spacetime_offline.c deleted file mode 100644 index fa93e5da..00000000 --- a/asmrun/spacetime_offline.c +++ /dev/null @@ -1,251 +0,0 @@ -/**************************************************************************/ -/* */ -/* OCaml */ -/* */ -/* Mark Shinwell and Leo White, Jane Street Europe */ -/* */ -/* Copyright 2013--2016, Jane Street Group, LLC */ -/* */ -/* All rights reserved. 
This file is distributed under the terms of */ -/* the GNU Lesser General Public License version 2.1, with the */ -/* special exception on linking described in the file LICENSE. */ -/* */ -/**************************************************************************/ - -#define CAML_INTERNALS - -#include -#include -#include -#include -#include - -#include "caml/alloc.h" -#include "caml/config.h" -#include "caml/fail.h" -#include "caml/gc.h" -#include "caml/intext.h" -#include "caml/major_gc.h" -#include "caml/memory.h" -#include "caml/minor_gc.h" -#include "caml/misc.h" -#include "caml/mlvalues.h" -#include "caml/roots.h" -#include "caml/signals.h" -#include "caml/stack.h" -#include "caml/sys.h" -#include "caml/spacetime.h" - -#include "caml/s.h" - -#define SPACETIME_PROFINFO_WIDTH 26 -#define Spacetime_profinfo_hd(hd) \ - (Gen_profinfo_hd(SPACETIME_PROFINFO_WIDTH, hd)) - -#ifdef ARCH_SIXTYFOUR - -/* CR-someday lwhite: The following two definitions are copied from spacetime.c - because they are needed here, but must be inlined in spacetime.c - for performance. Perhaps a macro or "static inline" would be - more appropriate. */ - -c_node* caml_spacetime_offline_c_node_of_stored_pointer_not_null - (value node_stored) -{ - CAMLassert(Is_c_node(node_stored)); - return (c_node*) Hp_val(node_stored); -} - -c_node_type caml_spacetime_offline_classify_c_node(c_node* node) -{ - return (node->pc & 2) ? 
CALL : ALLOCATION; -} - -CAMLprim value caml_spacetime_compare_node( - value node1, value node2) -{ - CAMLassert(!Is_in_value_area(node1)); - CAMLassert(!Is_in_value_area(node2)); - - if (node1 == node2) { - return Val_long(0); - } - if (node1 < node2) { - return Val_long(-1); - } - return Val_long(1); -} - -CAMLprim value caml_spacetime_unmarshal_trie (value v_channel) -{ - return caml_input_value_to_outside_heap(v_channel); -} - -CAMLprim value caml_spacetime_node_num_header_words(value unit) -{ - unit = Val_unit; - return Val_long(Node_num_header_words); -} - -CAMLprim value caml_spacetime_is_ocaml_node(value node) -{ - CAMLassert(Is_ocaml_node(node) || Is_c_node(node)); - return Val_bool(Is_ocaml_node(node)); -} - -CAMLprim value caml_spacetime_ocaml_function_identifier(value node) -{ - CAMLassert(Is_ocaml_node(node)); - return caml_copy_int64((uint64_t) Decode_node_pc(Node_pc(node))); -} - -CAMLprim value caml_spacetime_ocaml_tail_chain(value node) -{ - CAMLassert(Is_ocaml_node(node)); - return Tail_link(node); -} - -CAMLprim value caml_spacetime_classify_direct_call_point - (value node, value offset) -{ - uintnat field; - value callee_node; - - CAMLassert(Is_ocaml_node(node)); - - field = Long_val(offset); - - callee_node = Direct_callee_node(node, field); - if (!Is_block(callee_node)) { - /* An unused call point (may be a tail call point). 
*/ - return Val_long(0); - } else if (Is_ocaml_node(callee_node)) { - return Val_long(1); /* direct call point to OCaml code */ - } else { - return Val_long(2); /* direct call point to non-OCaml code */ - } -} - -CAMLprim value caml_spacetime_ocaml_allocation_point_annotation - (value node, value offset) -{ - uintnat profinfo_shifted; - profinfo_shifted = (uintnat) Alloc_point_profinfo(node, Long_val(offset)); - return Val_long(Spacetime_profinfo_hd(profinfo_shifted)); -} - -CAMLprim value caml_spacetime_ocaml_allocation_point_count - (value node, value offset) -{ - value count = Alloc_point_count(node, Long_val(offset)); - CAMLassert(!Is_block(count)); - return count; -} - -CAMLprim value caml_spacetime_ocaml_direct_call_point_callee_node - (value node, value offset) -{ - return Direct_callee_node(node, Long_val(offset)); -} - -CAMLprim value caml_spacetime_ocaml_direct_call_point_call_count -(value node, value offset) -{ - return Direct_call_count(node, Long_val(offset)); -} - -CAMLprim value caml_spacetime_ocaml_indirect_call_point_callees - (value node, value offset) -{ - value callees = Indirect_pc_linked_list(node, Long_val(offset)); - CAMLassert(Is_block(callees)); - CAMLassert(Is_c_node(callees)); - return callees; -} - -CAMLprim value caml_spacetime_c_node_is_call(value node) -{ - c_node* c_node; - CAMLassert(node != (value) NULL); - CAMLassert(Is_c_node(node)); - c_node = caml_spacetime_offline_c_node_of_stored_pointer_not_null(node); - switch (caml_spacetime_offline_classify_c_node(c_node)) { - case CALL: return Val_true; - case ALLOCATION: return Val_false; - } - CAMLassert(0); - return Val_unit; /* silence compiler warning */ -} - -CAMLprim value caml_spacetime_c_node_next(value node) -{ - c_node* c_node; - - CAMLassert(node != (value) NULL); - CAMLassert(Is_c_node(node)); - c_node = caml_spacetime_offline_c_node_of_stored_pointer_not_null(node); - CAMLassert(c_node->next == Val_unit || Is_c_node(c_node->next)); - return c_node->next; -} - -CAMLprim 
value caml_spacetime_c_node_call_site(value node) -{ - c_node* c_node; - CAMLassert(node != (value) NULL); - CAMLassert(Is_c_node(node)); - c_node = caml_spacetime_offline_c_node_of_stored_pointer_not_null(node); - return caml_copy_int64((uint64_t) Decode_c_node_pc(c_node->pc)); -} - -CAMLprim value caml_spacetime_c_node_callee_node(value node) -{ - c_node* c_node; - CAMLassert(node != (value) NULL); - CAMLassert(Is_c_node(node)); - c_node = caml_spacetime_offline_c_node_of_stored_pointer_not_null(node); - CAMLassert(caml_spacetime_offline_classify_c_node(c_node) == CALL); - /* This might be an uninitialised tail call point: for example if an OCaml - callee was indirectly called but the callee wasn't instrumented (e.g. a - leaf function that doesn't allocate). */ - if (Is_tail_caller_node_encoded(c_node->data.call.callee_node)) { - return Val_unit; - } - return c_node->data.call.callee_node; -} - -CAMLprim value caml_spacetime_c_node_call_count(value node) -{ - c_node* c_node; - CAMLassert(node != (value) NULL); - CAMLassert(Is_c_node(node)); - c_node = caml_spacetime_offline_c_node_of_stored_pointer_not_null(node); - CAMLassert(caml_spacetime_offline_classify_c_node(c_node) == CALL); - if (Is_tail_caller_node_encoded(c_node->data.call.callee_node)) { - return Val_long(0); - } - return c_node->data.call.call_count; -} - -CAMLprim value caml_spacetime_c_node_profinfo(value node) -{ - c_node* c_node; - CAMLassert(node != (value) NULL); - CAMLassert(Is_c_node(node)); - c_node = caml_spacetime_offline_c_node_of_stored_pointer_not_null(node); - CAMLassert(caml_spacetime_offline_classify_c_node(c_node) == ALLOCATION); - CAMLassert(!Is_block(c_node->data.allocation.profinfo)); - return Val_long(Spacetime_profinfo_hd(c_node->data.allocation.profinfo)); -} - -CAMLprim value caml_spacetime_c_node_allocation_count(value node) -{ - c_node* c_node; - CAMLassert(node != (value) NULL); - CAMLassert(Is_c_node(node)); - c_node = 
caml_spacetime_offline_c_node_of_stored_pointer_not_null(node); - CAMLassert(caml_spacetime_offline_classify_c_node(c_node) == ALLOCATION); - CAMLassert(!Is_block(c_node->data.allocation.count)); - return c_node->data.allocation.count; -} - -#endif diff --git a/asmrun/startup.c b/asmrun/startup.c index 60dca013..070f0c64 100644 --- a/asmrun/startup.c +++ b/asmrun/startup.c @@ -46,6 +46,7 @@ extern int caml_parser_trace; CAMLexport header_t caml_atom_table[256]; char * caml_code_area_start, * caml_code_area_end; +struct ext_table caml_code_fragments_table; /* Initialize the atom table and the static data and code area limits. */ diff --git a/boot/ocamlc b/boot/ocamlc index a73dfdea..beac6097 100755 Binary files a/boot/ocamlc and b/boot/ocamlc differ diff --git a/boot/ocamldep b/boot/ocamldep deleted file mode 100755 index feec830c..00000000 Binary files a/boot/ocamldep and /dev/null differ diff --git a/boot/ocamllex b/boot/ocamllex index 36fe17f9..1b3e201e 100755 Binary files a/boot/ocamllex and b/boot/ocamllex differ diff --git a/bytecomp/bytegen.ml b/bytecomp/bytegen.ml index 6368cdca..afb6530e 100644 --- a/bytecomp/bytegen.ml +++ b/bytecomp/bytegen.ml @@ -146,7 +146,7 @@ let rec size_of_lambda env = function | Lvar id -> begin try Ident.find_same id env with Not_found -> RHS_nonrec end | Lfunction{params} as funct -> - RHS_function (1 + IdentSet.cardinal(free_variables funct), + RHS_function (1 + Ident.Set.cardinal(free_variables funct), List.length params) | Llet (Strict, _k, id, Lprim (Pduprecord (kind, size), _, _), body) when check_recordwith_updates id body -> @@ -355,25 +355,23 @@ let comp_primitive p args = | Psubfloat -> Kccall("caml_sub_float", 2) | Pmulfloat -> Kccall("caml_mul_float", 2) | Pdivfloat -> Kccall("caml_div_float", 2) - | Pfloatcomp Ceq -> Kccall("caml_eq_float", 2) - | Pfloatcomp Cneq -> Kccall("caml_neq_float", 2) - | Pfloatcomp Clt -> Kccall("caml_lt_float", 2) - | Pfloatcomp Cgt -> Kccall("caml_gt_float", 2) - | Pfloatcomp Cle -> 
Kccall("caml_le_float", 2) - | Pfloatcomp Cge -> Kccall("caml_ge_float", 2) | Pstringlength -> Kccall("caml_ml_string_length", 1) | Pbyteslength -> Kccall("caml_ml_bytes_length", 1) | Pstringrefs -> Kccall("caml_string_get", 2) | Pbytesrefs -> Kccall("caml_bytes_get", 2) | Pbytessets -> Kccall("caml_bytes_set", 3) - | Pstringrefu | Pbytesrefu -> Kgetstringchar - | Pbytessetu -> Ksetstringchar + | Pstringrefu -> Kgetstringchar + | Pbytesrefu -> Kgetbyteschar + | Pbytessetu -> Ksetbyteschar | Pstring_load_16(_) -> Kccall("caml_string_get16", 2) | Pstring_load_32(_) -> Kccall("caml_string_get32", 2) | Pstring_load_64(_) -> Kccall("caml_string_get64", 2) - | Pstring_set_16(_) -> Kccall("caml_string_set16", 3) - | Pstring_set_32(_) -> Kccall("caml_string_set32", 3) - | Pstring_set_64(_) -> Kccall("caml_string_set64", 3) + | Pbytes_set_16(_) -> Kccall("caml_bytes_set16", 3) + | Pbytes_set_32(_) -> Kccall("caml_bytes_set32", 3) + | Pbytes_set_64(_) -> Kccall("caml_bytes_set64", 3) + | Pbytes_load_16(_) -> Kccall("caml_bytes_get16", 2) + | Pbytes_load_32(_) -> Kccall("caml_bytes_get32", 2) + | Pbytes_load_64(_) -> Kccall("caml_bytes_get64", 2) | Parraylength _ -> Kvectlength | Parrayrefs Pgenarray -> Kccall("caml_array_get", 2) | Parrayrefs Pfloatarray -> Kccall("caml_floatarray_get", 2) @@ -400,7 +398,6 @@ let comp_primitive p args = Kccall(Printf.sprintf "caml_sys_const_%s" const_name, 1) | Pisint -> Kisint | Pisout -> Kisout - | Pbittest -> Kccall("caml_bitvect_test", 2) | Pbintofint bi -> comp_bint_primitive bi "of_int" args | Pintofbint bi -> comp_bint_primitive bi "to_int" args | Pcvtbint(Pint32, Pnativeint) -> Kccall("caml_nativeint_of_int32", 1) @@ -422,7 +419,7 @@ let comp_primitive p args = | Plsrbint bi -> comp_bint_primitive bi "shift_right_unsigned" args | Pasrbint bi -> comp_bint_primitive bi "shift_right" args | Pbintcomp(_, Ceq) -> Kccall("caml_equal", 2) - | Pbintcomp(_, Cneq) -> Kccall("caml_notequal", 2) + | Pbintcomp(_, Cne) -> Kccall("caml_notequal", 
2) | Pbintcomp(_, Clt) -> Kccall("caml_lessthan", 2) | Pbintcomp(_, Cgt) -> Kccall("caml_greaterthan", 2) | Pbintcomp(_, Cle) -> Kccall("caml_lessequal", 2) @@ -439,6 +436,8 @@ let comp_primitive p args = | Pbswap16 -> Kccall("caml_bswap16", 1) | Pbbswap(bi) -> comp_bint_primitive bi "bswap" args | Pint_as_pointer -> Kccall("caml_int_as_pointer", 1) + | Pbytes_to_string -> Kccall("caml_string_of_bytes", 1) + | Pbytes_of_string -> Kccall("caml_bytes_of_string", 1) | _ -> fatal_error "Bytegen.comp_primitive" let is_immed n = immed_min <= n && n <= immed_max @@ -519,7 +518,7 @@ let rec comp_expr env exp sz cont = end | Lfunction{params; body} -> (* assume kind = Curried *) let lbl = new_label() in - let fv = IdentSet.elements(free_variables exp) in + let fv = Ident.Set.elements(free_variables exp) in let to_compile = { params = params; body = body; label = lbl; free_vars = fv; num_defs = 1; rec_vars = []; rec_pos = 0 } in @@ -536,7 +535,7 @@ let rec comp_expr env exp sz cont = decl then begin (* let rec of functions *) let fv = - IdentSet.elements (free_variables (Lletrec(decl, lambda_unit))) in + Ident.Set.elements (free_variables (Lletrec(decl, lambda_unit))) in let rec_idents = List.map (fun (id, _lam) -> id) decl in let rec comp_fun pos = function [] -> [] @@ -595,8 +594,7 @@ let rec comp_expr env exp sz cont = in comp_init env sz decl_size end - | Lprim((Pidentity | Popaque | Pbytes_to_string | Pbytes_of_string), [arg], _) - -> + | Lprim((Pidentity | Popaque), [arg], _) -> comp_expr env arg sz cont | Lprim(Pignore, [arg], _) -> comp_expr env arg sz (add_const_unit cont) @@ -684,9 +682,24 @@ let rec comp_expr env exp sz cont = Misc.fatal_error "Bytegen.comp_expr: Pduparray takes exactly one arg" (* Integer first for enabling further optimization (cf. 
emitcode.ml) *) | Lprim (Pintcomp c, [arg ; (Lconst _ as k)], _) -> - let p = Pintcomp (commute_comparison c) + let p = Pintcomp (swap_integer_comparison c) and args = [k ; arg] in comp_args env args sz (comp_primitive p args :: cont) + | Lprim (Pfloatcomp cmp, args, _) -> + let cont = + match cmp with + | CFeq -> Kccall("caml_eq_float", 2) :: cont + | CFneq -> Kccall("caml_neq_float", 2) :: cont + | CFlt -> Kccall("caml_lt_float", 2) :: cont + | CFnlt -> Kccall("caml_lt_float", 2) :: Kboolnot :: cont + | CFgt -> Kccall("caml_gt_float", 2) :: cont + | CFngt -> Kccall("caml_gt_float", 2) :: Kboolnot :: cont + | CFle -> Kccall("caml_le_float", 2) :: cont + | CFnle -> Kccall("caml_le_float", 2) :: Kboolnot :: cont + | CFge -> Kccall("caml_ge_float", 2) :: cont + | CFnge -> Kccall("caml_ge_float", 2) :: Kboolnot :: cont + in + comp_args env args sz cont | Lprim(p, args, _) -> comp_args env args sz (comp_primitive p args :: cont) | Lstaticcatch (body, (i, vars) , handler) -> @@ -765,7 +778,7 @@ let rec comp_expr env exp sz cont = Klabel lbl_loop :: Kcheck_signals :: comp_expr (add_var param (sz+1) env) body (sz+2) (Kacc 1 :: Kpush :: Koffsetint offset :: Kassign 2 :: - Kacc 1 :: Kintcomp Cneq :: Kbranchif lbl_loop :: + Kacc 1 :: Kintcomp Cne :: Kbranchif lbl_loop :: Klabel lbl_exit :: add_const_unit (add_pop 2 cont)))) | Lswitch(arg, sw, _loc) -> let (branch, cont1) = make_branch cont in @@ -776,13 +789,13 @@ let rec comp_expr env exp sz cont = let act_consts = Array.make sw.sw_numconsts 0 and act_blocks = Array.make sw.sw_numblocks 0 in begin match sw.sw_failaction with (* default is index 0 *) - | Some fail -> ignore (store.act_store fail) + | Some fail -> ignore (store.act_store () fail) | None -> () end ; List.iter - (fun (n, act) -> act_consts.(n) <- store.act_store act) sw.sw_consts; + (fun (n, act) -> act_consts.(n) <- store.act_store () act) sw.sw_consts; List.iter - (fun (n, act) -> act_blocks.(n) <- store.act_store act) sw.sw_blocks; + (fun (n, act) -> 
act_blocks.(n) <- store.act_store () act) sw.sw_blocks; (* Compile and label actions *) let acts = store.act_get () in (* diff --git a/bytecomp/bytegen.mli b/bytecomp/bytegen.mli index b23a1dc6..24855ec6 100644 --- a/bytecomp/bytegen.mli +++ b/bytecomp/bytegen.mli @@ -21,3 +21,5 @@ open Instruct val compile_implementation: string -> lambda -> instruction list val compile_phrase: lambda -> instruction list * instruction list val reset: unit -> unit + +val merge_events : Instruct.debug_event -> Instruct.debug_event -> Instruct.debug_event diff --git a/bytecomp/bytelink.ml b/bytecomp/bytelink.ml index 3ed3e68c..87e0d62b 100644 --- a/bytecomp/bytelink.ml +++ b/bytecomp/bytelink.ml @@ -85,25 +85,23 @@ let add_ccobjs origin l = (* First pass: determine which units are needed *) -module IdentSet = Lambda.IdentSet - -let missing_globals = ref IdentSet.empty +let missing_globals = ref Ident.Set.empty let is_required (rel, _pos) = match rel with Reloc_setglobal id -> - IdentSet.mem id !missing_globals + Ident.Set.mem id !missing_globals | _ -> false let add_required compunit = let add_required_by_reloc (rel, _pos) = match rel with Reloc_getglobal id -> - missing_globals := IdentSet.add id !missing_globals + missing_globals := Ident.Set.add id !missing_globals | _ -> () in let add_required_for_effects id = - missing_globals := IdentSet.add id !missing_globals + missing_globals := Ident.Set.add id !missing_globals in List.iter add_required_by_reloc compunit.cu_reloc; List.iter add_required_for_effects compunit.cu_required_globals @@ -111,7 +109,7 @@ let add_required compunit = let remove_required (rel, _pos) = match rel with Reloc_setglobal id -> - missing_globals := IdentSet.remove id !missing_globals + missing_globals := Ident.Set.remove id !missing_globals | _ -> () let scan_file obj_name tolink = @@ -294,9 +292,9 @@ let output_stringlist oc l = (* Transform a file name into an absolute file name *) let make_absolute file = - if Filename.is_relative file - then 
Filename.concat (Sys.getcwd()) file - else file + if not (Filename.is_relative file) then file + else Location.rewrite_absolute_path + (Filename.concat (Sys.getcwd()) file) (* Create a bytecode executable file *) @@ -569,10 +567,10 @@ let link ppf objfiles output_name = else "stdlib.cma" :: (objfiles @ ["std_exit.cmo"]) in let tolink = List.fold_right scan_file objfiles [] in let missing_modules = - IdentSet.filter (fun id -> not (Ident.is_predef_exn id)) !missing_globals + Ident.Set.filter (fun id -> not (Ident.is_predef_exn id)) !missing_globals in begin - match IdentSet.elements missing_modules with + match Ident.Set.elements missing_modules with | [] -> () | id :: _ -> raise (Error (Required_module_unavailable (Ident.name id))) end; @@ -707,7 +705,7 @@ let reset () = lib_ccobjs := []; lib_ccopts := []; lib_dllibs := []; - missing_globals := IdentSet.empty; + missing_globals := Ident.Set.empty; Consistbl.clear crc_interfaces; implementations_defined := []; debug_info := []; diff --git a/bytecomp/emitcode.ml b/bytecomp/emitcode.ml index 3c7f848a..aed2666c 100644 --- a/bytecomp/emitcode.ml +++ b/bytecomp/emitcode.ml @@ -168,8 +168,10 @@ let record_event ev = let path = ev.ev_loc.Location.loc_start.Lexing.pos_fname in let abspath = Location.absolute_path path in debug_dirs := StringSet.add (Filename.dirname abspath) !debug_dirs; - if Filename.is_relative path then - debug_dirs := StringSet.add (Sys.getcwd ()) !debug_dirs; + if Filename.is_relative path then begin + let cwd = Location.rewrite_absolute_path (Sys.getcwd ()) in + debug_dirs := StringSet.add cwd !debug_dirs; + end; ev.ev_pos <- !out_position; events := ev :: !events @@ -185,12 +187,12 @@ let init () = (* Emission of one instruction *) let emit_comp = function -| Ceq -> out opEQ | Cneq -> out opNEQ +| Ceq -> out opEQ | Cne -> out opNEQ | Clt -> out opLTINT | Cle -> out opLEINT | Cgt -> out opGTINT | Cge -> out opGEINT and emit_branch_comp = function -| Ceq -> out opBEQ | Cneq -> out opBNEQ +| Ceq -> out 
opBEQ | Cne -> out opBNEQ | Clt -> out opBLTINT | Cle -> out opBLEINT | Cgt -> out opBGTINT | Cge -> out opBGEINT @@ -262,7 +264,8 @@ let emit_instr = function | Kgetvectitem -> out opGETVECTITEM | Ksetvectitem -> out opSETVECTITEM | Kgetstringchar -> out opGETSTRINGCHAR - | Ksetstringchar -> out opSETSTRINGCHAR + | Kgetbyteschar -> out opGETBYTESCHAR + | Ksetbyteschar -> out opSETBYTESCHAR | Kbranch lbl -> out opBRANCH; out_label lbl | Kbranchif lbl -> out opBRANCHIF; out_label lbl | Kbranchifnot lbl -> out opBRANCHIFNOT; out_label lbl @@ -304,6 +307,11 @@ let emit_instr = function (* Emission of a list of instructions. Include some peephole optimization. *) +let remerge_events ev1 = function + | Kevent ev2 :: c -> + Kevent (Bytegen.merge_events ev1 ev2) :: c + | c -> Kevent ev1 :: c + let rec emit = function [] -> () (* Peephole optimizations *) @@ -316,7 +324,7 @@ let rec emit = function emit rem | Kpush::Kconst k::Kintcomp c::Kbranchifnot lbl::rem when is_immed_const k -> - emit_branch_comp (negate_comparison c) ; + emit_branch_comp (negate_integer_comparison c) ; out_const k ; out_label lbl ; emit rem @@ -372,13 +380,13 @@ let rec emit = function out opPUSHGETGLOBAL; slot_for_literal sc end; emit c - | Kpush :: (Kevent {ev_kind = Event_before} as ev) :: + | Kpush :: (Kevent ({ev_kind = Event_before} as ev)) :: (Kgetglobal _ as instr1) :: (Kgetfield _ as instr2) :: c -> - emit (Kpush :: instr1 :: instr2 :: ev :: c) - | Kpush :: (Kevent {ev_kind = Event_before} as ev) :: + emit (Kpush :: instr1 :: instr2 :: remerge_events ev c) + | Kpush :: (Kevent ({ev_kind = Event_before} as ev)) :: (Kacc _ | Kenvacc _ | Koffsetclosure _ | Kgetglobal _ | Kconst _ as instr):: c -> - emit (Kpush :: instr :: ev :: c) + emit (Kpush :: instr :: remerge_events ev c) | Kgetglobal id :: Kgetfield n :: c -> out opGETGLOBALFIELD; slot_for_getglobal id; out_int n; emit c (* Default case *) diff --git a/bytecomp/instruct.ml b/bytecomp/instruct.ml index 0360a6d3..d7dae7fc 100644 --- 
a/bytecomp/instruct.ml +++ b/bytecomp/instruct.ml @@ -78,7 +78,8 @@ type instruction = | Kgetvectitem | Ksetvectitem | Kgetstringchar - | Ksetstringchar + | Kgetbyteschar + | Ksetbyteschar | Kbranch of label | Kbranchif of label | Kbranchifnot of label @@ -93,7 +94,7 @@ type instruction = | Kccall of string * int | Knegint | Kaddint | Ksubint | Kmulint | Kdivint | Kmodint | Kandint | Korint | Kxorint | Klslint | Klsrint | Kasrint - | Kintcomp of comparison + | Kintcomp of integer_comparison | Koffsetint of int | Koffsetref of int | Kisint diff --git a/bytecomp/instruct.mli b/bytecomp/instruct.mli index e49edefd..f52f322d 100644 --- a/bytecomp/instruct.mli +++ b/bytecomp/instruct.mli @@ -98,7 +98,8 @@ type instruction = | Kgetvectitem | Ksetvectitem | Kgetstringchar - | Ksetstringchar + | Kgetbyteschar + | Ksetbyteschar | Kbranch of label | Kbranchif of label | Kbranchifnot of label @@ -113,7 +114,7 @@ type instruction = | Kccall of string * int | Knegint | Kaddint | Ksubint | Kmulint | Kdivint | Kmodint | Kandint | Korint | Kxorint | Klslint | Klsrint | Kasrint - | Kintcomp of comparison + | Kintcomp of integer_comparison | Koffsetint of int | Koffsetref of int | Kisint diff --git a/bytecomp/lambda.ml b/bytecomp/lambda.ml index d6f91ef0..4b396af1 100644 --- a/bytecomp/lambda.ml +++ b/bytecomp/lambda.ml @@ -27,13 +27,6 @@ type compile_time_constant = | Ostype_cygwin | Backend_type -type loc_kind = - | Loc_FILE - | Loc_LINE - | Loc_MODULE - | Loc_LOC - | Loc_POS - type immediate_or_pointer = | Immediate | Pointer @@ -54,7 +47,6 @@ type primitive = | Pignore | Prevapply | Pdirapply - | Ploc of loc_kind (* Globals *) | Pgetglobal of Ident.t | Psetglobal of Ident.t @@ -68,7 +60,6 @@ type primitive = | Psetfloatfield of int * initialization_or_assignment | Pduprecord of Types.record_representation * int (* Force lazy values *) - | Plazyforce (* External call *) | Pccall of Primitive.description (* Exceptions *) @@ -80,14 +71,14 @@ type primitive = | Pdivint of is_safe | 
Pmodint of is_safe | Pandint | Porint | Pxorint | Plslint | Plsrint | Pasrint - | Pintcomp of comparison + | Pintcomp of integer_comparison | Poffsetint of int | Poffsetref of int (* Float operations *) | Pintoffloat | Pfloatofint | Pnegfloat | Pabsfloat | Paddfloat | Psubfloat | Pmulfloat | Pdivfloat - | Pfloatcomp of comparison + | Pfloatcomp of float_comparison (* String operations *) | Pstringlength | Pstringrefu | Pstringrefs | Pbyteslength | Pbytesrefu | Pbytessetu | Pbytesrefs | Pbytessets @@ -103,8 +94,6 @@ type primitive = | Pisint (* Test if the (integer) argument is outside an interval *) | Pisout - (* Bitvect operations *) - | Pbittest (* Operations on boxed integers (Nativeint.t, Int32.t, Int64.t) *) | Pbintofint of boxed_integer | Pintofbint of boxed_integer @@ -121,7 +110,7 @@ type primitive = | Plslbint of boxed_integer | Plsrbint of boxed_integer | Pasrbint of boxed_integer - | Pbintcomp of boxed_integer * comparison + | Pbintcomp of boxed_integer * integer_comparison (* Operations on big arrays: (unsafe, #dimensions, kind, layout) *) | Pbigarrayref of bool * int * bigarray_kind * bigarray_layout | Pbigarrayset of bool * int * bigarray_kind * bigarray_layout @@ -131,9 +120,12 @@ type primitive = | Pstring_load_16 of bool | Pstring_load_32 of bool | Pstring_load_64 of bool - | Pstring_set_16 of bool - | Pstring_set_32 of bool - | Pstring_set_64 of bool + | Pbytes_load_16 of bool + | Pbytes_load_32 of bool + | Pbytes_load_64 of bool + | Pbytes_set_16 of bool + | Pbytes_set_32 of bool + | Pbytes_set_64 of bool (* load/set 16,32,64 bits from a (char, int8_unsigned_elt, c_layout) Bigarray.Array1.t : (unsafe) *) | Pbigstring_load_16 of bool @@ -152,8 +144,11 @@ type primitive = (* Inhibition of optimisation *) | Popaque -and comparison = - Ceq | Cneq | Clt | Cgt | Cle | Cge +and integer_comparison = + Ceq | Cne | Clt | Cgt | Cle | Cge + +and float_comparison = + CFeq | CFneq | CFlt | CFnlt | CFgt | CFngt | CFle | CFnle | CFge | CFnge and value_kind = 
Pgenval | Pfloatval | Pboxedintval of boxed_integer | Pintval @@ -408,7 +403,7 @@ let iter_opt f = function | None -> () | Some e -> f e -let iter f = function +let iter_head_constructor f = function Lvar _ | Lconst _ -> () | Lapply{ap_func = fn; ap_args = args} -> @@ -454,40 +449,83 @@ let iter f = function | Lifused (_v, e) -> f e +let rec free_variables = function + | Lvar id -> Ident.Set.singleton id + | Lconst _ -> Ident.Set.empty + | Lapply{ap_func = fn; ap_args = args} -> + free_variables_list (free_variables fn) args + | Lfunction{body; params} -> + Ident.Set.diff (free_variables body) + (Ident.Set.of_list params) + | Llet(_str, _k, id, arg, body) -> + Ident.Set.union + (free_variables arg) + (Ident.Set.remove id (free_variables body)) + | Lletrec(decl, body) -> + let set = free_variables_list (free_variables body) (List.map snd decl) in + Ident.Set.diff set (Ident.Set.of_list (List.map fst decl)) + | Lprim(_p, args, _loc) -> + free_variables_list Ident.Set.empty args + | Lswitch(arg, sw,_) -> + let set = + free_variables_list + (free_variables_list (free_variables arg) + (List.map snd sw.sw_consts)) + (List.map snd sw.sw_blocks) + in + begin match sw.sw_failaction with + | None -> set + | Some failaction -> Ident.Set.union set (free_variables failaction) + end + | Lstringswitch (arg,cases,default,_) -> + let set = + free_variables_list (free_variables arg) + (List.map snd cases) + in + begin match default with + | None -> set + | Some default -> Ident.Set.union set (free_variables default) + end + | Lstaticraise (_,args) -> + free_variables_list Ident.Set.empty args + | Lstaticcatch(body, (_, params), handler) -> + Ident.Set.union + (Ident.Set.diff + (free_variables handler) + (Ident.Set.of_list params)) + (free_variables body) + | Ltrywith(body, param, handler) -> + Ident.Set.union + (Ident.Set.remove + param + (free_variables handler)) + (free_variables body) + | Lifthenelse(e1, e2, e3) -> + Ident.Set.union + (Ident.Set.union (free_variables e1) 
(free_variables e2)) + (free_variables e3) + | Lsequence(e1, e2) -> + Ident.Set.union (free_variables e1) (free_variables e2) + | Lwhile(e1, e2) -> + Ident.Set.union (free_variables e1) (free_variables e2) + | Lfor(v, lo, hi, _dir, body) -> + let set = Ident.Set.union (free_variables lo) (free_variables hi) in + Ident.Set.union set (Ident.Set.remove v (free_variables body)) + | Lassign(id, e) -> + Ident.Set.add id (free_variables e) + | Lsend (_k, met, obj, args, _) -> + free_variables_list + (Ident.Set.union (free_variables met) (free_variables obj)) + args + | Levent (lam, _evt) -> + free_variables lam + | Lifused (_v, e) -> + (* Shouldn't v be considered a free variable ? *) + free_variables e -module IdentSet = Set.Make(Ident) - -let free_ids get l = - let fv = ref IdentSet.empty in - let rec free l = - iter free l; - fv := List.fold_right IdentSet.add (get l) !fv; - match l with - Lfunction{params} -> - List.iter (fun param -> fv := IdentSet.remove param !fv) params - | Llet(_str, _k, id, _arg, _body) -> - fv := IdentSet.remove id !fv - | Lletrec(decl, _body) -> - List.iter (fun (id, _exp) -> fv := IdentSet.remove id !fv) decl - | Lstaticcatch(_e1, (_,vars), _e2) -> - List.iter (fun id -> fv := IdentSet.remove id !fv) vars - | Ltrywith(_e1, exn, _e2) -> - fv := IdentSet.remove exn !fv - | Lfor(v, _e1, _e2, _dir, _e3) -> - fv := IdentSet.remove v !fv - | Lassign(id, _e) -> - fv := IdentSet.add id !fv - | Lvar _ | Lconst _ | Lapply _ - | Lprim _ | Lswitch _ | Lstringswitch _ | Lstaticraise _ - | Lifthenelse _ | Lsequence _ | Lwhile _ - | Lsend _ | Levent _ | Lifused _ -> () - in free l; !fv - -let free_variables l = - free_ids (function Lvar id -> [id] | _ -> []) l - -let free_methods l = - free_ids (function Lsend(Self, Lvar meth, _, _, _) -> [meth] | _ -> []) l +and free_variables_list set exprs = + List.fold_left (fun set expr -> Ident.Set.union (free_variables expr) set) + set exprs (* Check if an action has a "when" guard *) let raise_count = ref 0 @@ 
-496,12 +534,6 @@ let next_raise_count () = incr raise_count ; !raise_count -let negative_raise_count = ref 0 - -let next_negative_raise_count () = - decr negative_raise_count ; - !negative_raise_count - (* Anticipated staticraise, for guards *) let staticfail = Lstaticraise (0,[]) @@ -555,52 +587,69 @@ let rec make_sequence fn = function let lam = fn x in Lsequence(lam, make_sequence fn rem) (* Apply a substitution to a lambda-term. - Assumes that the bound variables of the lambda-term do not - belong to the domain of the substitution. Assumes that the image of the substitution is out of reach of the bound variables of the lambda-term (no capture). *) -let subst_lambda s lam = - let rec subst = function - Lvar id as l -> - begin try Ident.find_same id s with Not_found -> l end +let rec subst s lam = + let remove_list l s = + List.fold_left (fun s id -> Ident.Map.remove id s) s l + in + let module M = Ident.Map in + match lam with + | Lvar id as l -> + begin try Ident.Map.find id s with Not_found -> l end | Lconst _ as l -> l | Lapply ap -> - Lapply{ap with ap_func = subst ap.ap_func; - ap_args = List.map subst ap.ap_args} + Lapply{ap with ap_func = subst s ap.ap_func; + ap_args = subst_list s ap.ap_args} | Lfunction{kind; params; body; attr; loc} -> - Lfunction{kind; params; body = subst body; attr; loc} - | Llet(str, k, id, arg, body) -> Llet(str, k, id, subst arg, subst body) - | Lletrec(decl, body) -> Lletrec(List.map subst_decl decl, subst body) - | Lprim(p, args, loc) -> Lprim(p, List.map subst args, loc) + let s = List.fold_right Ident.Map.remove params s in + Lfunction{kind; params; body = subst s body; attr; loc} + | Llet(str, k, id, arg, body) -> + Llet(str, k, id, subst s arg, subst (Ident.Map.remove id s) body) + | Lletrec(decl, body) -> + let s = + List.fold_left (fun s (id, _) -> Ident.Map.remove id s) + s decl + in + Lletrec(List.map (subst_decl s) decl, subst s body) + | Lprim(p, args, loc) -> Lprim(p, subst_list s args, loc) | Lswitch(arg, sw, loc) 
-> - Lswitch(subst arg, - {sw with sw_consts = List.map subst_case sw.sw_consts; - sw_blocks = List.map subst_case sw.sw_blocks; - sw_failaction = subst_opt sw.sw_failaction; }, + Lswitch(subst s arg, + {sw with sw_consts = List.map (subst_case s) sw.sw_consts; + sw_blocks = List.map (subst_case s) sw.sw_blocks; + sw_failaction = subst_opt s sw.sw_failaction; }, loc) | Lstringswitch (arg,cases,default,loc) -> Lstringswitch - (subst arg,List.map subst_strcase cases,subst_opt default,loc) - | Lstaticraise (i,args) -> Lstaticraise (i, List.map subst args) - | Lstaticcatch(e1, io, e2) -> Lstaticcatch(subst e1, io, subst e2) - | Ltrywith(e1, exn, e2) -> Ltrywith(subst e1, exn, subst e2) - | Lifthenelse(e1, e2, e3) -> Lifthenelse(subst e1, subst e2, subst e3) - | Lsequence(e1, e2) -> Lsequence(subst e1, subst e2) - | Lwhile(e1, e2) -> Lwhile(subst e1, subst e2) - | Lfor(v, e1, e2, dir, e3) -> Lfor(v, subst e1, subst e2, dir, subst e3) - | Lassign(id, e) -> Lassign(id, subst e) + (subst s arg,List.map (subst_strcase s) cases,subst_opt s default,loc) + | Lstaticraise (i,args) -> Lstaticraise (i, subst_list s args) + | Lstaticcatch(body, (id, params), handler) -> + Lstaticcatch(subst s body, (id, params), + subst (remove_list params s) handler) + | Ltrywith(body, exn, handler) -> + Ltrywith(subst s body, exn, subst (Ident.Map.remove exn s) handler) + | Lifthenelse(e1, e2, e3) -> Lifthenelse(subst s e1, subst s e2, subst s e3) + | Lsequence(e1, e2) -> Lsequence(subst s e1, subst s e2) + | Lwhile(e1, e2) -> Lwhile(subst s e1, subst s e2) + | Lfor(v, lo, hi, dir, body) -> + Lfor(v, subst s lo, subst s hi, dir, + subst (Ident.Map.remove v s) body) + | Lassign(id, e) -> + assert(not (Ident.Map.mem id s)); + Lassign(id, subst s e) | Lsend (k, met, obj, args, loc) -> - Lsend (k, subst met, subst obj, List.map subst args, loc) - | Levent (lam, evt) -> Levent (subst lam, evt) - | Lifused (v, e) -> Lifused (v, subst e) - and subst_decl (id, exp) = (id, subst exp) - and subst_case 
(key, case) = (key, subst case) - and subst_strcase (key, case) = (key, subst case) - and subst_opt = function - | None -> None - | Some e -> Some (subst e) - in subst lam + Lsend (k, subst s met, subst s obj, subst_list s args, loc) + | Levent (lam, evt) -> Levent (subst s lam, evt) + | Lifused (v, e) -> Lifused (v, subst s e) +and subst_list s l = List.map (subst s) l +and subst_decl s (id, exp) = (id, subst s exp) +and subst_case s (key, case) = (key, subst s case) +and subst_strcase s (key, case) = (key, subst s case) +and subst_opt s = function + | None -> None + | Some e -> Some (subst s e) + let rec map f lam = let lam = @@ -672,46 +721,51 @@ let bind str var exp body = Lvar var' when Ident.same var var' -> body | _ -> Llet(str, Pgenval, var, exp, body) -and commute_comparison = function -| Ceq -> Ceq| Cneq -> Cneq -| Clt -> Cgt | Cle -> Cge -| Cgt -> Clt | Cge -> Cle - -and negate_comparison = function -| Ceq -> Cneq| Cneq -> Ceq -| Clt -> Cge | Cle -> Cgt -| Cgt -> Cle | Cge -> Clt +let negate_integer_comparison = function + | Ceq -> Cne + | Cne -> Ceq + | Clt -> Cge + | Cle -> Cgt + | Cgt -> Cle + | Cge -> Clt + +let swap_integer_comparison = function + | Ceq -> Ceq + | Cne -> Cne + | Clt -> Cgt + | Cle -> Cge + | Cgt -> Clt + | Cge -> Cle + +let negate_float_comparison = function + | CFeq -> CFneq + | CFneq -> CFeq + | CFlt -> CFnlt + | CFnlt -> CFlt + | CFgt -> CFngt + | CFngt -> CFgt + | CFle -> CFnle + | CFnle -> CFle + | CFge -> CFnge + | CFnge -> CFge + +let swap_float_comparison = function + | CFeq -> CFeq + | CFneq -> CFneq + | CFlt -> CFgt + | CFnlt -> CFngt + | CFle -> CFge + | CFnle -> CFnge + | CFgt -> CFlt + | CFngt -> CFnlt + | CFge -> CFle + | CFnge -> CFnle let raise_kind = function | Raise_regular -> "raise" | Raise_reraise -> "reraise" | Raise_notrace -> "raise_notrace" -let lam_of_loc kind loc = - let loc_start = loc.Location.loc_start in - let (file, lnum, cnum) = Location.get_pos_info loc_start in - let enum = 
loc.Location.loc_end.Lexing.pos_cnum - - loc_start.Lexing.pos_cnum + cnum in - match kind with - | Loc_POS -> - Lconst (Const_block (0, [ - Const_immstring file; - Const_base (Const_int lnum); - Const_base (Const_int cnum); - Const_base (Const_int enum); - ])) - | Loc_FILE -> Lconst (Const_immstring file) - | Loc_MODULE -> - let filename = Filename.basename file in - let name = Env.get_unit_name () in - let module_name = if name = "" then "//"^filename^"//" else name in - Lconst (Const_immstring module_name) - | Loc_LOC -> - let loc = Printf.sprintf "File %S, line %d, characters %d-%d" - file lnum cnum enum in - Lconst (Const_immstring loc) - | Loc_LINE -> Lconst (Const_base (Const_int lnum)) - let merge_inline_attributes attr1 attr2 = match attr1, attr2 with | Default_inline, _ -> Some attr2 diff --git a/bytecomp/lambda.mli b/bytecomp/lambda.mli index fef608d4..e82aa3e7 100644 --- a/bytecomp/lambda.mli +++ b/bytecomp/lambda.mli @@ -27,13 +27,6 @@ type compile_time_constant = | Ostype_cygwin | Backend_type -type loc_kind = - | Loc_FILE - | Loc_LINE - | Loc_MODULE - | Loc_LOC - | Loc_POS - type immediate_or_pointer = | Immediate | Pointer @@ -59,7 +52,6 @@ type primitive = | Pignore | Prevapply | Pdirapply - | Ploc of loc_kind (* Globals *) | Pgetglobal of Ident.t | Psetglobal of Ident.t @@ -72,8 +64,6 @@ type primitive = | Pfloatfield of int | Psetfloatfield of int * initialization_or_assignment | Pduprecord of Types.record_representation * int - (* Force lazy values *) - | Plazyforce (* External call *) | Pccall of Primitive.description (* Exceptions *) @@ -85,14 +75,14 @@ type primitive = | Pdivint of is_safe | Pmodint of is_safe | Pandint | Porint | Pxorint | Plslint | Plsrint | Pasrint - | Pintcomp of comparison + | Pintcomp of integer_comparison | Poffsetint of int | Poffsetref of int (* Float operations *) | Pintoffloat | Pfloatofint | Pnegfloat | Pabsfloat | Paddfloat | Psubfloat | Pmulfloat | Pdivfloat - | Pfloatcomp of comparison + | Pfloatcomp of 
float_comparison (* String operations *) | Pstringlength | Pstringrefu | Pstringrefs | Pbyteslength | Pbytesrefu | Pbytessetu | Pbytesrefs | Pbytessets @@ -111,8 +101,6 @@ type primitive = | Pisint (* Test if the (integer) argument is outside an interval *) | Pisout - (* Bitvect operations *) - | Pbittest (* Operations on boxed integers (Nativeint.t, Int32.t, Int64.t) *) | Pbintofint of boxed_integer | Pintofbint of boxed_integer @@ -129,7 +117,7 @@ type primitive = | Plslbint of boxed_integer | Plsrbint of boxed_integer | Pasrbint of boxed_integer - | Pbintcomp of boxed_integer * comparison + | Pbintcomp of boxed_integer * integer_comparison (* Operations on big arrays: (unsafe, #dimensions, kind, layout) *) | Pbigarrayref of bool * int * bigarray_kind * bigarray_layout | Pbigarrayset of bool * int * bigarray_kind * bigarray_layout @@ -139,9 +127,12 @@ type primitive = | Pstring_load_16 of bool | Pstring_load_32 of bool | Pstring_load_64 of bool - | Pstring_set_16 of bool - | Pstring_set_32 of bool - | Pstring_set_64 of bool + | Pbytes_load_16 of bool + | Pbytes_load_32 of bool + | Pbytes_load_64 of bool + | Pbytes_set_16 of bool + | Pbytes_set_32 of bool + | Pbytes_set_64 of bool (* load/set 16,32,64 bits from a (char, int8_unsigned_elt, c_layout) Bigarray.Array1.t : (unsafe) *) | Pbigstring_load_16 of bool @@ -160,8 +151,11 @@ type primitive = (* Inhibition of optimisation *) | Popaque -and comparison = - Ceq | Cneq | Clt | Cgt | Cle | Cge +and integer_comparison = + Ceq | Cne | Clt | Cgt | Cle | Cge + +and float_comparison = + CFeq | CFneq | CFlt | CFnlt | CFgt | CFngt | CFle | CFnle | CFge | CFnge and array_kind = Pgenarray | Paddrarray | Pintarray | Pfloatarray @@ -322,10 +316,12 @@ val lambda_unit: lambda val name_lambda: let_kind -> lambda -> (Ident.t -> lambda) -> lambda val name_lambda_list: lambda list -> (lambda list -> lambda) -> lambda -val iter: (lambda -> unit) -> lambda -> unit -module IdentSet: Set.S with type elt = Ident.t -val free_variables: 
lambda -> IdentSet.t -val free_methods: lambda -> IdentSet.t +val iter_head_constructor: (lambda -> unit) -> lambda -> unit +(** [iter_head_constructor f lam] apply [f] to only the first level of + sub expressions of [lam]. It does not recursively traverse the + expression. *) + +val free_variables: lambda -> Ident.Set.t val transl_normal_path: Path.t -> lambda (* Path.t is already normal *) val transl_path: ?loc:Location.t -> Env.t -> Path.t -> lambda @@ -338,12 +334,19 @@ val transl_class_path: ?loc:Location.t -> Env.t -> Path.t -> lambda val make_sequence: ('a -> lambda) -> 'a list -> lambda -val subst_lambda: lambda Ident.tbl -> lambda -> lambda +val subst: lambda Ident.Map.t -> lambda -> lambda +(** Apply a substitution to a lambda-term. + Assumes that the image of the substitution is out of reach + of the bound variables of the lambda-term (no capture). *) + val map : (lambda -> lambda) -> lambda -> lambda val bind : let_kind -> Ident.t -> lambda -> lambda -> lambda -val commute_comparison : comparison -> comparison -val negate_comparison : comparison -> comparison +val negate_integer_comparison : integer_comparison -> integer_comparison +val swap_integer_comparison : integer_comparison -> integer_comparison + +val negate_float_comparison : float_comparison -> float_comparison +val swap_float_comparison : float_comparison -> float_comparison val default_function_attribute : function_attribute val default_stub_attribute : function_attribute @@ -354,11 +357,6 @@ val default_stub_attribute : function_attribute (* Get a new static failure ident *) val next_raise_count : unit -> int -val next_negative_raise_count : unit -> int - (* Negative raise counts are used to compile 'match ... with - exception x -> ...'. This disabled some simplifications - performed by the Simplif module that assume that static raises - are in tail position in their handler. 
*) val staticfail : lambda (* Anticipated static failure *) @@ -367,7 +365,6 @@ val is_guarded: lambda -> bool val patch_guarded : lambda -> lambda -> lambda val raise_kind: raise_kind -> string -val lam_of_loc : loc_kind -> Location.t -> lambda val merge_inline_attributes : inline_attribute diff --git a/bytecomp/matching.ml b/bytecomp/matching.ml index 32e8043d..6ed69827 100644 --- a/bytecomp/matching.ml +++ b/bytecomp/matching.ml @@ -22,6 +22,7 @@ open Typedtree open Lambda open Parmatch open Printf +open Printpat let dbg = false @@ -60,6 +61,18 @@ let string_of_lam lam = Printlambda.lambda Format.str_formatter lam ; Format.flush_str_formatter () +let all_record_args lbls = match lbls with +| (_,{lbl_all=lbl_all},_)::_ -> + let t = + Array.map + (fun lbl -> mknoloc (Longident.Lident "?temp?"), lbl,omega) + lbl_all in + List.iter + (fun ((_, lbl,_) as x) -> t.(lbl.lbl_pos) <- x) + lbls ; + Array.to_list t +| _ -> fatal_error "Parmatch.all_record_args" + type matrix = pattern list list let add_omega_column pss = List.map (fun ps -> omega::ps) pss @@ -70,9 +83,9 @@ let pretty_ctx ctx = List.iter (fun {left=left ; right=right} -> prerr_string "LEFT:" ; - pretty_line left ; + pretty_line Format.err_formatter left ; prerr_string " RIGHT:" ; - pretty_line right ; + pretty_line Format.err_formatter right ; prerr_endline "") ctx @@ -163,7 +176,7 @@ let filter_matrix matcher pss = end | [] -> [] | _ -> - pretty_matrix pss ; + pretty_matrix Format.err_formatter pss ; fatal_error "Matching.filter_matrix" in filter_rec pss @@ -404,7 +417,7 @@ let pretty_cases cases = (fun (ps,_l) -> List.iter (fun p -> - Parmatch.top_pretty Format.str_formatter p ; + top_pretty Format.str_formatter p ; prerr_string " " ; prerr_string (Format.flush_str_formatter ())) ps ; @@ -421,7 +434,7 @@ let pretty_def def = List.iter (fun (pss,i) -> Printf.fprintf stderr "Matrix for %d\n" i ; - pretty_matrix pss) + pretty_matrix Format.err_formatter pss) def ; prerr_endline "+++++++++++++++++++++" @@ 
-441,7 +454,7 @@ let rec pretty_precompiled = function | PmOr x -> prerr_endline "++++ OR ++++" ; pretty_pm x.body ; - pretty_matrix x.or_matrix ; + pretty_matrix Format.err_formatter x.or_matrix ; List.iter (fun (_,i,_,pm) -> eprintf "++ Handler %d ++\n" i ; @@ -666,9 +679,9 @@ let default_compat p def = (* Or-pattern expansion, variables are a complication w.r.t. the article *) let rec extract_vars r p = match p.pat_desc with -| Tpat_var (id, _) -> IdentSet.add id r +| Tpat_var (id, _) -> Ident.Set.add id r | Tpat_alias (p, id,_ ) -> - extract_vars (IdentSet.add id r) p + extract_vars (Ident.Set.add id r) p | Tpat_tuple pats -> List.fold_left extract_vars r pats | Tpat_record (lpats,_) -> @@ -714,8 +727,8 @@ let rec explode_or_pat arg patl mk_action rem vars aliases = function let pm_free_variables {cases=cases} = List.fold_right - (fun (_,act) r -> IdentSet.union (free_variables act) r) - cases IdentSet.empty + (fun (_,act) r -> Ident.Set.union (free_variables act) r) + cases Ident.Set.empty (* Basic grouping predicates *) @@ -804,8 +817,8 @@ let insert_or_append p ps act ors no = if is_or q then begin if may_compat p q then if - IdentSet.is_empty (extract_vars IdentSet.empty p) && - IdentSet.is_empty (extract_vars IdentSet.empty q) && + Ident.Set.is_empty (extract_vars Ident.Set.empty p) && + Ident.Set.is_empty (extract_vars Ident.Set.empty q) && equiv_pat p q then (* attempt insert, for equivalent orpats with no variables *) let _, not_e = get_equiv q rem in @@ -1101,9 +1114,9 @@ and precompile_or argo cls ors args def k = match ors with args = (match args with _::r -> r | _ -> assert false) ; default = default_compat orp def} in let vars = - IdentSet.elements - (IdentSet.inter - (extract_vars IdentSet.empty orp) + Ident.Set.elements + (Ident.Set.inter + (extract_vars Ident.Set.empty orp) (pm_free_variables orpm)) in let or_num = next_raise_count () in let new_patl = Parmatch.omega_list patl in @@ -1807,10 +1820,10 @@ let share_actions_tree sw d = let d = 
match d with | None -> None - | Some d -> Some (store.Switch.act_store_shared d) in + | Some d -> Some (store.Switch.act_store_shared () d) in (* Store all other actions *) let sw = - List.map (fun (cst,act) -> cst,store.Switch.act_store act) sw in + List.map (fun (cst,act) -> cst,store.Switch.act_store () act) sw in (* Retrieve all actions, including potential default *) let acts = store.Switch.act_get_shared () in @@ -1888,7 +1901,7 @@ module SArg = struct type primitive = Lambda.primitive let eqint = Pintcomp Ceq - let neint = Pintcomp Cneq + let neint = Pintcomp Cne let leint = Pintcomp Cle let ltint = Pintcomp Clt let geint = Pintcomp Cge @@ -1934,14 +1947,14 @@ let share_actions_sw sw = | None -> None | Some fail -> (* Fail is translated to exit, whatever happens *) - Some (store.Switch.act_store_shared fail) in + Some (store.Switch.act_store_shared () fail) in let consts = List.map - (fun (i,e) -> i,store.Switch.act_store e) + (fun (i,e) -> i,store.Switch.act_store () e) sw.sw_consts and blocks = List.map - (fun (i,e) -> i,store.Switch.act_store e) + (fun (i,e) -> i,store.Switch.act_store () e) sw.sw_blocks in let acts = store.Switch.act_get_shared () in let hs,handle_shared = handle_shared () in @@ -2009,7 +2022,7 @@ let as_interval_canfail fail low high l = let do_store _tag act = - let i = store.act_store act in + let i = store.act_store () act in (* eprintf "STORE [%s] %i %s\n" tag i (string_of_lam act) ; *) @@ -2073,7 +2086,7 @@ let as_interval_nofail l = | [] -> [cur_low, cur_high, cur_act] | (i,act)::rem -> - let act_index = store.act_store act in + let act_index = store.act_store () act in if act_index = cur_act then i_rec cur_low i cur_act rem else @@ -2087,9 +2100,9 @@ let as_interval_nofail l = cases (cf. switch.ml, make_switch). 
Hence, this action will be shared *) if some_hole rem then - store.act_store_shared act + store.act_store_shared () act else - store.act_store act in + store.act_store () act in assert (act_index = 0) ; i_rec i i act_index rem | _ -> assert false in @@ -2241,22 +2254,22 @@ let combine_constant loc arg cst partial ctx def | Const_float _ -> make_test_sequence loc fail - (Pfloatcomp Cneq) (Pfloatcomp Clt) + (Pfloatcomp CFneq) (Pfloatcomp CFlt) arg const_lambda_list | Const_int32 _ -> make_test_sequence loc fail - (Pbintcomp(Pint32, Cneq)) (Pbintcomp(Pint32, Clt)) + (Pbintcomp(Pint32, Cne)) (Pbintcomp(Pint32, Clt)) arg const_lambda_list | Const_int64 _ -> make_test_sequence loc fail - (Pbintcomp(Pint64, Cneq)) (Pbintcomp(Pint64, Clt)) + (Pbintcomp(Pint64, Cne)) (Pbintcomp(Pint64, Clt)) arg const_lambda_list | Const_nativeint _ -> make_test_sequence loc fail - (Pbintcomp(Pnativeint, Cneq)) (Pbintcomp(Pnativeint, Clt)) + (Pbintcomp(Pnativeint, Cne)) (Pbintcomp(Pnativeint, Clt)) arg const_lambda_list in lambda1,jumps_union local_jumps total diff --git a/bytecomp/printinstr.ml b/bytecomp/printinstr.ml index 62bdfd22..6b3754cb 100644 --- a/bytecomp/printinstr.ml +++ b/bytecomp/printinstr.ml @@ -56,7 +56,8 @@ let instruction ppf = function | Kgetvectitem -> fprintf ppf "\tgetvectitem" | Ksetvectitem -> fprintf ppf "\tsetvectitem" | Kgetstringchar -> fprintf ppf "\tgetstringchar" - | Ksetstringchar -> fprintf ppf "\tsetstringchar" + | Kgetbyteschar -> fprintf ppf "\tgetbyteschar" + | Ksetbyteschar -> fprintf ppf "\tsetbyteschar" | Kbranch lbl -> fprintf ppf "\tbranch L%i" lbl | Kbranchif lbl -> fprintf ppf "\tbranchif L%i" lbl | Kbranchifnot lbl -> fprintf ppf "\tbranchifnot L%i" lbl @@ -87,7 +88,7 @@ let instruction ppf = function | Klsrint -> fprintf ppf "\tlsrint" | Kasrint -> fprintf ppf "\tasrint" | Kintcomp Ceq -> fprintf ppf "\teqint" - | Kintcomp Cneq -> fprintf ppf "\tneqint" + | Kintcomp Cne -> fprintf ppf "\tneqint" | Kintcomp Clt -> fprintf ppf "\tltint" | 
Kintcomp Cgt -> fprintf ppf "\tgtint" | Kintcomp Cle -> fprintf ppf "\tleint" diff --git a/bytecomp/printlambda.ml b/bytecomp/printlambda.ml index 54a64bee..f128db5e 100644 --- a/bytecomp/printlambda.ml +++ b/bytecomp/printlambda.ml @@ -109,13 +109,6 @@ let record_rep ppf r = | Record_extension -> fprintf ppf "ext" ;; -let string_of_loc_kind = function - | Loc_FILE -> "loc_FILE" - | Loc_LINE -> "loc_LINE" - | Loc_MODULE -> "loc_MODULE" - | Loc_POS -> "loc_POS" - | Loc_LOC -> "loc_LOC" - let block_shape ppf shape = match shape with | None | Some [] -> () | Some l when List.for_all ((=) Pgenval) l -> () @@ -128,6 +121,26 @@ let block_shape ppf shape = match shape with t; Format.fprintf ppf ")" +let integer_comparison ppf = function + | Ceq -> fprintf ppf "==" + | Cne -> fprintf ppf "!=" + | Clt -> fprintf ppf "<" + | Cle -> fprintf ppf "<=" + | Cgt -> fprintf ppf ">" + | Cge -> fprintf ppf ">=" + +let float_comparison ppf = function + | CFeq -> fprintf ppf "==." + | CFneq -> fprintf ppf "!=." + | CFlt -> fprintf ppf "<." + | CFnlt -> fprintf ppf "!<." + | CFle -> fprintf ppf "<=." + | CFnle -> fprintf ppf "!<=." + | CFgt -> fprintf ppf ">." + | CFngt -> fprintf ppf "!>." + | CFge -> fprintf ppf ">=." + | CFnge -> fprintf ppf "!>=." 
+ let primitive ppf = function | Pidentity -> fprintf ppf "id" | Pbytes_to_string -> fprintf ppf "bytes_to_string" @@ -135,7 +148,6 @@ let primitive ppf = function | Pignore -> fprintf ppf "ignore" | Prevapply -> fprintf ppf "revapply" | Pdirapply -> fprintf ppf "dirapply" - | Ploc kind -> fprintf ppf "%s" (string_of_loc_kind kind) | Pgetglobal id -> fprintf ppf "global %a" Ident.print id | Psetglobal id -> fprintf ppf "setglobal %a" Ident.print id | Pmakeblock(tag, Immutable, shape) -> @@ -180,7 +192,6 @@ let primitive ppf = function in fprintf ppf "setfloatfield%s %i" init n | Pduprecord (rep, size) -> fprintf ppf "duprecord %a %i" record_rep rep size - | Plazyforce -> fprintf ppf "force" | Pccall p -> fprintf ppf "%s" p.prim_name | Praise k -> fprintf ppf "%s" (Lambda.raise_kind k) | Psequand -> fprintf ppf "&&" @@ -200,12 +211,7 @@ let primitive ppf = function | Plslint -> fprintf ppf "lsl" | Plsrint -> fprintf ppf "lsr" | Pasrint -> fprintf ppf "asr" - | Pintcomp(Ceq) -> fprintf ppf "==" - | Pintcomp(Cneq) -> fprintf ppf "!=" - | Pintcomp(Clt) -> fprintf ppf "<" - | Pintcomp(Cle) -> fprintf ppf "<=" - | Pintcomp(Cgt) -> fprintf ppf ">" - | Pintcomp(Cge) -> fprintf ppf ">=" + | Pintcomp(cmp) -> integer_comparison ppf cmp | Poffsetint n -> fprintf ppf "%i+" n | Poffsetref n -> fprintf ppf "+:=%i"n | Pintoffloat -> fprintf ppf "int_of_float" @@ -216,12 +222,7 @@ let primitive ppf = function | Psubfloat -> fprintf ppf "-." | Pmulfloat -> fprintf ppf "*." | Pdivfloat -> fprintf ppf "/." - | Pfloatcomp(Ceq) -> fprintf ppf "==." - | Pfloatcomp(Cneq) -> fprintf ppf "!=." - | Pfloatcomp(Clt) -> fprintf ppf "<." - | Pfloatcomp(Cle) -> fprintf ppf "<=." - | Pfloatcomp(Cgt) -> fprintf ppf ">." - | Pfloatcomp(Cge) -> fprintf ppf ">=." 
+ | Pfloatcomp(cmp) -> float_comparison ppf cmp | Pstringlength -> fprintf ppf "string.length" | Pstringrefu -> fprintf ppf "string.unsafe_get" | Pstringrefs -> fprintf ppf "string.get" @@ -253,7 +254,6 @@ let primitive ppf = function fprintf ppf "sys.constant_%s" const_name | Pisint -> fprintf ppf "isint" | Pisout -> fprintf ppf "isout" - | Pbittest -> fprintf ppf "testbit" | Pbintofint bi -> print_boxed_integer "of_int" ppf bi | Pintofbint bi -> print_boxed_integer "to_int" ppf bi | Pcvtbint (bi1, bi2) -> print_boxed_integer_conversion ppf bi1 bi2 @@ -276,7 +276,7 @@ let primitive ppf = function | Plsrbint bi -> print_boxed_integer "lsr" ppf bi | Pasrbint bi -> print_boxed_integer "asr" ppf bi | Pbintcomp(bi, Ceq) -> print_boxed_integer "==" ppf bi - | Pbintcomp(bi, Cneq) -> print_boxed_integer "!=" ppf bi + | Pbintcomp(bi, Cne) -> print_boxed_integer "!=" ppf bi | Pbintcomp(bi, Clt) -> print_boxed_integer "<" ppf bi | Pbintcomp(bi, Cgt) -> print_boxed_integer ">" ppf bi | Pbintcomp(bi, Cle) -> print_boxed_integer "<=" ppf bi @@ -295,15 +295,24 @@ let primitive ppf = function | Pstring_load_64(unsafe) -> if unsafe then fprintf ppf "string.unsafe_get64" else fprintf ppf "string.get64" - | Pstring_set_16(unsafe) -> - if unsafe then fprintf ppf "string.unsafe_set16" - else fprintf ppf "string.set16" - | Pstring_set_32(unsafe) -> - if unsafe then fprintf ppf "string.unsafe_set32" - else fprintf ppf "string.set32" - | Pstring_set_64(unsafe) -> - if unsafe then fprintf ppf "string.unsafe_set64" - else fprintf ppf "string.set64" + | Pbytes_load_16(unsafe) -> + if unsafe then fprintf ppf "bytes.unsafe_get16" + else fprintf ppf "bytes.get16" + | Pbytes_load_32(unsafe) -> + if unsafe then fprintf ppf "bytes.unsafe_get32" + else fprintf ppf "bytes.get32" + | Pbytes_load_64(unsafe) -> + if unsafe then fprintf ppf "bytes.unsafe_get64" + else fprintf ppf "bytes.get64" + | Pbytes_set_16(unsafe) -> + if unsafe then fprintf ppf "bytes.unsafe_set16" + else fprintf ppf 
"bytes.set16" + | Pbytes_set_32(unsafe) -> + if unsafe then fprintf ppf "bytes.unsafe_set32" + else fprintf ppf "bytes.set32" + | Pbytes_set_64(unsafe) -> + if unsafe then fprintf ppf "bytes.unsafe_set64" + else fprintf ppf "bytes.set64" | Pbigstring_load_16(unsafe) -> if unsafe then fprintf ppf "bigarray.array1.unsafe_get16" else fprintf ppf "bigarray.array1.get16" @@ -334,7 +343,6 @@ let name_of_primitive = function | Pignore -> "Pignore" | Prevapply -> "Prevapply" | Pdirapply -> "Pdirapply" - | Ploc _ -> "Ploc" | Pgetglobal _ -> "Pgetglobal" | Psetglobal _ -> "Psetglobal" | Pmakeblock _ -> "Pmakeblock" @@ -345,7 +353,6 @@ let name_of_primitive = function | Pfloatfield _ -> "Pfloatfield" | Psetfloatfield _ -> "Psetfloatfield" | Pduprecord _ -> "Pduprecord" - | Plazyforce -> "Plazyforce" | Pccall _ -> "Pccall" | Praise _ -> "Praise" | Psequand -> "Psequand" @@ -393,7 +400,6 @@ let name_of_primitive = function | Pctconst _ -> "Pctconst" | Pisint -> "Pisint" | Pisout -> "Pisout" - | Pbittest -> "Pbittest" | Pbintofint _ -> "Pbintofint" | Pintofbint _ -> "Pintofbint" | Pcvtbint _ -> "Pcvtbint" @@ -416,9 +422,12 @@ let name_of_primitive = function | Pstring_load_16 _ -> "Pstring_load_16" | Pstring_load_32 _ -> "Pstring_load_32" | Pstring_load_64 _ -> "Pstring_load_64" - | Pstring_set_16 _ -> "Pstring_set_16" - | Pstring_set_32 _ -> "Pstring_set_32" - | Pstring_set_64 _ -> "Pstring_set_64" + | Pbytes_load_16 _ -> "Pbytes_load_16" + | Pbytes_load_32 _ -> "Pbytes_load_32" + | Pbytes_load_64 _ -> "Pbytes_load_64" + | Pbytes_set_16 _ -> "Pbytes_set_16" + | Pbytes_set_32 _ -> "Pbytes_set_32" + | Pbytes_set_64 _ -> "Pbytes_set_64" | Pbigstring_load_16 _ -> "Pbigstring_load_16" | Pbigstring_load_32 _ -> "Pbigstring_load_32" | Pbigstring_load_64 _ -> "Pbigstring_load_64" diff --git a/bytecomp/semantics_of_primitives.ml b/bytecomp/semantics_of_primitives.ml index f963d867..b6b09e19 100644 --- a/bytecomp/semantics_of_primitives.ml +++ b/bytecomp/semantics_of_primitives.ml @@ 
-21,7 +21,9 @@ type coeffects = No_coeffects | Has_coeffects let for_primitive (prim : Lambda.primitive) = match prim with - | Pignore | Pidentity | Pbytes_to_string | Pbytes_of_string -> + | Pignore | Pidentity -> + No_effects, No_coeffects + | Pbytes_to_string | Pbytes_of_string -> No_effects, No_coeffects | Pmakeblock _ | Pmakearray (_, Mutable) -> Only_generative_effects, No_coeffects @@ -35,7 +37,6 @@ let for_primitive (prim : Lambda.primitive) = ( "caml_format_float" | "caml_format_int" | "caml_int32_format" | "caml_nativeint_format" | "caml_int64_format" ) } -> No_effects, No_coeffects - | Plazyforce | Pccall _ -> Arbitrary_effects, Has_coeffects | Praise _ -> Arbitrary_effects, No_coeffects | Pnot @@ -76,7 +77,6 @@ let for_primitive (prim : Lambda.primitive) = No_effects, Has_coeffects (* That old chestnut: [Obj.truncate]. *) | Pisint | Pisout - | Pbittest | Pbintofint _ | Pintofbint _ | Pcvtbint _ @@ -103,6 +103,9 @@ let for_primitive (prim : Lambda.primitive) = | Pstring_load_16 true | Pstring_load_32 true | Pstring_load_64 true + | Pbytes_load_16 true + | Pbytes_load_32 true + | Pbytes_load_64 true | Pbigarrayref (true, _, _, _) | Pbigstring_load_16 true | Pbigstring_load_32 true @@ -114,6 +117,9 @@ let for_primitive (prim : Lambda.primitive) = | Pstring_load_16 false | Pstring_load_32 false | Pstring_load_64 false + | Pbytes_load_16 false + | Pbytes_load_32 false + | Pbytes_load_64 false | Pbigarrayref (false, _, _, _) | Pbigstring_load_16 false | Pbigstring_load_32 false @@ -128,9 +134,9 @@ let for_primitive (prim : Lambda.primitive) = | Parraysets _ | Pbytessetu | Pbytessets - | Pstring_set_16 _ - | Pstring_set_32 _ - | Pstring_set_64 _ + | Pbytes_set_16 _ + | Pbytes_set_32 _ + | Pbytes_set_64 _ | Pbigarrayset _ | Pbigstring_set_16 _ | Pbigstring_set_32 _ @@ -143,9 +149,6 @@ let for_primitive (prim : Lambda.primitive) = | Pbbswap _ -> No_effects, No_coeffects | Pint_as_pointer -> No_effects, No_coeffects | Popaque -> Arbitrary_effects, Has_coeffects - 
| Ploc _ -> - (* Removed by [Translcore]. *) - No_effects, No_coeffects | Prevapply | Pdirapply -> (* Removed by [Simplif], but there is no reason to prevent using diff --git a/bytecomp/simplif.ml b/bytecomp/simplif.ml index f07fbc04..ea9513f9 100644 --- a/bytecomp/simplif.ml +++ b/bytecomp/simplif.ml @@ -31,7 +31,7 @@ let rec eliminate_ref id = function Lapply{ap with ap_func = eliminate_ref id ap.ap_func; ap_args = List.map (eliminate_ref id) ap.ap_args} | Lfunction _ as lam -> - if IdentSet.mem id (free_variables lam) + if Ident.Set.mem id (free_variables lam) then raise Real_reference else lam | Llet(str, kind, v, e1, e2) -> @@ -92,22 +92,31 @@ let rec eliminate_ref id = function (* Simplification of exits *) +type exit = { + mutable count: int; + mutable max_depth: int; +} + let simplify_exits lam = (* Count occurrences of (exit n ...) statements *) let exits = Hashtbl.create 17 in - let count_exit i = - try - !(Hashtbl.find exits i) - with - | Not_found -> 0 + let try_depth = ref 0 in - and incr_exit i = - try - incr (Hashtbl.find exits i) - with - | Not_found -> Hashtbl.add exits i (ref 1) in + let get_exit i = + try Hashtbl.find exits i + with Not_found -> {count = 0; max_depth = 0} + + and incr_exit i nb d = + match Hashtbl.find_opt exits i with + | Some r -> + r.count <- r.count + nb; + r.max_depth <- max r.max_depth d + | None -> + let r = {count = nb; max_depth = d} in + Hashtbl.add exits i r + in let rec count = function | (Lvar _| Lconst _) -> () @@ -133,25 +142,20 @@ let simplify_exits lam = | []|[_] -> count d | _ -> count d; count d (* default will get replicated *) end - | Lstaticraise (i,ls) -> incr_exit i ; List.iter count ls + | Lstaticraise (i,ls) -> incr_exit i 1 !try_depth; List.iter count ls | Lstaticcatch (l1,(i,[]),Lstaticraise (j,[])) -> (* i will be replaced by j in l1, so each occurrence of i in l1 increases j's ref count *) count l1 ; - let ic = count_exit i in - begin try - let r = Hashtbl.find exits j in r := !r + ic - with - | 
Not_found -> - Hashtbl.add exits j (ref ic) - end + let ic = get_exit i in + incr_exit j ic.count (max !try_depth ic.max_depth) | Lstaticcatch(l1, (i,_), l2) -> count l1; (* If l1 does not contain (exit i), l2 will be removed, so don't count its exits *) - if count_exit i > 0 then + if (get_exit i).count > 0 then count l2 - | Ltrywith(l1, _v, l2) -> count l1; count l2 + | Ltrywith(l1, _v, l2) -> incr try_depth; count l1; decr try_depth; count l2 | Lifthenelse(l1, l2, l3) -> count l1; count l2; count l3 | Lsequence(l1, l2) -> count l1; count l2 | Lwhile(l1, l2) -> count l1; count l2 @@ -176,6 +180,7 @@ let simplify_exits lam = end in count lam; + assert(!try_depth = 0); (* Second pass simplify ``catch body with (i ...) handler'' @@ -261,11 +266,11 @@ let simplify_exits lam = let ys = List.map Ident.rename xs in let env = List.fold_right2 - (fun x y t -> Ident.add x (Lvar y) t) - xs ys Ident.empty in + (fun x y t -> Ident.Map.add x (Lvar y) t) + xs ys Ident.Map.empty in List.fold_right2 (fun y l r -> Llet (Alias, Pgenval, y, l, r)) - ys ls (Lambda.subst_lambda env handler) + ys ls (Lambda.subst env handler) with | Not_found -> Lstaticraise (i,ls) end @@ -273,15 +278,23 @@ let simplify_exits lam = Hashtbl.add subst i ([],simplif l2) ; simplif l1 | Lstaticcatch (l1,(i,xs),l2) -> - begin match count_exit i with - | 0 -> simplif l1 - | 1 when i >= 0 -> - Hashtbl.add subst i (xs,simplif l2) ; - simplif l1 - | _ -> - Lstaticcatch (simplif l1, (i,xs), simplif l2) - end - | Ltrywith(l1, v, l2) -> Ltrywith(simplif l1, v, simplif l2) + let {count; max_depth} = get_exit i in + if count = 0 then + (* Discard staticcatch: not matching exit *) + simplif l1 + else if count = 1 && max_depth <= !try_depth then begin + (* Inline handler if there is a single occurrence and it is not + nested within an inner try..with *) + assert(max_depth = !try_depth); + Hashtbl.add subst i (xs,simplif l2); + simplif l1 + end else + Lstaticcatch (simplif l1, (i,xs), simplif l2) + | Ltrywith(l1, v, l2) 
-> + incr try_depth; + let l1 = simplif l1 in + decr try_depth; + Ltrywith(l1, v, simplif l2) | Lifthenelse(l1, l2, l3) -> Lifthenelse(simplif l1, simplif l2, simplif l3) | Lsequence(l1, l2) -> Lsequence(simplif l1, simplif l2) | Lwhile(l1, l2) -> Lwhile(simplif l1, simplif l2) @@ -569,7 +582,9 @@ let rec emit_tail_infos is_tail lambda = | Lletrec (bindings, body) -> List.iter (fun (_, lam) -> emit_tail_infos false lam) bindings; emit_tail_infos is_tail body - | Lprim ((Pidentity | Pbytes_to_string | Pbytes_of_string), [arg], _) -> + | Lprim (Pidentity, [arg], _) -> + emit_tail_infos is_tail arg + | Lprim ((Pbytes_to_string | Pbytes_of_string), [arg], _) -> emit_tail_infos is_tail arg | Lprim (Psequand, [arg1; arg2], _) | Lprim (Psequor, [arg1; arg2], _) -> @@ -648,7 +663,7 @@ let split_default_wrapper ~id:fun_id ~kind ~params ~body ~attr ~loc = (* Check that those *opt* identifiers don't appear in the remaining body. This should not appear, but let's be on the safe side. *) let fv = Lambda.free_variables body in - List.iter (fun (id, _) -> if IdentSet.mem id fv then raise Exit) map; + List.iter (fun (id, _) -> if Ident.Set.mem id fv then raise Exit) map; let inner_id = Ident.create (Ident.name fun_id ^ "_inner") in let map_param p = try List.assoc p map with Not_found -> p in @@ -667,10 +682,10 @@ let split_default_wrapper ~id:fun_id ~kind ~params ~body ~attr ~loc = let new_ids = List.map Ident.rename inner_params in let subst = List.fold_left2 (fun s id new_id -> - Ident.add id (Lvar new_id) s) - Ident.empty inner_params new_ids + Ident.Map.add id (Lvar new_id) s) + Ident.Map.empty inner_params new_ids in - let body = Lambda.subst_lambda subst body in + let body = Lambda.subst subst body in let inner_fun = Lfunction { kind = Curried; params = new_ids; body; attr; loc; } in diff --git a/bytecomp/switch.ml b/bytecomp/switch.ml index 2e373239..b03982dd 100644 --- a/bytecomp/switch.ml +++ b/bytecomp/switch.ml @@ -16,11 +16,11 @@ type 'a shared = Shared of 'a | Single 
of 'a -type 'a t_store = +type ('a, 'ctx) t_store = {act_get : unit -> 'a array ; act_get_shared : unit -> 'a shared array ; - act_store : 'a -> int ; - act_store_shared : 'a -> int ; } + act_store : 'ctx -> 'a -> int ; + act_store_shared : 'ctx -> 'a -> int ; } exception Not_simple @@ -31,7 +31,13 @@ module type Stored = sig val make_key : t -> key option end -module Store(A:Stored) = struct +module type CtxStored = sig + include Stored + type context + val make_key : context -> t -> key option +end + +module CtxStore(A:CtxStored) = struct module AMap = Map.Make(struct type t = A.key let compare = A.compare_key end) @@ -52,7 +58,7 @@ module Store(A:Stored) = struct st.next <- i+1 ; i in - let store mustshare act = match A.make_key act with + let store mustshare ctx act = match A.make_key ctx act with | Some key -> begin try let (shared,i) = AMap.find key st.map in @@ -86,6 +92,18 @@ module Store(A:Stored) = struct act_get = get; act_get_shared = get_shared; } end +module Store(A:Stored) = struct + module Me = + CtxStore + (struct + include A + type context = unit + let make_key () = A.make_key + end) + + let mk_store = Me.mk_store +end + module type S = diff --git a/bytecomp/switch.mli b/bytecomp/switch.mli index 2d0cfd7f..b4058c17 100644 --- a/bytecomp/switch.mli +++ b/bytecomp/switch.mli @@ -31,11 +31,11 @@ type 'a shared = Shared of 'a | Single of 'a -type 'a t_store = +type ('a, 'ctx) t_store = {act_get : unit -> 'a array ; act_get_shared : unit -> 'a shared array ; - act_store : 'a -> int ; - act_store_shared : 'a -> int ; } + act_store : 'ctx -> 'a -> int ; + act_store_shared : 'ctx -> 'a -> int ; } exception Not_simple @@ -46,9 +46,20 @@ module type Stored = sig val make_key : t -> key option end +module type CtxStored = sig + include Stored + type context + val make_key : context -> t -> key option +end + +module CtxStore(A:CtxStored) : + sig + val mk_store : unit -> (A.t, A.context) t_store + end + module Store(A:Stored) : sig - val mk_store : unit -> A.t 
t_store + val mk_store : unit -> (A.t, unit) t_store end (* Arguments to the Make functor *) @@ -106,13 +117,13 @@ module Make : (int * int) -> Arg.act -> (int * int * int) array -> - Arg.act t_store -> + (Arg.act, _) t_store -> Arg.act (* Output test sequence, sharing tracked *) val test_sequence : Arg.act -> (int * int * int) array -> - Arg.act t_store -> + (Arg.act, _) t_store -> Arg.act end diff --git a/bytecomp/translattribute.ml b/bytecomp/translattribute.ml index 68a0dc4a..8881dc77 100644 --- a/bytecomp/translattribute.ml +++ b/bytecomp/translattribute.ml @@ -188,11 +188,26 @@ let get_and_remove_inlined_attribute e = inlined, { e with exp_attributes } let get_and_remove_inlined_attribute_on_module e = - let attr, mod_attributes = - find_attribute is_inlined_attribute e.mod_attributes + let rec get_and_remove mod_expr = + let attr, mod_attributes = + find_attribute is_inlined_attribute mod_expr.mod_attributes + in + let attr = parse_inline_attribute attr in + let attr, mod_desc = + match mod_expr.Typedtree.mod_desc with + | Tmod_constraint (me, mt, mtc, mc) -> + let inner_attr, me = get_and_remove me in + let attr = + match attr with + | Always_inline | Never_inline | Unroll _ -> attr + | Default_inline -> inner_attr + in + attr, Tmod_constraint (me, mt, mtc, mc) + | md -> attr, md + in + attr, { mod_expr with mod_desc; mod_attributes } in - let inlined = parse_inline_attribute attr in - inlined, { e with mod_attributes } + get_and_remove e let get_and_remove_specialised_attribute e = let attr, exp_attributes = diff --git a/bytecomp/translclass.ml b/bytecomp/translclass.ml index d5ffd339..4f89faa9 100644 --- a/bytecomp/translclass.ml +++ b/bytecomp/translclass.ml @@ -402,8 +402,8 @@ let rec build_class_lets cl = let rec get_class_meths cl = match cl.cl_desc with Tcl_structure cl -> - Meths.fold (fun _ -> IdentSet.add) cl.cstr_meths IdentSet.empty - | Tcl_ident _ -> IdentSet.empty + Meths.fold (fun _ -> Ident.Set.add) cl.cstr_meths Ident.Set.empty + | 
Tcl_ident _ -> Ident.Set.empty | Tcl_fun (_, _, _, cl, _) | Tcl_let (_, _, _, cl) | Tcl_apply (cl, _) @@ -518,7 +518,7 @@ let const_path local = function | Lconst _ -> true | Lfunction {kind = Curried; body} -> let fv = free_variables body in - List.for_all (fun x -> not (IdentSet.mem x fv)) local + List.for_all (fun x -> not (Ident.Set.mem x fv)) local | p -> module_path p let rec builtin_meths self env env2 body = @@ -630,6 +630,33 @@ let prerr_ids msg ids = prerr_endline (String.concat " " (msg :: names)) *) +let free_methods l = + let fv = ref Ident.Set.empty in + let rec free l = + Lambda.iter_head_constructor free l; + match l with + | Lsend(Self, Lvar meth, _, _, _) -> + fv := Ident.Set.add meth !fv + | Lsend _ -> () + | Lfunction{params} -> + List.iter (fun param -> fv := Ident.Set.remove param !fv) params + | Llet(_str, _k, id, _arg, _body) -> + fv := Ident.Set.remove id !fv + | Lletrec(decl, _body) -> + List.iter (fun (id, _exp) -> fv := Ident.Set.remove id !fv) decl + | Lstaticcatch(_e1, (_,vars), _e2) -> + List.iter (fun id -> fv := Ident.Set.remove id !fv) vars + | Ltrywith(_e1, exn, _e2) -> + fv := Ident.Set.remove exn !fv + | Lfor(v, _e1, _e2, _dir, _e3) -> + fv := Ident.Set.remove v !fv + | Lassign _ + | Lvar _ | Lconst _ | Lapply _ + | Lprim _ | Lswitch _ | Lstringswitch _ | Lstaticraise _ + | Lifthenelse _ | Lsequence _ | Lwhile _ + | Levent _ | Lifused _ -> () + in free l; !fv + let transl_class ids cl_id pub_meths cl vflag = (* First check if it is not only a rebind *) let rebind = transl_class_rebind cl vflag in @@ -645,25 +672,25 @@ let transl_class ids cl_id pub_meths cl vflag = let meth_ids = get_class_meths cl in let subst env lam i0 new_ids' = let fv = free_variables lam in - (* prerr_ids "cl_id =" [cl_id]; prerr_ids "fv =" (IdentSet.elements fv); *) - let fv = List.fold_right IdentSet.remove !new_ids' fv in + (* prerr_ids "cl_id =" [cl_id]; prerr_ids "fv =" (Ident.Set.elements fv); *) + let fv = List.fold_right Ident.Set.remove !new_ids' 
fv in (* We need to handle method ids specially, as they do not appear in the typing environment (PR#3576, PR#4560) *) (* very hacky: we add and remove free method ids on the fly, depending on the visit order... *) method_ids := - IdentSet.diff (IdentSet.union (free_methods lam) !method_ids) meth_ids; - (* prerr_ids "meth_ids =" (IdentSet.elements meth_ids); - prerr_ids "method_ids =" (IdentSet.elements !method_ids); *) - let new_ids = List.fold_right IdentSet.add new_ids !method_ids in - let fv = IdentSet.inter fv new_ids in - new_ids' := !new_ids' @ IdentSet.elements fv; + Ident.Set.diff (Ident.Set.union (free_methods lam) !method_ids) meth_ids; + (* prerr_ids "meth_ids =" (Ident.Set.elements meth_ids); + prerr_ids "method_ids =" (Ident.Set.elements !method_ids); *) + let new_ids = List.fold_right Ident.Set.add new_ids !method_ids in + let fv = Ident.Set.inter fv new_ids in + new_ids' := !new_ids' @ Ident.Set.elements fv; (* prerr_ids "new_ids' =" !new_ids'; *) let i = ref (i0-1) in List.fold_left (fun subst id -> - incr i; Ident.add id (lfield env !i) subst) - Ident.empty !new_ids' + incr i; Ident.Map.add id (lfield env !i) subst) + Ident.Map.empty !new_ids' in let new_ids_meths = ref [] in let msubst arr = function @@ -671,7 +698,7 @@ let transl_class ids cl_id pub_meths cl vflag = let env = Ident.create "env" in let body' = if new_ids = [] then body else - subst_lambda (subst env body 0 new_ids_meths) body in + Lambda.subst (subst env body 0 new_ids_meths) body in begin try (* Doesn't seem to improve size for bytecode *) (* if not !Clflags.native_code then raise Not_found; *) @@ -679,7 +706,7 @@ let transl_class ids cl_id pub_meths cl vflag = builtin_meths [self] env env2 (lfunction args body') with Not_found -> [lfunction (self :: args) - (if not (IdentSet.mem env (free_variables body')) then body' else + (if not (Ident.Set.mem env (free_variables body')) then body' else Llet(Alias, Pgenval, env, Lprim(Pfield_computed, [Lvar self; Lvar env2], @@ -698,7 +725,7 
@@ let transl_class ids cl_id pub_meths cl vflag = and subst_env envs l lam = if top then lam else (* must be called only once! *) - let lam = subst_lambda (subst env1 lam 1 new_ids_init) lam in + let lam = Lambda.subst (subst env1 lam 1 new_ids_init) lam in Llet(Alias, Pgenval, env1, (if l = [] then Lvar envs else lfield envs 0), Llet(Alias, Pgenval, env1', (if !new_ids_init = [] then Lvar env1 else lfield env1 0), @@ -748,7 +775,7 @@ let transl_class ids cl_id pub_meths cl vflag = params = [cla]; body = cl_init}) in Llet(Strict, Pgenval, class_init, cl_init, lam (free_variables cl_init)) and lbody fv = - if List.for_all (fun id -> not (IdentSet.mem id fv)) ids then + if List.for_all (fun id -> not (Ident.Set.mem id fv)) ids then mkappl (oo_prim "make_class",[transl_meth_list pub_meths; Lvar class_init]) else diff --git a/bytecomp/translcore.ml b/bytecomp/translcore.ml index 216601e0..fa1ddbeb 100644 --- a/bytecomp/translcore.ml +++ b/bytecomp/translcore.ml @@ -26,7 +26,6 @@ open Lambda type error = Free_super_var - | Unknown_builtin_primitive of string | Unreachable_reached exception Error of Location.t * error @@ -48,6 +47,9 @@ let prim_fresh_oo_id = Pccall (Primitive.simple ~name:"caml_fresh_oo_id" ~arity:1 ~alloc:false) let transl_extension_constructor env path ext = + let path = + Stdlib.Option.map (Printtyp.rewrite_double_underscore_paths env) path + in let name = match path, !Clflags.for_package with None, _ -> Ident.name ext.ext_id @@ -64,461 +66,6 @@ let transl_extension_constructor env path ext = | Text_rebind(path, _lid) -> transl_extension_path ~loc env path -(* Translation of primitives *) - -let comparisons_table = create_hashtable 11 [ - "%equal", - (Pccall(Primitive.simple ~name:"caml_equal" ~arity:2 ~alloc:true), - Pintcomp Ceq, - Pfloatcomp Ceq, - Pccall(Primitive.simple ~name:"caml_string_equal" ~arity:2 - ~alloc:false), - Pccall(Primitive.simple ~name:"caml_bytes_equal" ~arity:2 - ~alloc:false), - Pbintcomp(Pnativeint, Ceq), - Pbintcomp(Pint32, 
Ceq), - Pbintcomp(Pint64, Ceq), - true); - "%notequal", - (Pccall(Primitive.simple ~name:"caml_notequal" ~arity:2 ~alloc:true), - Pintcomp Cneq, - Pfloatcomp Cneq, - Pccall(Primitive.simple ~name:"caml_string_notequal" ~arity:2 - ~alloc:false), - Pccall(Primitive.simple ~name:"caml_bytes_notequal" ~arity:2 - ~alloc:false), - Pbintcomp(Pnativeint, Cneq), - Pbintcomp(Pint32, Cneq), - Pbintcomp(Pint64, Cneq), - true); - "%lessthan", - (Pccall(Primitive.simple ~name:"caml_lessthan" ~arity:2 ~alloc:true), - Pintcomp Clt, - Pfloatcomp Clt, - Pccall(Primitive.simple ~name:"caml_string_lessthan" ~arity:2 - ~alloc:false), - Pccall(Primitive.simple ~name:"caml_bytes_lessthan" ~arity:2 - ~alloc:false), - Pbintcomp(Pnativeint, Clt), - Pbintcomp(Pint32, Clt), - Pbintcomp(Pint64, Clt), - false); - "%greaterthan", - (Pccall(Primitive.simple ~name:"caml_greaterthan" ~arity:2 ~alloc:true), - Pintcomp Cgt, - Pfloatcomp Cgt, - Pccall(Primitive.simple ~name:"caml_string_greaterthan" ~arity:2 - ~alloc: false), - Pccall(Primitive.simple ~name:"caml_bytes_greaterthan" ~arity:2 - ~alloc: false), - Pbintcomp(Pnativeint, Cgt), - Pbintcomp(Pint32, Cgt), - Pbintcomp(Pint64, Cgt), - false); - "%lessequal", - (Pccall(Primitive.simple ~name:"caml_lessequal" ~arity:2 ~alloc:true), - Pintcomp Cle, - Pfloatcomp Cle, - Pccall(Primitive.simple ~name:"caml_string_lessequal" ~arity:2 - ~alloc:false), - Pccall(Primitive.simple ~name:"caml_bytes_lessequal" ~arity:2 - ~alloc:false), - Pbintcomp(Pnativeint, Cle), - Pbintcomp(Pint32, Cle), - Pbintcomp(Pint64, Cle), - false); - "%greaterequal", - (Pccall(Primitive.simple ~name:"caml_greaterequal" ~arity:2 ~alloc:true), - Pintcomp Cge, - Pfloatcomp Cge, - Pccall(Primitive.simple ~name:"caml_string_greaterequal" ~arity:2 - ~alloc:false), - Pccall(Primitive.simple ~name:"caml_bytes_greaterequal" ~arity:2 - ~alloc:false), - Pbintcomp(Pnativeint, Cge), - Pbintcomp(Pint32, Cge), - Pbintcomp(Pint64, Cge), - false); - "%compare", - let unboxed_compare name 
native_repr = - Pccall( Primitive.make ~name ~alloc:false - ~native_name:(name^"_unboxed") - ~native_repr_args:[native_repr;native_repr] - ~native_repr_res:Untagged_int - ) in - (Pccall(Primitive.simple ~name:"caml_compare" ~arity:2 ~alloc:true), - (* Not unboxed since the comparison is done directly on tagged int *) - Pccall(Primitive.simple ~name:"caml_int_compare" ~arity:2 ~alloc:false), - unboxed_compare "caml_float_compare" Unboxed_float, - Pccall(Primitive.simple ~name:"caml_string_compare" ~arity:2 - ~alloc:false), - Pccall(Primitive.simple ~name:"caml_bytes_compare" ~arity:2 - ~alloc:false), - unboxed_compare "caml_nativeint_compare" (Unboxed_integer Pnativeint), - unboxed_compare "caml_int32_compare" (Unboxed_integer Pint32), - unboxed_compare "caml_int64_compare" (Unboxed_integer Pint64), - false) -] - -let gen_array_kind = - if Config.flat_float_array then Pgenarray else Paddrarray - -let primitives_table = create_hashtable 57 [ - "%identity", Pidentity; - "%bytes_to_string", Pbytes_to_string; - "%bytes_of_string", Pbytes_of_string; - "%ignore", Pignore; - "%revapply", Prevapply; - "%apply", Pdirapply; - "%loc_LOC", Ploc Loc_LOC; - "%loc_FILE", Ploc Loc_FILE; - "%loc_LINE", Ploc Loc_LINE; - "%loc_POS", Ploc Loc_POS; - "%loc_MODULE", Ploc Loc_MODULE; - "%field0", Pfield 0; - "%field1", Pfield 1; - "%setfield0", Psetfield(0, Pointer, Assignment); - "%makeblock", Pmakeblock(0, Immutable, None); - "%makemutable", Pmakeblock(0, Mutable, None); - "%raise", Praise Raise_regular; - "%reraise", Praise Raise_reraise; - "%raise_notrace", Praise Raise_notrace; - "%sequand", Psequand; - "%sequor", Psequor; - "%boolnot", Pnot; - "%big_endian", Pctconst Big_endian; - "%backend_type", Pctconst Backend_type; - "%word_size", Pctconst Word_size; - "%int_size", Pctconst Int_size; - "%max_wosize", Pctconst Max_wosize; - "%ostype_unix", Pctconst Ostype_unix; - "%ostype_win32", Pctconst Ostype_win32; - "%ostype_cygwin", Pctconst Ostype_cygwin; - "%negint", Pnegint; - 
"%succint", Poffsetint 1; - "%predint", Poffsetint(-1); - "%addint", Paddint; - "%subint", Psubint; - "%mulint", Pmulint; - "%divint", Pdivint Safe; - "%modint", Pmodint Safe; - "%andint", Pandint; - "%orint", Porint; - "%xorint", Pxorint; - "%lslint", Plslint; - "%lsrint", Plsrint; - "%asrint", Pasrint; - "%eq", Pintcomp Ceq; - "%noteq", Pintcomp Cneq; - "%ltint", Pintcomp Clt; - "%leint", Pintcomp Cle; - "%gtint", Pintcomp Cgt; - "%geint", Pintcomp Cge; - "%incr", Poffsetref(1); - "%decr", Poffsetref(-1); - "%intoffloat", Pintoffloat; - "%floatofint", Pfloatofint; - "%negfloat", Pnegfloat; - "%absfloat", Pabsfloat; - "%addfloat", Paddfloat; - "%subfloat", Psubfloat; - "%mulfloat", Pmulfloat; - "%divfloat", Pdivfloat; - "%eqfloat", Pfloatcomp Ceq; - "%noteqfloat", Pfloatcomp Cneq; - "%ltfloat", Pfloatcomp Clt; - "%lefloat", Pfloatcomp Cle; - "%gtfloat", Pfloatcomp Cgt; - "%gefloat", Pfloatcomp Cge; - "%string_length", Pstringlength; - "%string_safe_get", Pstringrefs; - "%string_safe_set", Pbytessets; - "%string_unsafe_get", Pstringrefu; - "%string_unsafe_set", Pbytessetu; - "%bytes_length", Pbyteslength; - "%bytes_safe_get", Pbytesrefs; - "%bytes_safe_set", Pbytessets; - "%bytes_unsafe_get", Pbytesrefu; - "%bytes_unsafe_set", Pbytessetu; - "%array_length", Parraylength gen_array_kind; - "%array_safe_get", Parrayrefs gen_array_kind; - "%array_safe_set", Parraysets gen_array_kind; - "%array_unsafe_get", Parrayrefu gen_array_kind; - "%array_unsafe_set", Parraysetu gen_array_kind; - "%obj_size", Parraylength gen_array_kind; - "%obj_field", Parrayrefu gen_array_kind; - "%obj_set_field", Parraysetu gen_array_kind; - "%floatarray_length", Parraylength Pfloatarray; - "%floatarray_safe_get", Parrayrefs Pfloatarray; - "%floatarray_safe_set", Parraysets Pfloatarray; - "%floatarray_unsafe_get", Parrayrefu Pfloatarray; - "%floatarray_unsafe_set", Parraysetu Pfloatarray; - "%obj_is_int", Pisint; - "%lazy_force", Plazyforce; - "%nativeint_of_int", Pbintofint Pnativeint; - 
"%nativeint_to_int", Pintofbint Pnativeint; - "%nativeint_neg", Pnegbint Pnativeint; - "%nativeint_add", Paddbint Pnativeint; - "%nativeint_sub", Psubbint Pnativeint; - "%nativeint_mul", Pmulbint Pnativeint; - "%nativeint_div", Pdivbint { size = Pnativeint; is_safe = Safe }; - "%nativeint_mod", Pmodbint { size = Pnativeint; is_safe = Safe }; - "%nativeint_and", Pandbint Pnativeint; - "%nativeint_or", Porbint Pnativeint; - "%nativeint_xor", Pxorbint Pnativeint; - "%nativeint_lsl", Plslbint Pnativeint; - "%nativeint_lsr", Plsrbint Pnativeint; - "%nativeint_asr", Pasrbint Pnativeint; - "%int32_of_int", Pbintofint Pint32; - "%int32_to_int", Pintofbint Pint32; - "%int32_neg", Pnegbint Pint32; - "%int32_add", Paddbint Pint32; - "%int32_sub", Psubbint Pint32; - "%int32_mul", Pmulbint Pint32; - "%int32_div", Pdivbint { size = Pint32; is_safe = Safe }; - "%int32_mod", Pmodbint { size = Pint32; is_safe = Safe }; - "%int32_and", Pandbint Pint32; - "%int32_or", Porbint Pint32; - "%int32_xor", Pxorbint Pint32; - "%int32_lsl", Plslbint Pint32; - "%int32_lsr", Plsrbint Pint32; - "%int32_asr", Pasrbint Pint32; - "%int64_of_int", Pbintofint Pint64; - "%int64_to_int", Pintofbint Pint64; - "%int64_neg", Pnegbint Pint64; - "%int64_add", Paddbint Pint64; - "%int64_sub", Psubbint Pint64; - "%int64_mul", Pmulbint Pint64; - "%int64_div", Pdivbint { size = Pint64; is_safe = Safe }; - "%int64_mod", Pmodbint { size = Pint64; is_safe = Safe }; - "%int64_and", Pandbint Pint64; - "%int64_or", Porbint Pint64; - "%int64_xor", Pxorbint Pint64; - "%int64_lsl", Plslbint Pint64; - "%int64_lsr", Plsrbint Pint64; - "%int64_asr", Pasrbint Pint64; - "%nativeint_of_int32", Pcvtbint(Pint32, Pnativeint); - "%nativeint_to_int32", Pcvtbint(Pnativeint, Pint32); - "%int64_of_int32", Pcvtbint(Pint32, Pint64); - "%int64_to_int32", Pcvtbint(Pint64, Pint32); - "%int64_of_nativeint", Pcvtbint(Pnativeint, Pint64); - "%int64_to_nativeint", Pcvtbint(Pint64, Pnativeint); - "%caml_ba_ref_1", - Pbigarrayref(false, 1, 
Pbigarray_unknown, Pbigarray_unknown_layout); - "%caml_ba_ref_2", - Pbigarrayref(false, 2, Pbigarray_unknown, Pbigarray_unknown_layout); - "%caml_ba_ref_3", - Pbigarrayref(false, 3, Pbigarray_unknown, Pbigarray_unknown_layout); - "%caml_ba_set_1", - Pbigarrayset(false, 1, Pbigarray_unknown, Pbigarray_unknown_layout); - "%caml_ba_set_2", - Pbigarrayset(false, 2, Pbigarray_unknown, Pbigarray_unknown_layout); - "%caml_ba_set_3", - Pbigarrayset(false, 3, Pbigarray_unknown, Pbigarray_unknown_layout); - "%caml_ba_unsafe_ref_1", - Pbigarrayref(true, 1, Pbigarray_unknown, Pbigarray_unknown_layout); - "%caml_ba_unsafe_ref_2", - Pbigarrayref(true, 2, Pbigarray_unknown, Pbigarray_unknown_layout); - "%caml_ba_unsafe_ref_3", - Pbigarrayref(true, 3, Pbigarray_unknown, Pbigarray_unknown_layout); - "%caml_ba_unsafe_set_1", - Pbigarrayset(true, 1, Pbigarray_unknown, Pbigarray_unknown_layout); - "%caml_ba_unsafe_set_2", - Pbigarrayset(true, 2, Pbigarray_unknown, Pbigarray_unknown_layout); - "%caml_ba_unsafe_set_3", - Pbigarrayset(true, 3, Pbigarray_unknown, Pbigarray_unknown_layout); - "%caml_ba_dim_1", Pbigarraydim(1); - "%caml_ba_dim_2", Pbigarraydim(2); - "%caml_ba_dim_3", Pbigarraydim(3); - "%caml_string_get16", Pstring_load_16(false); - "%caml_string_get16u", Pstring_load_16(true); - "%caml_string_get32", Pstring_load_32(false); - "%caml_string_get32u", Pstring_load_32(true); - "%caml_string_get64", Pstring_load_64(false); - "%caml_string_get64u", Pstring_load_64(true); - "%caml_string_set16", Pstring_set_16(false); - "%caml_string_set16u", Pstring_set_16(true); - "%caml_string_set32", Pstring_set_32(false); - "%caml_string_set32u", Pstring_set_32(true); - "%caml_string_set64", Pstring_set_64(false); - "%caml_string_set64u", Pstring_set_64(true); - "%caml_bigstring_get16", Pbigstring_load_16(false); - "%caml_bigstring_get16u", Pbigstring_load_16(true); - "%caml_bigstring_get32", Pbigstring_load_32(false); - "%caml_bigstring_get32u", Pbigstring_load_32(true); - 
"%caml_bigstring_get64", Pbigstring_load_64(false); - "%caml_bigstring_get64u", Pbigstring_load_64(true); - "%caml_bigstring_set16", Pbigstring_set_16(false); - "%caml_bigstring_set16u", Pbigstring_set_16(true); - "%caml_bigstring_set32", Pbigstring_set_32(false); - "%caml_bigstring_set32u", Pbigstring_set_32(true); - "%caml_bigstring_set64", Pbigstring_set_64(false); - "%caml_bigstring_set64u", Pbigstring_set_64(true); - "%bswap16", Pbswap16; - "%bswap_int32", Pbbswap(Pint32); - "%bswap_int64", Pbbswap(Pint64); - "%bswap_native", Pbbswap(Pnativeint); - "%int_as_pointer", Pint_as_pointer; - "%opaque", Popaque; -] - -let find_primitive prim_name = - Hashtbl.find primitives_table prim_name - -let prim_restore_raw_backtrace = - Primitive.simple ~name:"caml_restore_raw_backtrace" ~arity:2 ~alloc:false - -let specialize_comparison table env ty = - let (gencomp, intcomp, floatcomp, stringcomp, bytescomp, - nativeintcomp, int32comp, int64comp, _) = table in - match () with - | () when is_base_type env ty Predef.path_int - || is_base_type env ty Predef.path_char - || (maybe_pointer_type env ty = Immediate) -> intcomp - | () when is_base_type env ty Predef.path_float -> floatcomp - | () when is_base_type env ty Predef.path_string -> stringcomp - | () when is_base_type env ty Predef.path_bytes -> bytescomp - | () when is_base_type env ty Predef.path_nativeint -> nativeintcomp - | () when is_base_type env ty Predef.path_int32 -> int32comp - | () when is_base_type env ty Predef.path_int64 -> int64comp - | () -> gencomp - -(* The following function computes the greatest lower bound in the - semilattice of array kinds: - gen - / \ - addr float - | - int - Note that the GLB is not guaranteed to exist, in which case we return - our first argument instead of raising a fatal error because, although - it cannot happen in a well-typed program, (ab)use of Obj.magic can - probably trigger it. 
-*) -let glb_array_type t1 t2 = - match t1, t2 with - | Pfloatarray, (Paddrarray | Pintarray) - | (Paddrarray | Pintarray), Pfloatarray -> t1 - - | Pgenarray, x | x, Pgenarray -> x - | Paddrarray, x | x, Paddrarray -> x - | Pintarray, Pintarray -> Pintarray - | Pfloatarray, Pfloatarray -> Pfloatarray - -(* Specialize a primitive from available type information, - raise Not_found if primitive is unknown *) - -let specialize_primitive p env ty ~has_constant_constructor = - try - let table = Hashtbl.find comparisons_table p.prim_name in - let (gencomp, intcomp, _, _, _, _, _, _, simplify_constant_constructor) = - table in - if has_constant_constructor && simplify_constant_constructor then - intcomp - else - match is_function_type env ty with - | Some (lhs,_rhs) -> specialize_comparison table env lhs - | None -> gencomp - with Not_found -> - let p = find_primitive p.prim_name in - (* Try strength reduction based on the type of the argument *) - let params = match is_function_type env ty with - | None -> [] - | Some (p1, rhs) -> match is_function_type env rhs with - | None -> [p1] - | Some (p2, _) -> [p1;p2] - in - match (p, params) with - (Psetfield(n, _, init), [_p1; p2]) -> - Psetfield(n, maybe_pointer_type env p2, init) - | (Parraylength t, [p]) -> - Parraylength(glb_array_type t (array_type_kind env p)) - | (Parrayrefu t, p1 :: _) -> - Parrayrefu(glb_array_type t (array_type_kind env p1)) - | (Parraysetu t, p1 :: _) -> - Parraysetu(glb_array_type t (array_type_kind env p1)) - | (Parrayrefs t, p1 :: _) -> - Parrayrefs(glb_array_type t (array_type_kind env p1)) - | (Parraysets t, p1 :: _) -> - Parraysets(glb_array_type t (array_type_kind env p1)) - | (Pbigarrayref(unsafe, n, Pbigarray_unknown, Pbigarray_unknown_layout), - p1 :: _) -> - let (k, l) = bigarray_type_kind_and_layout env p1 in - Pbigarrayref(unsafe, n, k, l) - | (Pbigarrayset(unsafe, n, Pbigarray_unknown, Pbigarray_unknown_layout), - p1 :: _) -> - let (k, l) = bigarray_type_kind_and_layout env p1 in - 
Pbigarrayset(unsafe, n, k, l) - | (Pmakeblock(tag, mut, None), fields) -> - let shape = List.map (Typeopt.value_kind env) fields in - Pmakeblock(tag, mut, Some shape) - | _ -> p - -(* Eta-expand a primitive *) - -let used_primitives = Hashtbl.create 7 -let add_used_primitive loc env path = - match path with - Some (Path.Pdot _ as path) -> - let path = Env.normalize_path (Some loc) env path in - let unit = Path.head path in - if Ident.global unit && not (Hashtbl.mem used_primitives path) - then Hashtbl.add used_primitives path loc - | _ -> () - -let transl_primitive loc p env ty path = - let prim = - try specialize_primitive p env ty ~has_constant_constructor:false - with Not_found -> - add_used_primitive loc env path; - Pccall p - in - match prim with - | Plazyforce -> - let parm = Ident.create "prim" in - Lfunction{kind = Curried; params = [parm]; - body = Matching.inline_lazy_force (Lvar parm) Location.none; - loc = loc; - attr = default_stub_attribute } - | Ploc kind -> - let lam = lam_of_loc kind loc in - begin match p.prim_arity with - | 0 -> lam - | 1 -> (* TODO: we should issue a warning ? 
*) - let param = Ident.create "prim" in - Lfunction{kind = Curried; params = [param]; - attr = default_stub_attribute; - loc = loc; - body = Lprim(Pmakeblock(0, Immutable, None), - [lam; Lvar param], loc)} - | _ -> assert false - end - | _ -> - let rec make_params n = - if n <= 0 then [] else Ident.create "prim" :: make_params (n-1) in - let params = make_params p.prim_arity in - Lfunction{ kind = Curried; params; - attr = default_stub_attribute; - loc = loc; - body = Lprim(prim, List.map (fun id -> Lvar id) params, loc) } - -let transl_primitive_application loc prim env ty path args = - let prim_name = prim.prim_name in - try - let has_constant_constructor = match args with - [_; {exp_desc = Texp_construct(_, {cstr_tag = Cstr_constant _}, _)}] - | [{exp_desc = Texp_construct(_, {cstr_tag = Cstr_constant _}, _)}; _] - | [_; {exp_desc = Texp_variant(_, None)}] - | [{exp_desc = Texp_variant(_, None)}; _] -> true - | _ -> false - in - specialize_primitive prim env ty ~has_constant_constructor - with Not_found -> - if String.length prim_name > 0 && prim_name.[0] = '%' then - raise(Error(loc, Unknown_builtin_primitive prim_name)); - add_used_primitive loc env path; - Pccall prim - (* To propagate structured constants *) exception Not_constant @@ -596,23 +143,9 @@ let rec push_defaults loc bindings cases partial = (* Insertion of debugging events *) -let event_before exp lam = match lam with -| Lstaticraise (_,_) -> lam -| _ -> - if !Clflags.debug && not !Clflags.native_code - then Levent(lam, {lev_loc = exp.exp_loc; - lev_kind = Lev_before; - lev_repr = None; - lev_env = Env.summary exp.exp_env}) - else lam +let event_before = Translprim.event_before -let event_after exp lam = - if !Clflags.debug && not !Clflags.native_code - then Levent(lam, {lev_loc = exp.exp_loc; - lev_kind = Lev_after exp.exp_type; - lev_repr = None; - lev_env = Env.summary exp.exp_env}) - else lam +let event_after = Translprim.event_after let event_function exp lam = if !Clflags.debug && not 
!Clflags.native_code then @@ -626,14 +159,6 @@ let event_function exp lam = else lam None -let primitive_is_ccall = function - (* Determine if a primitive is a Pccall or will be turned later into - a C function call that may raise an exception *) - | Pccall _ | Pstringrefs | Pbytesrefs | Pbytessets | Parrayrefs _ | - Parraysets _ | Pbigarrayref _ | Pbigarrayset _ | Pduprecord _ | Pdirapply | - Prevapply -> true - | _ -> false - (* Assertions *) let assert_failed exp = @@ -655,8 +180,6 @@ let rec cut n l = (* Translation of expressions *) -let try_ids = Hashtbl.create 8 - let rec transl_exp e = List.iter (Translattribute.check_attribute e) e.exp_attributes; let eval_once = @@ -670,25 +193,8 @@ let rec transl_exp e = and transl_exp0 e = match e.exp_desc with - Texp_ident(path, _, {val_kind = Val_prim p}) -> - let public_send = p.prim_name = "%send" in - if public_send || p.prim_name = "%sendself" then - let kind = if public_send then Public else Self in - let obj = Ident.create "obj" and meth = Ident.create "meth" in - Lfunction{kind = Curried; params = [obj; meth]; - attr = default_stub_attribute; - loc = e.exp_loc; - body = Lsend(kind, Lvar meth, Lvar obj, [], e.exp_loc)} - else if p.prim_name = "%sendcache" then - let obj = Ident.create "obj" and meth = Ident.create "meth" in - let cache = Ident.create "cache" and pos = Ident.create "pos" in - Lfunction{kind = Curried; params = [obj; meth; cache; pos]; - attr = default_stub_attribute; - loc = e.exp_loc; - body = Lsend(Cached, Lvar meth, Lvar obj, - [Lvar cache; Lvar pos], e.exp_loc)} - else - transl_primitive e.exp_loc p e.exp_env e.exp_type (Some path) + | Texp_ident(path, _, {val_kind = Val_prim p}) -> + Translprim.transl_primitive e.exp_loc p e.exp_env e.exp_type (Some path) | Texp_ident(_, _, {val_kind = Val_anc _}) -> raise(Error(e.exp_loc, Free_super_var)) | Texp_ident(path, _, {val_kind = Val_reg | Val_self _}) -> @@ -718,89 +224,32 @@ and transl_exp0 e = exp_type = prim_type } as funct, oargs) when 
List.length oargs >= p.prim_arity && List.for_all (fun (_, arg) -> arg <> None) oargs -> - let args, args' = cut p.prim_arity oargs in - let wrap f = - if args' = [] - then event_after e f - else - let should_be_tailcall, funct = - Translattribute.get_tailcall_attribute funct - in - let inlined, funct = - Translattribute.get_and_remove_inlined_attribute funct - in - let specialised, funct = - Translattribute.get_and_remove_specialised_attribute funct - in - let e = { e with exp_desc = Texp_apply(funct, oargs) } in - event_after e - (transl_apply ~should_be_tailcall ~inlined ~specialised - f args' e.exp_loc) + let argl, extra_args = cut p.prim_arity oargs in + let arg_exps = + List.map (function _, Some x -> x | _ -> assert false) argl in - let wrap0 f = - if args' = [] then f else wrap f in - let args = - List.map (function _, Some x -> x | _ -> assert false) args in - let argl = transl_list args in - let public_send = p.prim_name = "%send" - || not !Clflags.native_code && p.prim_name = "%sendcache"in - if public_send || p.prim_name = "%sendself" then - let kind = if public_send then Public else Self in - let obj = List.hd argl in - wrap (Lsend (kind, List.nth argl 1, obj, [], e.exp_loc)) - else if p.prim_name = "%sendcache" then - match argl with [obj; meth; cache; pos] -> - wrap (Lsend(Cached, meth, obj, [cache; pos], e.exp_loc)) - | _ -> assert false - else if p.prim_name = "%raise_with_backtrace" then begin - let texn1 = List.hd args (* Should not fail by typing *) in - let texn2,bt = match argl with - | [a;b] -> a,b - | _ -> assert false (* idem *) - in - let vexn = Ident.create "exn" in - Llet(Strict, Pgenval, vexn, texn2, - event_before e begin - Lsequence( - wrap (Lprim (Pccall prim_restore_raw_backtrace, - [Lvar vexn;bt], - e.exp_loc)), - wrap0 (Lprim(Praise Raise_reraise, - [event_after texn1 (Lvar vexn)], - e.exp_loc)) - ) - end - ) - end + let args = transl_list arg_exps in + let prim_exp = if extra_args = [] then Some e else None in + let lam = + 
Translprim.transl_primitive_application + e.exp_loc p e.exp_env prim_type path + prim_exp args arg_exps + in + if extra_args = [] then lam else begin - let prim = transl_primitive_application - e.exp_loc p e.exp_env prim_type (Some path) args in - match (prim, args) with - (Praise k, [arg1]) -> - let targ = List.hd argl in - let k = - match k, targ with - | Raise_regular, Lvar id - when Hashtbl.mem try_ids id -> - Raise_reraise - | _ -> - k - in - wrap0 (Lprim(Praise k, [event_after arg1 targ], e.exp_loc)) - | (Ploc kind, []) -> - lam_of_loc kind e.exp_loc - | (Ploc kind, [arg1]) -> - let lam = lam_of_loc kind arg1.exp_loc in - Lprim(Pmakeblock(0, Immutable, None), lam :: argl, e.exp_loc) - | (Ploc _, _) -> assert false - | (_, _) -> - begin match (prim, argl) with - | (Plazyforce, [a]) -> - wrap (Matching.inline_lazy_force a e.exp_loc) - | (Plazyforce, _) -> assert false - |_ -> let p = Lprim(prim, argl, e.exp_loc) in - if primitive_is_ccall prim then wrap p else wrap0 p - end + let should_be_tailcall, funct = + Translattribute.get_tailcall_attribute funct + in + let inlined, funct = + Translattribute.get_and_remove_inlined_attribute funct + in + let specialised, funct = + Translattribute.get_and_remove_specialised_attribute funct + in + let e = { e with exp_desc = Texp_apply(funct, oargs) } in + event_after e + (transl_apply ~should_be_tailcall ~inlined ~specialised + lam extra_args e.exp_loc) end | Texp_apply(funct, oargs) -> let should_be_tailcall, funct = @@ -1019,11 +468,13 @@ and transl_exp0 e = do *) begin match Typeopt.classify_lazy_argument e with | `Constant_or_function -> - (* a constant expr of type <> float gets compiled as itself *) + (* A constant expr (of type <> float if [Config.flat_float_array] is + true) gets compiled as itself. *) transl_exp e - | `Float -> + | `Float_that_cannot_be_shortcut -> (* We don't need to wrap with Popaque: this forward - block will never be shortcutted since it points to a float. 
*) + block will never be shortcutted since it points to a float + and Config.flat_float_array is true. *) Lprim(Pmakeblock(Obj.forward_tag, Immutable, None), [transl_exp e], e.exp_loc) | `Identifier `Forward_value -> @@ -1086,15 +537,19 @@ and transl_cases cases = List.map transl_case cases and transl_case_try {c_lhs; c_guard; c_rhs} = - match c_lhs.pat_desc with - | Tpat_var (id, _) - | Tpat_alias (_, id, _) -> - Hashtbl.replace try_ids id (); - Misc.try_finally - (fun () -> c_lhs, transl_guard c_guard c_rhs) - (fun () -> Hashtbl.remove try_ids id) - | _ -> - c_lhs, transl_guard c_guard c_rhs + let rec iter_exn_names f pat = + match pat.pat_desc with + | Tpat_var (id, _) -> f id + | Tpat_alias (p, id, _) -> + f id; + iter_exn_names f p + | _ -> () + in + iter_exn_names Translprim.add_exception_ident c_lhs; + Misc.try_finally + (fun () -> c_lhs, transl_guard c_guard c_rhs) + (fun () -> + iter_exn_names Translprim.remove_exception_ident c_lhs) and transl_cases_try cases = let cases = @@ -1203,12 +658,18 @@ and transl_function loc untuplify_fn repr partial param cases = Matching.for_function loc repr (Lvar param) (transl_cases cases) partial) -and transl_let rec_flag pat_expr_list body = +(* + Notice: transl_let consumes (ie compiles) its pat_expr_list argument, + and returns a function that will take the body of the lambda-let construct. + This complication allows choosing any compilation order for the + bindings and body of let constructs. 
+*) +and transl_let rec_flag pat_expr_list = match rec_flag with Nonrecursive -> let rec transl = function [] -> - body + fun body -> body | {vb_pat=pat; vb_expr=expr; vb_attributes=attr; vb_loc} :: rem -> let lam = transl_exp expr in let lam = @@ -1217,7 +678,8 @@ and transl_let rec_flag pat_expr_list body = let lam = Translattribute.add_specialise_attribute lam vb_loc attr in - Matching.for_let pat.pat_loc lam pat (transl rem) + let mk_body = transl rem in + fun body -> Matching.for_let pat.pat_loc lam pat (mk_body body) in transl pat_expr_list | Recursive -> let idlist = @@ -1238,7 +700,8 @@ and transl_let rec_flag pat_expr_list body = vb_attributes in (id, lam) in - Lletrec(List.map2 transl_case pat_expr_list idlist, body) + let lam_bds = List.map2 transl_case pat_expr_list idlist in + fun body -> Lletrec(lam_bds, body) and transl_setinstvar loc self var expr = Lprim(Psetfield_computed (maybe_pointer expr, Assignment), @@ -1347,7 +810,7 @@ and transl_match e arg pat_expr_list exn_pat_expr_list partial = and cases = transl_cases pat_expr_list and exn_cases = transl_cases_try exn_pat_expr_list in let static_catch body val_ids handler = - let static_exception_id = next_negative_raise_count () in + let static_exception_id = next_raise_count () in Lstaticcatch (Ltrywith (Lstaticraise (static_exception_id, body), id, Matching.for_trywith (Lvar id) exn_cases), @@ -1391,8 +854,6 @@ let report_error ppf = function | Free_super_var -> fprintf ppf "Ancestor names can only be used to select inherited methods" - | Unknown_builtin_primitive prim_name -> - fprintf ppf "Unknown builtin primitive \"%s\"" prim_name | Unreachable_reached -> fprintf ppf "Unreachable expression was reached" diff --git a/bytecomp/translcore.mli b/bytecomp/translcore.mli index 75c26f8d..e27eb20a 100644 --- a/bytecomp/translcore.mli +++ b/bytecomp/translcore.mli @@ -27,17 +27,12 @@ val transl_apply: ?should_be_tailcall:bool -> lambda -> (arg_label * expression option) list -> Location.t -> lambda val 
transl_let: rec_flag -> value_binding list -> lambda -> lambda -val transl_primitive: Location.t -> Primitive.description -> Env.t - -> Types.type_expr -> Path.t option -> lambda val transl_extension_constructor: Env.t -> Path.t option -> extension_constructor -> lambda -val used_primitives: (Path.t, Location.t) Hashtbl.t - type error = Free_super_var - | Unknown_builtin_primitive of string | Unreachable_reached exception Error of Location.t * error diff --git a/bytecomp/translmod.ml b/bytecomp/translmod.ml index 31557eed..63cdf699 100644 --- a/bytecomp/translmod.ml +++ b/bytecomp/translmod.ml @@ -28,7 +28,7 @@ open Translcore open Translclass type error = - Circular_dependency of Ident.t + Circular_dependency of Ident.t list | Conflicting_inline_attributes exception Error of Location.t * error @@ -78,7 +78,7 @@ let rec apply_coercion loc strict restr arg = let carg = apply_coercion loc Alias cc_arg (Lvar param) in apply_coercion_result loc strict arg [param] [carg] cc_res | Tcoerce_primitive { pc_loc; pc_desc; pc_env; pc_type; } -> - transl_primitive pc_loc pc_desc pc_env pc_type None + Translprim.transl_primitive pc_loc pc_desc pc_env pc_type None | Tcoerce_alias (path, cc) -> name_lambda strict arg (fun _ -> apply_coercion loc Alias cc (transl_normal_path path)) @@ -112,19 +112,19 @@ and apply_coercion_result loc strict funct params args cc_res = and wrap_id_pos_list loc id_pos_list get_field lam = let fv = free_variables lam in (*Format.eprintf "%a@." 
Printlambda.lambda lam; - IdentSet.iter (fun id -> Format.eprintf "%a " Ident.print id) fv; + Ident.Set.iter (fun id -> Format.eprintf "%a " Ident.print id) fv; Format.eprintf "@.";*) let (lam,s) = List.fold_left (fun (lam,s) (id',pos,c) -> - if IdentSet.mem id' fv then + if Ident.Set.mem id' fv then let id'' = Ident.create (Ident.name id') in (Llet(Alias, Pgenval, id'', apply_coercion loc Alias c (get_field pos),lam), - Ident.add id' (Lvar id'') s) + Ident.Map.add id' (Lvar id'') s) else (lam,s)) - (lam, Ident.empty) id_pos_list + (lam, Ident.Map.empty) id_pos_list in - if s == Ident.empty then lam else subst_lambda s lam + if s == Ident.Map.empty then lam else Lambda.subst s lam (* Compose two coercions @@ -199,9 +199,10 @@ let undefined_location loc = let init_shape modl = let rec init_shape_mod env mty = match Mtype.scrape env mty with - Mty_ident _ -> + Mty_ident _ + | Mty_alias (Mta_present, _) -> raise Not_found - | Mty_alias _ -> + | Mty_alias (Mta_absent, _) -> Const_block (1, [Const_pointer 0]) | Mty_signature sg -> Const_block(0, [Const_block(0, init_shape_struct env sg)]) @@ -247,7 +248,20 @@ let init_shape modl = (* Reorder bindings to honor dependencies. *) -type binding_status = Undefined | Inprogress | Defined +type binding_status = + | Undefined + | Inprogress of int option (** parent node *) + | Defined + +let extract_unsafe_cycle id status cycle_start = + let rec collect stop l i = match status.(i) with + | Inprogress None | Undefined | Defined -> assert false + | Inprogress Some i when i = stop -> id.(i) :: l + | Inprogress Some i -> collect stop (id.(i)::l) i in + collect cycle_start [id.(cycle_start)] cycle_start +(* This yields [cycle_start; ...; cycle_start]. The start of the cycle + is duplicated to make the cycle more visible in the corresponding error + message. 
*) let reorder_rec_bindings bindings = let id = Array.of_list (List.map (fun (id,_,_,_) -> id) bindings) @@ -258,23 +272,26 @@ let reorder_rec_bindings bindings = let num_bindings = Array.length id in let status = Array.make num_bindings Undefined in let res = ref [] in - let rec emit_binding i = + let rec emit_binding parent i = match status.(i) with Defined -> () - | Inprogress -> raise(Error(loc.(i), Circular_dependency id.(i))) + | Inprogress _ -> + status.(i) <- Inprogress parent; + let cycle = extract_unsafe_cycle id status i in + raise(Error(loc.(i), Circular_dependency cycle)) | Undefined -> if init.(i) = None then begin - status.(i) <- Inprogress; + status.(i) <- Inprogress parent; for j = 0 to num_bindings - 1 do - if IdentSet.mem id.(j) fv.(i) then emit_binding j + if Ident.Set.mem id.(j) fv.(i) then emit_binding (Some i) j done end; res := (id.(i), init.(i), rhs.(i)) :: !res; status.(i) <- Defined in for i = 0 to num_bindings - 1 do match status.(i) with - Undefined -> emit_binding i - | Inprogress -> assert false + Undefined -> emit_binding None i + | Inprogress _ -> assert false | Defined -> () done; List.rev !res @@ -456,6 +473,9 @@ and transl_module cc rootpath mexp = and transl_struct loc fields cc rootpath str = transl_structure loc fields cc rootpath str.str_final_env str.str_items +(* The function transl_structure is called by the bytecode compiler. + Some effort is made to compile in top to bottom order, in order to display + warning by increasing locations. 
*) and transl_structure loc fields cc rootpath final_env = function [] -> let body, size = @@ -472,19 +492,19 @@ and transl_structure loc fields cc rootpath final_env = function Format.eprintf "@]@.";*) let v = Array.of_list (List.rev fields) in let get_field pos = Lvar v.(pos) - and ids = List.fold_right IdentSet.add fields IdentSet.empty in + and ids = List.fold_right Ident.Set.add fields Ident.Set.empty in let lam = Lprim(Pmakeblock(0, Immutable, None), List.map (fun (pos, cc) -> match cc with Tcoerce_primitive p -> - transl_primitive p.pc_loc + Translprim.transl_primitive p.pc_loc p.pc_desc p.pc_env p.pc_type None | _ -> apply_coercion loc Strict cc (get_field pos)) pos_cc_list, loc) and id_pos_list = - List.filter (fun (id,_,_) -> not (IdentSet.mem id ids)) + List.filter (fun (id,_,_) -> not (Ident.Set.mem id ids)) id_pos_list in wrap_id_pos_list loc id_pos_list get_field lam, @@ -512,11 +532,14 @@ and transl_structure loc fields cc rootpath final_env = function in Lsequence(transl_exp expr, body), size | Tstr_value(rec_flag, pat_expr_list) -> + (* Translate bindings first *) + let mk_lam_let = transl_let rec_flag pat_expr_list in let ext_fields = rev_let_bound_idents pat_expr_list @ fields in + (* Then, translate remainder of struct *) let body, size = transl_structure loc ext_fields cc rootpath final_env rem in - transl_let rec_flag pat_expr_list body, size + mk_lam_let body, size | Tstr_primitive descr -> record_primitive descr.val_val; transl_structure loc fields cc rootpath final_env rem @@ -540,9 +563,7 @@ and transl_structure loc fields cc rootpath final_env = function size | Tstr_module mb -> let id = mb.mb_id in - let body, size = - transl_structure loc (id :: fields) cc rootpath final_env rem - in + (* Translate module first *) let module_body = transl_module Tcoerce_none (field_path rootpath id) mb.mb_expr in @@ -550,6 +571,10 @@ and transl_structure loc fields cc rootpath final_env = function Translattribute.add_inline_attribute module_body 
mb.mb_loc mb.mb_attributes in + (* Translate remainder second *) + let body, size = + transl_structure loc (id :: fields) cc rootpath final_env rem + in let module_body = Levent (module_body, { lev_loc = mb.mb_loc; @@ -632,7 +657,7 @@ let _ = let scan_used_globals lam = let globals = ref Ident.Set.empty in let rec scan lam = - Lambda.iter scan lam; + Lambda.iter_head_constructor scan lam; match lam with Lprim ((Pgetglobal id | Psetglobal id), _, _) -> globals := Ident.Set.add id !globals @@ -649,15 +674,16 @@ let required_globals ~flambda body = Ident.Set.add id req in let required = - Hashtbl.fold - (fun path _ -> add_global (Path.head path)) used_primitives + List.fold_left + (fun acc path -> add_global (Path.head path) acc) (if flambda then globals else Ident.Set.empty) + (Translprim.get_used_primitives ()) in let required = List.fold_right add_global (Env.get_required_globals ()) required in Env.reset_required_globals (); - Hashtbl.clear used_primitives; + Translprim.clear_used_primitives (); required (* Compile an implementation *) @@ -665,7 +691,7 @@ let required_globals ~flambda body = let transl_implementation_flambda module_name (str, cc) = reset_labels (); primitive_declarations := []; - Hashtbl.clear used_primitives; + Translprim.clear_used_primitives (); let module_id = Ident.create_persistent module_name in let body, size = Translobj.transl_label_init @@ -792,12 +818,12 @@ and all_idents = function "map" is a table from defined idents to (pos in global block, coercion). "prim" is a list of (pos in global block, primitive declaration). 
*) -let transl_store_subst = ref Ident.empty +let transl_store_subst = ref Ident.Map.empty (** In the native toplevel, this reference is threaded through successive calls of transl_store_structure *) let nat_toplevel_name id = - try match Ident.find_same id !transl_store_subst with + try match Ident.Map.find id !transl_store_subst with | Lprim(Pfield pos, [Lprim(Pgetglobal glob, [], _)], _) -> (glob,pos) | _ -> raise Not_found with Not_found -> @@ -808,7 +834,7 @@ let field_of_str loc str = fun (pos, cc) -> match cc with | Tcoerce_primitive { pc_loc; pc_desc; pc_env; pc_type; } -> - transl_primitive pc_loc pc_desc pc_env pc_type None + Translprim.transl_primitive pc_loc pc_desc pc_env pc_type None | _ -> apply_coercion loc Strict cc (Lvar ids.(pos)) @@ -820,14 +846,14 @@ let transl_store_structure glob map prims str = | item :: rem -> match item.str_desc with | Tstr_eval (expr, _attrs) -> - Lsequence(subst_lambda subst (transl_exp expr), + Lsequence(Lambda.subst subst (transl_exp expr), transl_store rootpath subst rem) | Tstr_value(rec_flag, pat_expr_list) -> let ids = let_bound_idents pat_expr_list in let lam = transl_let rec_flag pat_expr_list (store_idents Location.none ids) in - Lsequence(subst_lambda subst lam, + Lsequence(Lambda.subst subst lam, transl_store rootpath (add_idents false ids subst) rem) | Tstr_primitive descr -> record_primitive descr.val_val; @@ -842,13 +868,13 @@ let transl_store_structure glob map prims str = transl_type_extension item.str_env rootpath tyext (store_idents Location.none ids) in - Lsequence(subst_lambda subst lam, + Lsequence(Lambda.subst subst lam, transl_store rootpath (add_idents false ids subst) rem) | Tstr_exception ext -> let id = ext.ext_id in let path = field_path rootpath id in let lam = transl_extension_constructor item.str_env path ext in - Lsequence(Llet(Strict, Pgenval, id, subst_lambda subst lam, + Lsequence(Llet(Strict, Pgenval, id, Lambda.subst subst lam, store_ident ext.ext_loc id), transl_store rootpath 
(add_ident false id subst) rem) | Tstr_module{mb_id=id;mb_loc=loc; @@ -863,7 +889,7 @@ let transl_store_structure glob map prims str = let subst = !transl_store_subst in Lsequence(lam, Llet(Strict, Pgenval, id, - subst_lambda subst + Lambda.subst subst (Lprim(Pmakeblock(0, Immutable, None), List.map (fun id -> Lvar id) (defined_idents str.str_items), loc)), @@ -891,7 +917,7 @@ let transl_store_structure glob map prims str = let field = field_of_str loc str in Lsequence(lam, Llet(Strict, Pgenval, id, - subst_lambda subst + Lambda.subst subst (Lprim(Pmakeblock(0, Immutable, None), List.map field map, loc)), Lsequence(store_ident loc id, @@ -910,14 +936,14 @@ let transl_store_structure glob map prims str = the compilation unit (add_ident true returns subst unchanged). If not, we can use the value from the global (add_ident true adds id -> Pgetglobal... to subst). *) - Llet(Strict, Pgenval, id, subst_lambda subst lam, + Llet(Strict, Pgenval, id, Lambda.subst subst lam, Lsequence(store_ident loc id, transl_store rootpath (add_ident true id subst) rem)) | Tstr_recmodule bindings -> let ids = List.map (fun mb -> mb.mb_id) bindings in compile_recmodule (fun id modl _loc -> - subst_lambda subst + Lambda.subst subst (transl_module Tcoerce_none (field_path rootpath id) modl)) bindings @@ -928,7 +954,7 @@ let transl_store_structure glob map prims str = let lam = Lletrec(class_bindings, store_idents Location.none ids) in - Lsequence(subst_lambda subst lam, + Lsequence(Lambda.subst subst lam, transl_store rootpath (add_idents false ids subst) rem) | Tstr_include{ @@ -959,7 +985,7 @@ let transl_store_structure glob map prims str = | [], [] -> transl_store rootpath (add_idents true ids0 subst) rem | id :: ids, arg :: args -> - Llet(Alias, Pgenval, id, subst_lambda subst (field arg), + Llet(Alias, Pgenval, id, Lambda.subst subst (field arg), Lsequence(store_ident loc id, loop ids args)) | _ -> assert false @@ -980,7 +1006,7 @@ let transl_store_structure glob map prims str = 
store_idents (pos + 1) idl)) in Llet(Strict, Pgenval, mid, - subst_lambda subst (transl_module Tcoerce_none None modl), + Lambda.subst subst (transl_module Tcoerce_none None modl), store_idents 0 ids) | Tstr_modtype _ | Tstr_open _ @@ -1006,7 +1032,7 @@ let transl_store_structure glob map prims str = let (pos, cc) = Ident.find_same id map in match cc with Tcoerce_none -> - Ident.add id + Ident.Map.add id (Lprim(Pfield pos, [Lprim(Pgetglobal glob, [], Location.none)], Location.none)) @@ -1022,7 +1048,7 @@ let transl_store_structure glob map prims str = and store_primitive (pos, prim) cont = Lsequence(Lprim(Psetfield(pos, Pointer, Root_initialization), [Lprim(Pgetglobal glob, [], Location.none); - transl_primitive Location.none + Translprim.transl_primitive Location.none prim.pc_desc prim.pc_env prim.pc_type None], Location.none), cont) @@ -1076,14 +1102,14 @@ let build_ident_map restr idlist more_ids = let transl_store_gen module_name ({ str_items = str }, restr) topl = reset_labels (); primitive_declarations := []; - Hashtbl.clear used_primitives; + Translprim.clear_used_primitives (); let module_id = Ident.create_persistent module_name in let (map, prims, size) = build_ident_map restr (defined_idents str) (more_idents str) in let f = function | [ { str_desc = Tstr_eval (expr, _attrs) } ] when topl -> assert (size = 0); - subst_lambda !transl_store_subst (transl_exp expr) + Lambda.subst !transl_store_subst (transl_exp expr) | str -> transl_store_structure module_id map prims str in transl_store_label_init module_id size f str (*size, transl_label_init (transl_store_structure module_id map prims str)*) @@ -1093,7 +1119,7 @@ let transl_store_phrases module_name str = let transl_store_implementation module_name (str, restr) = let s = !transl_store_subst in - transl_store_subst := Ident.empty; + transl_store_subst := Ident.Map.empty; let (i, code) = transl_store_gen module_name (str, restr) false in transl_store_subst := s; { Lambda.main_module_block_size = i; @@ 
-1143,7 +1169,7 @@ let toploop_setvalue id lam = let toploop_setvalue_id id = toploop_setvalue id (Lvar id) let close_toplevel_term (lam, ()) = - IdentSet.fold (fun id l -> Llet(Strict, Pgenval, id, + Ident.Set.fold (fun id l -> Llet(Strict, Pgenval, id, toploop_getvalue id, l)) (free_variables lam) lam @@ -1219,7 +1245,7 @@ let transl_toplevel_item_and_close itm = let transl_toplevel_definition str = reset_labels (); - Hashtbl.clear used_primitives; + Translprim.clear_used_primitives (); make_sequence transl_toplevel_item_and_close str.str_items (* Compile the initialization code for a packed library *) @@ -1315,12 +1341,18 @@ let transl_store_package component_names target_name coercion = open Format +let print_cycle ppf = + Format.pp_print_list ~pp_sep:(fun ppf () -> fprintf ppf "@ -> ") + Printtyp.ident ppf + let report_error ppf = function - Circular_dependency id -> + Circular_dependency cycle -> + let[@manual.ref "s-recursive-modules"] chapter, section = 8, 4 in fprintf ppf - "@[Cannot safely evaluate the definition@ \ - of the recursively-defined module %a@]" - Printtyp.ident id + "@[Cannot safely evaluate the definition of the following cycle@ \ + of recursively-defined modules:@ %a.@ \ + There are no safe modules in this cycle@ (see manual section %d.%d)@]" + print_cycle cycle chapter section | Conflicting_inline_attributes -> fprintf ppf "@[Conflicting ``inline'' attributes@]" @@ -1336,8 +1368,7 @@ let () = let reset () = primitive_declarations := []; - transl_store_subst := Ident.empty; - toploop_ident.Ident.flags <- 0; + transl_store_subst := Ident.Map.empty; aliased_idents := Ident.empty; Env.reset_required_globals (); - Hashtbl.clear used_primitives + Translprim.clear_used_primitives () diff --git a/bytecomp/translmod.mli b/bytecomp/translmod.mli index 1b86328d..3098e543 100644 --- a/bytecomp/translmod.mli +++ b/bytecomp/translmod.mli @@ -43,7 +43,7 @@ val nat_toplevel_name: Ident.t -> Ident.t * int val primitive_declarations: Primitive.description 
list ref type error = - Circular_dependency of Ident.t + Circular_dependency of Ident.t list | Conflicting_inline_attributes exception Error of Location.t * error diff --git a/bytecomp/translobj.ml b/bytecomp/translobj.ml index c3dd9793..dd55acf3 100644 --- a/bytecomp/translobj.ml +++ b/bytecomp/translobj.ml @@ -162,7 +162,7 @@ let transl_label_init f = let wrapping = ref false let top_env = ref Env.empty let classes = ref [] -let method_ids = ref IdentSet.empty +let method_ids = ref Ident.Set.empty let oo_add_class id = classes := id :: !classes; @@ -178,7 +178,7 @@ let oo_wrap env req f x = cache_required := req; top_env := env; classes := []; - method_ids := IdentSet.empty; + method_ids := Ident.Set.empty; let lambda = f x in let lambda = List.fold_left @@ -207,4 +207,4 @@ let reset () = wrapping := false; top_env := Env.empty; classes := []; - method_ids := IdentSet.empty + method_ids := Ident.Set.empty diff --git a/bytecomp/translobj.mli b/bytecomp/translobj.mli index d7426241..c27053e9 100644 --- a/bytecomp/translobj.mli +++ b/bytecomp/translobj.mli @@ -25,7 +25,7 @@ val transl_label_init: (unit -> lambda * 'a) -> lambda * 'a val transl_store_label_init: Ident.t -> int -> ('a -> lambda) -> 'a -> int * lambda -val method_ids: IdentSet.t ref (* reset when starting a new wrapper *) +val method_ids: Ident.Set.t ref (* reset when starting a new wrapper *) val oo_wrap: Env.t -> bool -> ('a -> lambda) -> 'a -> lambda val oo_add_class: Ident.t -> Env.t * bool diff --git a/bytecomp/translprim.ml b/bytecomp/translprim.ml new file mode 100644 index 00000000..25cc6154 --- /dev/null +++ b/bytecomp/translprim.ml @@ -0,0 +1,757 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. *) +(* *) +(* All rights reserved. 
This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +(* Translation of primitives *) + +open Misc +open Asttypes +open Primitive +open Types +open Typedtree +open Typeopt +open Lambda + +type error = + | Unknown_builtin_primitive of string + | Wrong_arity_builtin_primitive of string + +exception Error of Location.t * error + +(* Insertion of debugging events *) + +let event_before exp lam = match lam with +| Lstaticraise (_,_) -> lam +| _ -> + if !Clflags.debug && not !Clflags.native_code + then Levent(lam, {lev_loc = exp.exp_loc; + lev_kind = Lev_before; + lev_repr = None; + lev_env = Env.summary exp.exp_env}) + else lam + +let event_after exp lam = + if !Clflags.debug && not !Clflags.native_code + then Levent(lam, {lev_loc = exp.exp_loc; + lev_kind = Lev_after exp.exp_type; + lev_repr = None; + lev_env = Env.summary exp.exp_env}) + else lam + +type comparison = + | Equal + | Not_equal + | Less_equal + | Less_than + | Greater_equal + | Greater_than + | Compare + +type comparison_kind = + | Compare_generic + | Compare_ints + | Compare_floats + | Compare_strings + | Compare_bytes + | Compare_nativeints + | Compare_int32s + | Compare_int64s + +type loc_kind = + | Loc_FILE + | Loc_LINE + | Loc_MODULE + | Loc_LOC + | Loc_POS + +type prim = + | Primitive of Lambda.primitive + | Comparison of comparison * comparison_kind + | Raise of Lambda.raise_kind + | Raise_with_backtrace + | Lazy_force + | Loc of loc_kind + | Send + | Send_self + | Send_cache + +let used_primitives = Hashtbl.create 7 +let add_used_primitive loc env path = + match path with + Some (Path.Pdot _ as path) -> + let path = Env.normalize_path (Some loc) env path in + let unit = Path.head path in + if Ident.global unit && not (Hashtbl.mem used_primitives path) + then Hashtbl.add used_primitives 
path loc + | _ -> () + +let clear_used_primitives () = Hashtbl.clear used_primitives +let get_used_primitives () = + Hashtbl.fold (fun path _ acc -> path :: acc) used_primitives [] + +let gen_array_kind = + if Config.flat_float_array then Pgenarray else Paddrarray + +let primitives_table = create_hashtable 57 [ + "%identity", Primitive Pidentity; + "%bytes_to_string", Primitive Pbytes_to_string; + "%bytes_of_string", Primitive Pbytes_of_string; + "%ignore", Primitive Pignore; + "%revapply", Primitive Prevapply; + "%apply", Primitive Pdirapply; + "%loc_LOC", Loc Loc_LOC; + "%loc_FILE", Loc Loc_FILE; + "%loc_LINE", Loc Loc_LINE; + "%loc_POS", Loc Loc_POS; + "%loc_MODULE", Loc Loc_MODULE; + "%field0", Primitive (Pfield 0); + "%field1", Primitive (Pfield 1); + "%setfield0", Primitive (Psetfield(0, Pointer, Assignment)); + "%makeblock", Primitive (Pmakeblock(0, Immutable, None)); + "%makemutable", Primitive (Pmakeblock(0, Mutable, None)); + "%raise", Raise Raise_regular; + "%reraise", Raise Raise_reraise; + "%raise_notrace", Raise Raise_notrace; + "%raise_with_backtrace", Raise_with_backtrace; + "%sequand", Primitive Psequand; + "%sequor", Primitive Psequor; + "%boolnot", Primitive Pnot; + "%big_endian", Primitive (Pctconst Big_endian); + "%backend_type", Primitive (Pctconst Backend_type); + "%word_size", Primitive (Pctconst Word_size); + "%int_size", Primitive (Pctconst Int_size); + "%max_wosize", Primitive (Pctconst Max_wosize); + "%ostype_unix", Primitive (Pctconst Ostype_unix); + "%ostype_win32", Primitive (Pctconst Ostype_win32); + "%ostype_cygwin", Primitive (Pctconst Ostype_cygwin); + "%negint", Primitive Pnegint; + "%succint", Primitive (Poffsetint 1); + "%predint", Primitive (Poffsetint(-1)); + "%addint", Primitive Paddint; + "%subint", Primitive Psubint; + "%mulint", Primitive Pmulint; + "%divint", Primitive (Pdivint Safe); + "%modint", Primitive (Pmodint Safe); + "%andint", Primitive Pandint; + "%orint", Primitive Porint; + "%xorint", Primitive Pxorint; + 
"%lslint", Primitive Plslint; + "%lsrint", Primitive Plsrint; + "%asrint", Primitive Pasrint; + "%eq", Primitive (Pintcomp Ceq); + "%noteq", Primitive (Pintcomp Cne); + "%ltint", Primitive (Pintcomp Clt); + "%leint", Primitive (Pintcomp Cle); + "%gtint", Primitive (Pintcomp Cgt); + "%geint", Primitive (Pintcomp Cge); + "%incr", Primitive (Poffsetref(1)); + "%decr", Primitive (Poffsetref(-1)); + "%intoffloat", Primitive Pintoffloat; + "%floatofint", Primitive Pfloatofint; + "%negfloat", Primitive Pnegfloat; + "%absfloat", Primitive Pabsfloat; + "%addfloat", Primitive Paddfloat; + "%subfloat", Primitive Psubfloat; + "%mulfloat", Primitive Pmulfloat; + "%divfloat", Primitive Pdivfloat; + "%eqfloat", Primitive (Pfloatcomp CFeq); + "%noteqfloat", Primitive (Pfloatcomp CFneq); + "%ltfloat", Primitive (Pfloatcomp CFlt); + "%lefloat", Primitive (Pfloatcomp CFle); + "%gtfloat", Primitive (Pfloatcomp CFgt); + "%gefloat", Primitive (Pfloatcomp CFge); + "%string_length", Primitive Pstringlength; + "%string_safe_get", Primitive Pstringrefs; + "%string_safe_set", Primitive Pbytessets; + "%string_unsafe_get", Primitive Pstringrefu; + "%string_unsafe_set", Primitive Pbytessetu; + "%bytes_length", Primitive Pbyteslength; + "%bytes_safe_get", Primitive Pbytesrefs; + "%bytes_safe_set", Primitive Pbytessets; + "%bytes_unsafe_get", Primitive Pbytesrefu; + "%bytes_unsafe_set", Primitive Pbytessetu; + "%array_length", Primitive (Parraylength gen_array_kind); + "%array_safe_get", Primitive (Parrayrefs gen_array_kind); + "%array_safe_set", Primitive (Parraysets gen_array_kind); + "%array_unsafe_get", Primitive (Parrayrefu gen_array_kind); + "%array_unsafe_set", Primitive (Parraysetu gen_array_kind); + "%obj_size", Primitive (Parraylength gen_array_kind); + "%obj_field", Primitive (Parrayrefu gen_array_kind); + "%obj_set_field", Primitive (Parraysetu gen_array_kind); + "%floatarray_length", Primitive (Parraylength Pfloatarray); + "%floatarray_safe_get", Primitive (Parrayrefs Pfloatarray); + 
"%floatarray_safe_set", Primitive (Parraysets Pfloatarray); + "%floatarray_unsafe_get", Primitive (Parrayrefu Pfloatarray); + "%floatarray_unsafe_set", Primitive (Parraysetu Pfloatarray); + "%obj_is_int", Primitive Pisint; + "%lazy_force", Lazy_force; + "%nativeint_of_int", Primitive (Pbintofint Pnativeint); + "%nativeint_to_int", Primitive (Pintofbint Pnativeint); + "%nativeint_neg", Primitive (Pnegbint Pnativeint); + "%nativeint_add", Primitive (Paddbint Pnativeint); + "%nativeint_sub", Primitive (Psubbint Pnativeint); + "%nativeint_mul", Primitive (Pmulbint Pnativeint); + "%nativeint_div", Primitive (Pdivbint { size = Pnativeint; is_safe = Safe }); + "%nativeint_mod", Primitive (Pmodbint { size = Pnativeint; is_safe = Safe }); + "%nativeint_and", Primitive (Pandbint Pnativeint); + "%nativeint_or", Primitive (Porbint Pnativeint); + "%nativeint_xor", Primitive (Pxorbint Pnativeint); + "%nativeint_lsl", Primitive (Plslbint Pnativeint); + "%nativeint_lsr", Primitive (Plsrbint Pnativeint); + "%nativeint_asr", Primitive (Pasrbint Pnativeint); + "%int32_of_int", Primitive (Pbintofint Pint32); + "%int32_to_int", Primitive (Pintofbint Pint32); + "%int32_neg", Primitive (Pnegbint Pint32); + "%int32_add", Primitive (Paddbint Pint32); + "%int32_sub", Primitive (Psubbint Pint32); + "%int32_mul", Primitive (Pmulbint Pint32); + "%int32_div", Primitive (Pdivbint { size = Pint32; is_safe = Safe }); + "%int32_mod", Primitive (Pmodbint { size = Pint32; is_safe = Safe }); + "%int32_and", Primitive (Pandbint Pint32); + "%int32_or", Primitive (Porbint Pint32); + "%int32_xor", Primitive (Pxorbint Pint32); + "%int32_lsl", Primitive (Plslbint Pint32); + "%int32_lsr", Primitive (Plsrbint Pint32); + "%int32_asr", Primitive (Pasrbint Pint32); + "%int64_of_int", Primitive (Pbintofint Pint64); + "%int64_to_int", Primitive (Pintofbint Pint64); + "%int64_neg", Primitive (Pnegbint Pint64); + "%int64_add", Primitive (Paddbint Pint64); + "%int64_sub", Primitive (Psubbint Pint64); + "%int64_mul", 
Primitive (Pmulbint Pint64); + "%int64_div", Primitive (Pdivbint { size = Pint64; is_safe = Safe }); + "%int64_mod", Primitive (Pmodbint { size = Pint64; is_safe = Safe }); + "%int64_and", Primitive (Pandbint Pint64); + "%int64_or", Primitive (Porbint Pint64); + "%int64_xor", Primitive (Pxorbint Pint64); + "%int64_lsl", Primitive (Plslbint Pint64); + "%int64_lsr", Primitive (Plsrbint Pint64); + "%int64_asr", Primitive (Pasrbint Pint64); + "%nativeint_of_int32", Primitive (Pcvtbint(Pint32, Pnativeint)); + "%nativeint_to_int32", Primitive (Pcvtbint(Pnativeint, Pint32)); + "%int64_of_int32", Primitive (Pcvtbint(Pint32, Pint64)); + "%int64_to_int32", Primitive (Pcvtbint(Pint64, Pint32)); + "%int64_of_nativeint", Primitive (Pcvtbint(Pnativeint, Pint64)); + "%int64_to_nativeint", Primitive (Pcvtbint(Pint64, Pnativeint)); + "%caml_ba_ref_1", + Primitive + (Pbigarrayref(false, 1, Pbigarray_unknown, Pbigarray_unknown_layout)); + "%caml_ba_ref_2", + Primitive + (Pbigarrayref(false, 2, Pbigarray_unknown, Pbigarray_unknown_layout)); + "%caml_ba_ref_3", + Primitive + (Pbigarrayref(false, 3, Pbigarray_unknown, Pbigarray_unknown_layout)); + "%caml_ba_set_1", + Primitive + (Pbigarrayset(false, 1, Pbigarray_unknown, Pbigarray_unknown_layout)); + "%caml_ba_set_2", + Primitive + (Pbigarrayset(false, 2, Pbigarray_unknown, Pbigarray_unknown_layout)); + "%caml_ba_set_3", + Primitive + (Pbigarrayset(false, 3, Pbigarray_unknown, Pbigarray_unknown_layout)); + "%caml_ba_unsafe_ref_1", + Primitive + (Pbigarrayref(true, 1, Pbigarray_unknown, Pbigarray_unknown_layout)); + "%caml_ba_unsafe_ref_2", + Primitive + (Pbigarrayref(true, 2, Pbigarray_unknown, Pbigarray_unknown_layout)); + "%caml_ba_unsafe_ref_3", + Primitive + (Pbigarrayref(true, 3, Pbigarray_unknown, Pbigarray_unknown_layout)); + "%caml_ba_unsafe_set_1", + Primitive + (Pbigarrayset(true, 1, Pbigarray_unknown, Pbigarray_unknown_layout)); + "%caml_ba_unsafe_set_2", + Primitive + (Pbigarrayset(true, 2, Pbigarray_unknown, 
Pbigarray_unknown_layout)); + "%caml_ba_unsafe_set_3", + Primitive + (Pbigarrayset(true, 3, Pbigarray_unknown, Pbigarray_unknown_layout)); + "%caml_ba_dim_1", Primitive (Pbigarraydim(1)); + "%caml_ba_dim_2", Primitive (Pbigarraydim(2)); + "%caml_ba_dim_3", Primitive (Pbigarraydim(3)); + "%caml_string_get16", Primitive (Pstring_load_16(false)); + "%caml_string_get16u", Primitive (Pstring_load_16(true)); + "%caml_string_get32", Primitive (Pstring_load_32(false)); + "%caml_string_get32u", Primitive (Pstring_load_32(true)); + "%caml_string_get64", Primitive (Pstring_load_64(false)); + "%caml_string_get64u", Primitive (Pstring_load_64(true)); + "%caml_string_set16", Primitive (Pbytes_set_16(false)); + "%caml_string_set16u", Primitive (Pbytes_set_16(true)); + "%caml_string_set32", Primitive (Pbytes_set_32(false)); + "%caml_string_set32u", Primitive (Pbytes_set_32(true)); + "%caml_string_set64", Primitive (Pbytes_set_64(false)); + "%caml_string_set64u", Primitive (Pbytes_set_64(true)); + "%caml_bytes_get16", Primitive (Pbytes_load_16(false)); + "%caml_bytes_get16u", Primitive (Pbytes_load_16(true)); + "%caml_bytes_get32", Primitive (Pbytes_load_32(false)); + "%caml_bytes_get32u", Primitive (Pbytes_load_32(true)); + "%caml_bytes_get64", Primitive (Pbytes_load_64(false)); + "%caml_bytes_get64u", Primitive (Pbytes_load_64(true)); + "%caml_bytes_set16", Primitive (Pbytes_set_16(false)); + "%caml_bytes_set16u", Primitive (Pbytes_set_16(true)); + "%caml_bytes_set32", Primitive (Pbytes_set_32(false)); + "%caml_bytes_set32u", Primitive (Pbytes_set_32(true)); + "%caml_bytes_set64", Primitive (Pbytes_set_64(false)); + "%caml_bytes_set64u", Primitive (Pbytes_set_64(true)); + "%caml_bigstring_get16", Primitive (Pbigstring_load_16(false)); + "%caml_bigstring_get16u", Primitive (Pbigstring_load_16(true)); + "%caml_bigstring_get32", Primitive (Pbigstring_load_32(false)); + "%caml_bigstring_get32u", Primitive (Pbigstring_load_32(true)); + "%caml_bigstring_get64", Primitive 
(Pbigstring_load_64(false)); + "%caml_bigstring_get64u", Primitive (Pbigstring_load_64(true)); + "%caml_bigstring_set16", Primitive (Pbigstring_set_16(false)); + "%caml_bigstring_set16u", Primitive (Pbigstring_set_16(true)); + "%caml_bigstring_set32", Primitive (Pbigstring_set_32(false)); + "%caml_bigstring_set32u", Primitive (Pbigstring_set_32(true)); + "%caml_bigstring_set64", Primitive (Pbigstring_set_64(false)); + "%caml_bigstring_set64u", Primitive (Pbigstring_set_64(true)); + "%bswap16", Primitive Pbswap16; + "%bswap_int32", Primitive (Pbbswap(Pint32)); + "%bswap_int64", Primitive (Pbbswap(Pint64)); + "%bswap_native", Primitive (Pbbswap(Pnativeint)); + "%int_as_pointer", Primitive Pint_as_pointer; + "%opaque", Primitive Popaque; + "%send", Send; + "%sendself", Send_self; + "%sendcache", Send_cache; + "%equal", Comparison(Equal, Compare_generic); + "%notequal", Comparison(Not_equal, Compare_generic); + "%lessequal", Comparison(Less_equal, Compare_generic); + "%lessthan", Comparison(Less_than, Compare_generic); + "%greaterequal", Comparison(Greater_equal, Compare_generic); + "%greaterthan", Comparison(Greater_than, Compare_generic); + "%compare", Comparison(Compare, Compare_generic); +] + +let lookup_primitive loc p env path = + match Hashtbl.find primitives_table p.prim_name with + | prim -> prim + | exception Not_found -> + if String.length p.prim_name > 0 && p.prim_name.[0] = '%' then + raise(Error(loc, Unknown_builtin_primitive p.prim_name)); + add_used_primitive loc env path; + Primitive (Pccall p) + +let simplify_constant_constructor = function + | Equal -> true + | Not_equal -> true + | Less_equal -> false + | Less_than -> false + | Greater_equal -> false + | Greater_than -> false + | Compare -> false + +(* The following function computes the greatest lower bound in the + semilattice of array kinds: + gen + / \ + addr float + | + int + Note that the GLB is not guaranteed to exist, in which case we return + our first argument instead of raising a fatal 
error because, although + it cannot happen in a well-typed program, (ab)use of Obj.magic can + probably trigger it. +*) +let glb_array_type t1 t2 = + match t1, t2 with + | Pfloatarray, (Paddrarray | Pintarray) + | (Paddrarray | Pintarray), Pfloatarray -> t1 + + | Pgenarray, x | x, Pgenarray -> x + | Paddrarray, x | x, Paddrarray -> x + | Pintarray, Pintarray -> Pintarray + | Pfloatarray, Pfloatarray -> Pfloatarray + +(* Specialize a primitive from available type information. *) + +let specialize_primitive env ty ~has_constant_constructor prim = + let param_tys = + match is_function_type env ty with + | None -> [] + | Some (p1, rhs) -> + match is_function_type env rhs with + | None -> [p1] + | Some (p2, _) -> [p1;p2] + in + match prim, param_tys with + | Primitive (Psetfield(n, Pointer, init)), [_; p2] -> begin + match maybe_pointer_type env p2 with + | Pointer -> None + | Immediate -> Some (Primitive (Psetfield(n, Immediate, init))) + end + | Primitive (Parraylength t), [p] -> begin + let array_type = glb_array_type t (array_type_kind env p) in + if t = array_type then None + else Some (Primitive (Parraylength array_type)) + end + | Primitive (Parrayrefu t), p1 :: _ -> begin + let array_type = glb_array_type t (array_type_kind env p1) in + if t = array_type then None + else Some (Primitive (Parrayrefu array_type)) + end + | Primitive (Parraysetu t), p1 :: _ -> begin + let array_type = glb_array_type t (array_type_kind env p1) in + if t = array_type then None + else Some (Primitive (Parraysetu array_type)) + end + | Primitive (Parrayrefs t), p1 :: _ -> begin + let array_type = glb_array_type t (array_type_kind env p1) in + if t = array_type then None + else Some (Primitive (Parrayrefs array_type)) + end + | Primitive (Parraysets t), p1 :: _ -> begin + let array_type = glb_array_type t (array_type_kind env p1) in + if t = array_type then None + else Some (Primitive (Parraysets array_type)) + end + | Primitive (Pbigarrayref(unsafe, n, Pbigarray_unknown, + 
Pbigarray_unknown_layout)), p1 :: _ -> begin + let (k, l) = bigarray_type_kind_and_layout env p1 in + match k, l with + | Pbigarray_unknown, Pbigarray_unknown_layout -> None + | _, _ -> Some (Primitive (Pbigarrayref(unsafe, n, k, l))) + end + | Primitive (Pbigarrayset(unsafe, n, Pbigarray_unknown, + Pbigarray_unknown_layout)), p1 :: _ -> begin + let (k, l) = bigarray_type_kind_and_layout env p1 in + match k, l with + | Pbigarray_unknown, Pbigarray_unknown_layout -> None + | _, _ -> Some (Primitive (Pbigarrayset(unsafe, n, k, l))) + end + | Primitive (Pmakeblock(tag, mut, None)), fields -> begin + let shape = List.map (Typeopt.value_kind env) fields in + let useful = List.exists (fun knd -> knd <> Pgenval) shape in + if useful then Some (Primitive (Pmakeblock(tag, mut, Some shape))) + else None + end + | Comparison(comp, Compare_generic), p1 :: _ -> + if (has_constant_constructor + && simplify_constant_constructor comp) then begin + Some (Comparison(comp, Compare_ints)) + end else if (is_base_type env p1 Predef.path_int + || is_base_type env p1 Predef.path_char + || (maybe_pointer_type env p1 = Immediate)) then begin + Some (Comparison(comp, Compare_ints)) + end else if is_base_type env p1 Predef.path_float then begin + Some (Comparison(comp, Compare_floats)) + end else if is_base_type env p1 Predef.path_string then begin + Some (Comparison(comp, Compare_strings)) + end else if is_base_type env p1 Predef.path_bytes then begin + Some (Comparison(comp, Compare_bytes)) + end else if is_base_type env p1 Predef.path_nativeint then begin + Some (Comparison(comp, Compare_nativeints)) + end else if is_base_type env p1 Predef.path_int32 then begin + Some (Comparison(comp, Compare_int32s)) + end else if is_base_type env p1 Predef.path_int64 then begin + Some (Comparison(comp, Compare_int64s)) + end else begin + None + end + | _ -> None + +let unboxed_compare name native_repr = + Primitive.make ~name ~alloc:false ~native_name:(name^"_unboxed") + 
~native_repr_args:[native_repr;native_repr] ~native_repr_res:Untagged_int + +let caml_equal = + Primitive.simple ~name:"caml_equal" ~arity:2 ~alloc:true +let caml_string_equal = + Primitive.simple ~name:"caml_string_equal" ~arity:2 ~alloc:false +let caml_bytes_equal = + Primitive.simple ~name:"caml_bytes_equal" ~arity:2 ~alloc:false +let caml_notequal = + Primitive.simple ~name:"caml_notequal" ~arity:2 ~alloc:true +let caml_string_notequal = + Primitive.simple ~name:"caml_string_notequal" ~arity:2 ~alloc:false +let caml_bytes_notequal = + Primitive.simple ~name:"caml_bytes_notequal" ~arity:2 ~alloc:false +let caml_lessequal = + Primitive.simple ~name:"caml_lessequal" ~arity:2 ~alloc:true +let caml_string_lessequal = + Primitive.simple ~name:"caml_string_lessequal" ~arity:2 ~alloc:false +let caml_bytes_lessequal = + Primitive.simple ~name:"caml_bytes_lessequal" ~arity:2 ~alloc:false +let caml_lessthan = + Primitive.simple ~name:"caml_lessthan" ~arity:2 ~alloc:true +let caml_string_lessthan = + Primitive.simple ~name:"caml_string_lessthan" ~arity:2 ~alloc:false +let caml_bytes_lessthan = + Primitive.simple ~name:"caml_bytes_lessthan" ~arity:2 ~alloc:false +let caml_greaterequal = + Primitive.simple ~name:"caml_greaterequal" ~arity:2 ~alloc:true +let caml_string_greaterequal = + Primitive.simple ~name:"caml_string_greaterequal" ~arity:2 ~alloc:false +let caml_bytes_greaterequal = + Primitive.simple ~name:"caml_bytes_greaterequal" ~arity:2 ~alloc:false +let caml_greaterthan = + Primitive.simple ~name:"caml_greaterthan" ~arity:2 ~alloc:true +let caml_string_greaterthan = + Primitive.simple ~name:"caml_string_greaterthan" ~arity:2 ~alloc: false +let caml_bytes_greaterthan = + Primitive.simple ~name:"caml_bytes_greaterthan" ~arity:2 ~alloc: false +let caml_compare = + Primitive.simple ~name:"caml_compare" ~arity:2 ~alloc:true +let caml_int_compare = + (* Not unboxed since the comparison is done directly on tagged int *) + Primitive.simple ~name:"caml_int_compare" ~arity:2 
~alloc:false +let caml_float_compare = + unboxed_compare "caml_float_compare" Unboxed_float +let caml_string_compare = + Primitive.simple ~name:"caml_string_compare" ~arity:2 ~alloc:false +let caml_bytes_compare = + Primitive.simple ~name:"caml_bytes_compare" ~arity:2 ~alloc:false +let caml_nativeint_compare = + unboxed_compare "caml_nativeint_compare" (Unboxed_integer Pnativeint) +let caml_int32_compare = + unboxed_compare "caml_int32_compare" (Unboxed_integer Pint32) +let caml_int64_compare = + unboxed_compare "caml_int64_compare" (Unboxed_integer Pint64) + +let comparison_primitive comparison comparison_kind = + match comparison, comparison_kind with + | Equal, Compare_generic -> Pccall caml_equal + | Equal, Compare_ints -> Pintcomp Ceq + | Equal, Compare_floats -> Pfloatcomp CFeq + | Equal, Compare_strings -> Pccall caml_string_equal + | Equal, Compare_bytes -> Pccall caml_bytes_equal + | Equal, Compare_nativeints -> Pbintcomp(Pnativeint, Ceq) + | Equal, Compare_int32s -> Pbintcomp(Pint32, Ceq) + | Equal, Compare_int64s -> Pbintcomp(Pint64, Ceq) + | Not_equal, Compare_generic -> Pccall caml_notequal + | Not_equal, Compare_ints -> Pintcomp Cne + | Not_equal, Compare_floats -> Pfloatcomp CFneq + | Not_equal, Compare_strings -> Pccall caml_string_notequal + | Not_equal, Compare_bytes -> Pccall caml_bytes_notequal + | Not_equal, Compare_nativeints -> Pbintcomp(Pnativeint, Cne) + | Not_equal, Compare_int32s -> Pbintcomp(Pint32, Cne) + | Not_equal, Compare_int64s -> Pbintcomp(Pint64, Cne) + | Less_equal, Compare_generic -> Pccall caml_lessequal + | Less_equal, Compare_ints -> Pintcomp Cle + | Less_equal, Compare_floats -> Pfloatcomp CFle + | Less_equal, Compare_strings -> Pccall caml_string_lessequal + | Less_equal, Compare_bytes -> Pccall caml_bytes_lessequal + | Less_equal, Compare_nativeints -> Pbintcomp(Pnativeint, Cle) + | Less_equal, Compare_int32s -> Pbintcomp(Pint32, Cle) + | Less_equal, Compare_int64s -> Pbintcomp(Pint64, Cle) + | Less_than, Compare_generic 
-> Pccall caml_lessthan + | Less_than, Compare_ints -> Pintcomp Clt + | Less_than, Compare_floats -> Pfloatcomp CFlt + | Less_than, Compare_strings -> Pccall caml_string_lessthan + | Less_than, Compare_bytes -> Pccall caml_bytes_lessthan + | Less_than, Compare_nativeints -> Pbintcomp(Pnativeint, Clt) + | Less_than, Compare_int32s -> Pbintcomp(Pint32, Clt) + | Less_than, Compare_int64s -> Pbintcomp(Pint64, Clt) + | Greater_equal, Compare_generic -> Pccall caml_greaterequal + | Greater_equal, Compare_ints -> Pintcomp Cge + | Greater_equal, Compare_floats -> Pfloatcomp CFge + | Greater_equal, Compare_strings -> Pccall caml_string_greaterequal + | Greater_equal, Compare_bytes -> Pccall caml_bytes_greaterequal + | Greater_equal, Compare_nativeints -> Pbintcomp(Pnativeint, Cge) + | Greater_equal, Compare_int32s -> Pbintcomp(Pint32, Cge) + | Greater_equal, Compare_int64s -> Pbintcomp(Pint64, Cge) + | Greater_than, Compare_generic -> Pccall caml_greaterthan + | Greater_than, Compare_ints -> Pintcomp Cgt + | Greater_than, Compare_floats -> Pfloatcomp CFgt + | Greater_than, Compare_strings -> Pccall caml_string_greaterthan + | Greater_than, Compare_bytes -> Pccall caml_bytes_greaterthan + | Greater_than, Compare_nativeints -> Pbintcomp(Pnativeint, Cgt) + | Greater_than, Compare_int32s -> Pbintcomp(Pint32, Cgt) + | Greater_than, Compare_int64s -> Pbintcomp(Pint64, Cgt) + | Compare, Compare_generic -> Pccall caml_compare + | Compare, Compare_ints -> Pccall caml_int_compare + | Compare, Compare_floats -> Pccall caml_float_compare + | Compare, Compare_strings -> Pccall caml_string_compare + | Compare, Compare_bytes -> Pccall caml_bytes_compare + | Compare, Compare_nativeints -> Pccall caml_nativeint_compare + | Compare, Compare_int32s -> Pccall caml_int32_compare + | Compare, Compare_int64s -> Pccall caml_int64_compare + +let lambda_of_loc kind loc = + let loc_start = loc.Location.loc_start in + let (file, lnum, cnum) = Location.get_pos_info loc_start in + let enum = 
loc.Location.loc_end.Lexing.pos_cnum - + loc_start.Lexing.pos_cnum + cnum in + match kind with + | Loc_POS -> + Lconst (Const_block (0, [ + Const_immstring file; + Const_base (Const_int lnum); + Const_base (Const_int cnum); + Const_base (Const_int enum); + ])) + | Loc_FILE -> Lconst (Const_immstring file) + | Loc_MODULE -> + let filename = Filename.basename file in + let name = Env.get_unit_name () in + let module_name = if name = "" then "//"^filename^"//" else name in + Lconst (Const_immstring module_name) + | Loc_LOC -> + let loc = Printf.sprintf "File %S, line %d, characters %d-%d" + file lnum cnum enum in + Lconst (Const_immstring loc) + | Loc_LINE -> Lconst (Const_base (Const_int lnum)) + +let caml_restore_raw_backtrace = + Primitive.simple ~name:"caml_restore_raw_backtrace" ~arity:2 ~alloc:false + +let try_ids = Hashtbl.create 8 + +let add_exception_ident id = + Hashtbl.replace try_ids id () + +let remove_exception_ident id = + Hashtbl.remove try_ids id + +let lambda_of_prim prim_name prim loc args arg_exps = + match prim, args with + | Primitive prim, args -> + Lprim(prim, args, loc) + | Comparison(comp, knd), args -> + let prim = comparison_primitive comp knd in + Lprim(prim, args, loc) + | Raise kind, [arg] -> + let kind = + match kind, arg with + | Raise_regular, Lvar argv when Hashtbl.mem try_ids argv -> + Raise_reraise + | _, _ -> + kind + in + let arg = + match arg_exps with + | None -> arg + | Some [arg_exp] -> event_after arg_exp arg + | Some _ -> assert false + in + Lprim(Praise kind, [arg], loc) + | Raise_with_backtrace, [exn; bt] -> + let vexn = Ident.create "exn" in + let raise_arg = + match arg_exps with + | None -> Lvar vexn + | Some [exn_exp; _] -> event_after exn_exp (Lvar vexn) + | Some _ -> assert false + in + Llet(Strict, Pgenval, vexn, exn, + Lsequence(Lprim(Pccall caml_restore_raw_backtrace, + [Lvar vexn; bt], + loc), + Lprim(Praise Raise_reraise, [raise_arg], loc))) + | Lazy_force, [arg] -> + Matching.inline_lazy_force arg 
Location.none + | Loc kind, [] -> + lambda_of_loc kind loc + | Loc kind, [arg] -> + let lam = lambda_of_loc kind loc in + Lprim(Pmakeblock(0, Immutable, None), [lam; arg], loc) + | Send, [obj; meth] -> + Lsend(Public, meth, obj, [], loc) + | Send_self, [obj; meth] -> + Lsend(Self, meth, obj, [], loc) + | Send_cache, [obj; meth; cache; pos] -> + Lsend(Cached, meth, obj, [cache; pos], loc) + | (Raise _ | Raise_with_backtrace + | Lazy_force | Loc _ + | Send | Send_self | Send_cache), _ -> + raise(Error(loc, Wrong_arity_builtin_primitive prim_name)) + +(* Eta-expand a primitive *) + +let transl_primitive loc p env ty path = + let prim = lookup_primitive loc p env path in + let has_constant_constructor = false in + let prim = + match specialize_primitive env ty ~has_constant_constructor prim with + | None -> prim + | Some prim -> prim + in + let rec make_params n = + if n <= 0 then [] else Ident.create "prim" :: make_params (n-1) + in + let params = make_params p.prim_arity in + let args = List.map (fun id -> Lvar id) params in + let body = lambda_of_prim p.prim_name prim loc args None in + match params with + | [] -> body + | _ -> + Lfunction{ kind = Curried; params; + attr = default_stub_attribute; + loc = loc; + body = body; } + +(* Determine if a primitive is a Pccall or will be turned later into + a C function call that may raise an exception *) +let primitive_is_ccall = function + | Pccall _ | Pstringrefs | Pbytesrefs | Pbytessets | Parrayrefs _ | + Parraysets _ | Pbigarrayref _ | Pbigarrayset _ | Pduprecord _ | Pdirapply | + Prevapply -> true + | _ -> false + +(* Determine if a primitive should be surrounded by an "after" debug event *) +let primitive_needs_event_after = function + | Primitive prim -> primitive_is_ccall prim + | Comparison(comp, knd) -> + primitive_is_ccall (comparison_primitive comp knd) + | Lazy_force | Send | Send_self | Send_cache -> true + | Raise _ | Raise_with_backtrace | Loc _ -> false + +let transl_primitive_application loc p env ty path 
exp args arg_exps = + let prim = lookup_primitive loc p env (Some path) in + let has_constant_constructor = + match arg_exps with + | [_; {exp_desc = Texp_construct(_, {cstr_tag = Cstr_constant _}, _)}] + | [{exp_desc = Texp_construct(_, {cstr_tag = Cstr_constant _}, _)}; _] + | [_; {exp_desc = Texp_variant(_, None)}] + | [{exp_desc = Texp_variant(_, None)}; _] -> true + | _ -> false + in + let prim = + match specialize_primitive env ty ~has_constant_constructor prim with + | None -> prim + | Some prim -> prim + in + let lam = lambda_of_prim p.prim_name prim loc args (Some arg_exps) in + let lam = + if primitive_needs_event_after prim then begin + match exp with + | None -> lam + | Some exp -> event_after exp lam + end else begin + lam + end + in + lam + +(* Error report *) + +open Format + +let report_error ppf = function + | Unknown_builtin_primitive prim_name -> + fprintf ppf "Unknown builtin primitive \"%s\"" prim_name + | Wrong_arity_builtin_primitive prim_name -> + fprintf ppf "Wrong arity for builtin primitive \"%s\"" prim_name + +let () = + Location.register_error_of_exn + (function + | Error (loc, err) -> + Some (Location.error_of_printer loc report_error err) + | _ -> + None + ) diff --git a/bytecomp/translprim.mli b/bytecomp/translprim.mli new file mode 100644 index 00000000..d8941656 --- /dev/null +++ b/bytecomp/translprim.mli @@ -0,0 +1,49 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. 
*) +(* *) +(**************************************************************************) + +(* Insertion of debugging events *) + +val event_before : Typedtree.expression -> Lambda.lambda -> Lambda.lambda + +val event_after : Typedtree.expression -> Lambda.lambda -> Lambda.lambda + +(* Translation of primitives *) + +val add_exception_ident : Ident.t -> unit +val remove_exception_ident : Ident.t -> unit + +val clear_used_primitives : unit -> unit +val get_used_primitives: unit -> Path.t list + +val transl_primitive : + Location.t -> Primitive.description -> Env.t -> + Types.type_expr -> Path.t option -> Lambda.lambda + +val transl_primitive_application : + Location.t -> Primitive.description -> Env.t -> + Types.type_expr -> Path.t -> Typedtree.expression option -> + Lambda.lambda list -> Typedtree.expression list -> Lambda.lambda + +(* Errors *) + +type error = + | Unknown_builtin_primitive of string + | Wrong_arity_builtin_primitive of string + +exception Error of Location.t * error + +open Format + +val report_error : formatter -> error -> unit diff --git a/byterun/.depend b/byterun/.depend index 21a592ad..c0f81615 100644 --- a/byterun/.depend +++ b/byterun/.depend @@ -8,7 +8,7 @@ alloc.$(O): alloc.c caml/alloc.h caml/misc.h caml/config.h caml/m.h caml/s.h \ array.$(O): array.c caml/alloc.h caml/misc.h caml/config.h caml/m.h caml/s.h \ caml/mlvalues.h caml/fail.h caml/memory.h caml/gc.h caml/major_gc.h \ caml/freelist.h caml/minor_gc.h caml/address_class.h caml/signals.h \ - caml/spacetime.h + caml/spacetime.h caml/io.h caml/stack.h backtrace.$(O): backtrace.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/memory.h caml/gc.h caml/major_gc.h \ caml/freelist.h caml/minor_gc.h caml/address_class.h caml/backtrace.h \ @@ -22,7 +22,8 @@ backtrace_prim.$(O): backtrace_prim.c caml/config.h caml/m.h caml/s.h \ bigarray.$(O): bigarray.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/bigarray.h caml/custom.h 
caml/fail.h \ caml/intext.h caml/io.h caml/hash.h caml/memory.h caml/gc.h \ - caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h + caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h \ + caml/signals.h callback.$(O): callback.c caml/callback.h caml/mlvalues.h caml/config.h \ caml/m.h caml/s.h caml/misc.h caml/fail.h caml/memory.h caml/gc.h \ caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h \ @@ -37,7 +38,7 @@ compare.$(O): compare.c caml/custom.h caml/mlvalues.h caml/config.h caml/m.h \ custom.$(O): custom.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/custom.h caml/fail.h caml/memory.h \ caml/gc.h caml/major_gc.h caml/freelist.h caml/minor_gc.h \ - caml/address_class.h + caml/address_class.h caml/signals.h debugger.$(O): debugger.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/debugger.h caml/osdeps.h caml/memory.h \ caml/gc.h caml/major_gc.h caml/freelist.h caml/minor_gc.h \ @@ -139,7 +140,7 @@ misc.$(O): misc.c caml/config.h caml/m.h caml/s.h caml/misc.h caml/memory.h \ obj.$(O): obj.c caml/alloc.h caml/misc.h caml/config.h caml/m.h caml/s.h \ caml/mlvalues.h caml/fail.h caml/gc.h caml/interp.h caml/major_gc.h \ caml/freelist.h caml/memory.h caml/minor_gc.h caml/address_class.h \ - caml/prims.h caml/spacetime.h + caml/prims.h caml/spacetime.h caml/io.h caml/stack.h parsing.$(O): parsing.c caml/config.h caml/m.h caml/s.h caml/mlvalues.h \ caml/misc.h caml/memory.h caml/gc.h caml/major_gc.h caml/freelist.h \ caml/minor_gc.h caml/address_class.h caml/alloc.h @@ -189,8 +190,6 @@ sys.$(O): sys.c caml/config.h caml/m.h caml/s.h caml/alloc.h caml/misc.h \ caml/osdeps.h caml/memory.h caml/gc.h caml/major_gc.h caml/freelist.h \ caml/minor_gc.h caml/address_class.h caml/signals.h caml/stacks.h \ caml/sys.h caml/version.h caml/callback.h caml/startup_aux.h -terminfo.$(O): terminfo.c caml/config.h caml/m.h caml/s.h caml/alloc.h \ - caml/misc.h 
caml/mlvalues.h caml/fail.h caml/io.h unix.$(O): unix.c caml/config.h caml/m.h caml/s.h caml/fail.h caml/misc.h \ caml/mlvalues.h caml/memory.h caml/gc.h caml/major_gc.h \ caml/freelist.h caml/minor_gc.h caml/address_class.h caml/osdeps.h \ @@ -209,7 +208,7 @@ alloc.d.$(O): alloc.c caml/alloc.h caml/misc.h caml/config.h caml/m.h caml/s.h \ array.d.$(O): array.c caml/alloc.h caml/misc.h caml/config.h caml/m.h caml/s.h \ caml/mlvalues.h caml/fail.h caml/memory.h caml/gc.h caml/major_gc.h \ caml/freelist.h caml/minor_gc.h caml/address_class.h caml/signals.h \ - caml/spacetime.h + caml/spacetime.h caml/io.h caml/stack.h backtrace.d.$(O): backtrace.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/memory.h caml/gc.h caml/major_gc.h \ caml/freelist.h caml/minor_gc.h caml/address_class.h caml/backtrace.h \ @@ -223,7 +222,8 @@ backtrace_prim.d.$(O): backtrace_prim.c caml/config.h caml/m.h caml/s.h \ bigarray.d.$(O): bigarray.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/bigarray.h caml/custom.h caml/fail.h \ caml/intext.h caml/io.h caml/hash.h caml/memory.h caml/gc.h \ - caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h + caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h \ + caml/signals.h callback.d.$(O): callback.c caml/callback.h caml/mlvalues.h caml/config.h \ caml/m.h caml/s.h caml/misc.h caml/fail.h caml/memory.h caml/gc.h \ caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h \ @@ -238,7 +238,7 @@ compare.d.$(O): compare.c caml/custom.h caml/mlvalues.h caml/config.h caml/m.h \ custom.d.$(O): custom.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/custom.h caml/fail.h caml/memory.h \ caml/gc.h caml/major_gc.h caml/freelist.h caml/minor_gc.h \ - caml/address_class.h + caml/address_class.h caml/signals.h debugger.d.$(O): debugger.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h 
caml/debugger.h caml/osdeps.h caml/memory.h \ caml/gc.h caml/major_gc.h caml/freelist.h caml/minor_gc.h \ @@ -343,7 +343,7 @@ misc.d.$(O): misc.c caml/config.h caml/m.h caml/s.h caml/misc.h caml/memory.h \ obj.d.$(O): obj.c caml/alloc.h caml/misc.h caml/config.h caml/m.h caml/s.h \ caml/mlvalues.h caml/fail.h caml/gc.h caml/interp.h caml/major_gc.h \ caml/freelist.h caml/memory.h caml/minor_gc.h caml/address_class.h \ - caml/prims.h caml/spacetime.h + caml/prims.h caml/spacetime.h caml/io.h caml/stack.h parsing.d.$(O): parsing.c caml/config.h caml/m.h caml/s.h caml/mlvalues.h \ caml/misc.h caml/memory.h caml/gc.h caml/major_gc.h caml/freelist.h \ caml/minor_gc.h caml/address_class.h caml/alloc.h @@ -393,8 +393,6 @@ sys.d.$(O): sys.c caml/config.h caml/m.h caml/s.h caml/alloc.h caml/misc.h \ caml/osdeps.h caml/memory.h caml/gc.h caml/major_gc.h caml/freelist.h \ caml/minor_gc.h caml/address_class.h caml/signals.h caml/stacks.h \ caml/sys.h caml/version.h caml/callback.h caml/startup_aux.h -terminfo.d.$(O): terminfo.c caml/config.h caml/m.h caml/s.h caml/alloc.h \ - caml/misc.h caml/mlvalues.h caml/fail.h caml/io.h unix.d.$(O): unix.c caml/config.h caml/m.h caml/s.h caml/fail.h caml/misc.h \ caml/mlvalues.h caml/memory.h caml/gc.h caml/major_gc.h \ caml/freelist.h caml/minor_gc.h caml/address_class.h caml/osdeps.h \ @@ -413,7 +411,7 @@ alloc.i.$(O): alloc.c caml/alloc.h caml/misc.h caml/config.h caml/m.h caml/s.h \ array.i.$(O): array.c caml/alloc.h caml/misc.h caml/config.h caml/m.h caml/s.h \ caml/mlvalues.h caml/fail.h caml/memory.h caml/gc.h caml/major_gc.h \ caml/freelist.h caml/minor_gc.h caml/address_class.h caml/signals.h \ - caml/spacetime.h + caml/spacetime.h caml/io.h caml/stack.h backtrace.i.$(O): backtrace.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/memory.h caml/gc.h caml/major_gc.h \ caml/freelist.h caml/minor_gc.h caml/address_class.h caml/backtrace.h \ @@ -427,7 +425,8 @@ backtrace_prim.i.$(O): backtrace_prim.c 
caml/config.h caml/m.h caml/s.h \ bigarray.i.$(O): bigarray.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/bigarray.h caml/custom.h caml/fail.h \ caml/intext.h caml/io.h caml/hash.h caml/memory.h caml/gc.h \ - caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h + caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h \ + caml/signals.h callback.i.$(O): callback.c caml/callback.h caml/mlvalues.h caml/config.h \ caml/m.h caml/s.h caml/misc.h caml/fail.h caml/memory.h caml/gc.h \ caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h \ @@ -442,7 +441,7 @@ compare.i.$(O): compare.c caml/custom.h caml/mlvalues.h caml/config.h caml/m.h \ custom.i.$(O): custom.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/custom.h caml/fail.h caml/memory.h \ caml/gc.h caml/major_gc.h caml/freelist.h caml/minor_gc.h \ - caml/address_class.h + caml/address_class.h caml/signals.h debugger.i.$(O): debugger.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/debugger.h caml/osdeps.h caml/memory.h \ caml/gc.h caml/major_gc.h caml/freelist.h caml/minor_gc.h \ @@ -544,7 +543,7 @@ misc.i.$(O): misc.c caml/config.h caml/m.h caml/s.h caml/misc.h caml/memory.h \ obj.i.$(O): obj.c caml/alloc.h caml/misc.h caml/config.h caml/m.h caml/s.h \ caml/mlvalues.h caml/fail.h caml/gc.h caml/interp.h caml/major_gc.h \ caml/freelist.h caml/memory.h caml/minor_gc.h caml/address_class.h \ - caml/prims.h caml/spacetime.h + caml/prims.h caml/spacetime.h caml/io.h caml/stack.h parsing.i.$(O): parsing.c caml/config.h caml/m.h caml/s.h caml/mlvalues.h \ caml/misc.h caml/memory.h caml/gc.h caml/major_gc.h caml/freelist.h \ caml/minor_gc.h caml/address_class.h caml/alloc.h @@ -594,8 +593,6 @@ sys.i.$(O): sys.c caml/config.h caml/m.h caml/s.h caml/alloc.h caml/misc.h \ caml/osdeps.h caml/memory.h caml/gc.h caml/major_gc.h caml/freelist.h \ caml/minor_gc.h caml/address_class.h 
caml/signals.h caml/stacks.h \ caml/sys.h caml/version.h caml/callback.h caml/startup_aux.h -terminfo.i.$(O): terminfo.c caml/config.h caml/m.h caml/s.h caml/alloc.h \ - caml/misc.h caml/mlvalues.h caml/fail.h caml/io.h unix.i.$(O): unix.c caml/config.h caml/m.h caml/s.h caml/fail.h caml/misc.h \ caml/mlvalues.h caml/memory.h caml/gc.h caml/major_gc.h \ caml/freelist.h caml/minor_gc.h caml/address_class.h caml/osdeps.h \ @@ -614,7 +611,7 @@ alloc.pic.$(O): alloc.c caml/alloc.h caml/misc.h caml/config.h caml/m.h caml/s.h array.pic.$(O): array.c caml/alloc.h caml/misc.h caml/config.h caml/m.h caml/s.h \ caml/mlvalues.h caml/fail.h caml/memory.h caml/gc.h caml/major_gc.h \ caml/freelist.h caml/minor_gc.h caml/address_class.h caml/signals.h \ - caml/spacetime.h + caml/spacetime.h caml/io.h caml/stack.h backtrace.pic.$(O): backtrace.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/memory.h caml/gc.h caml/major_gc.h \ caml/freelist.h caml/minor_gc.h caml/address_class.h caml/backtrace.h \ @@ -628,7 +625,8 @@ backtrace_prim.pic.$(O): backtrace_prim.c caml/config.h caml/m.h caml/s.h \ bigarray.pic.$(O): bigarray.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/bigarray.h caml/custom.h caml/fail.h \ caml/intext.h caml/io.h caml/hash.h caml/memory.h caml/gc.h \ - caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h + caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h \ + caml/signals.h callback.pic.$(O): callback.c caml/callback.h caml/mlvalues.h caml/config.h \ caml/m.h caml/s.h caml/misc.h caml/fail.h caml/memory.h caml/gc.h \ caml/major_gc.h caml/freelist.h caml/minor_gc.h caml/address_class.h \ @@ -643,7 +641,7 @@ compare.pic.$(O): compare.c caml/custom.h caml/mlvalues.h caml/config.h caml/m.h custom.pic.$(O): custom.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/custom.h caml/fail.h caml/memory.h \ caml/gc.h caml/major_gc.h 
caml/freelist.h caml/minor_gc.h \ - caml/address_class.h + caml/address_class.h caml/signals.h debugger.pic.$(O): debugger.c caml/alloc.h caml/misc.h caml/config.h caml/m.h \ caml/s.h caml/mlvalues.h caml/debugger.h caml/osdeps.h caml/memory.h \ caml/gc.h caml/major_gc.h caml/freelist.h caml/minor_gc.h \ @@ -745,7 +743,7 @@ misc.pic.$(O): misc.c caml/config.h caml/m.h caml/s.h caml/misc.h caml/memory.h obj.pic.$(O): obj.c caml/alloc.h caml/misc.h caml/config.h caml/m.h caml/s.h \ caml/mlvalues.h caml/fail.h caml/gc.h caml/interp.h caml/major_gc.h \ caml/freelist.h caml/memory.h caml/minor_gc.h caml/address_class.h \ - caml/prims.h caml/spacetime.h + caml/prims.h caml/spacetime.h caml/io.h caml/stack.h parsing.pic.$(O): parsing.c caml/config.h caml/m.h caml/s.h caml/mlvalues.h \ caml/misc.h caml/memory.h caml/gc.h caml/major_gc.h caml/freelist.h \ caml/minor_gc.h caml/address_class.h caml/alloc.h @@ -795,8 +793,6 @@ sys.pic.$(O): sys.c caml/config.h caml/m.h caml/s.h caml/alloc.h caml/misc.h \ caml/osdeps.h caml/memory.h caml/gc.h caml/major_gc.h caml/freelist.h \ caml/minor_gc.h caml/address_class.h caml/signals.h caml/stacks.h \ caml/sys.h caml/version.h caml/callback.h caml/startup_aux.h -terminfo.pic.$(O): terminfo.c caml/config.h caml/m.h caml/s.h caml/alloc.h \ - caml/misc.h caml/mlvalues.h caml/fail.h caml/io.h unix.pic.$(O): unix.c caml/config.h caml/m.h caml/s.h caml/fail.h caml/misc.h \ caml/mlvalues.h caml/memory.h caml/gc.h caml/major_gc.h \ caml/freelist.h caml/minor_gc.h caml/address_class.h caml/osdeps.h \ diff --git a/byterun/Makefile b/byterun/Makefile index 9b92a589..25b7e2c3 100644 --- a/byterun/Makefile +++ b/byterun/Makefile @@ -14,10 +14,7 @@ #************************************************************************** include ../config/Makefile - -INSTALL_BINDIR=$(DESTDIR)$(BINDIR) -INSTALL_LIBDIR=$(DESTDIR)$(LIBDIR) -INSTALL_INCDIR=$(INSTALL_LIBDIR)/caml +include ../Makefile.common # The PROGRAMS (resp. 
LIBRARIES) variable list the files to build and # install as programs in $(INSTALL_BINDIR) (resp. libraries in @@ -25,6 +22,7 @@ INSTALL_INCDIR=$(INSTALL_LIBDIR)/caml PROGRAMS = ocamlrun$(EXE) LIBRARIES = ld.conf libcamlrun.$(A) +DYNLIBRARIES= ifeq "$(RUNTIMED)" "true" PROGRAMS += ocamlrund$(EXE) @@ -38,7 +36,8 @@ endif ifeq "$(UNIX_OR_WIN32)" "unix" ifeq "$(SUPPORTS_SHARED_LIBRARIES)" "true" -LIBRARIES += libcamlrun_pic.$(A) libcamlrun_shared.$(SO) +LIBRARIES += libcamlrun_pic.$(A) +DYNLIBRARIES += libcamlrun_shared.$(SO) endif endif @@ -89,15 +88,16 @@ endif PRIMS=\ alloc.c array.c compare.c extern.c floats.c gc_ctrl.c hash.c \ intern.c interp.c ints.c io.c lexing.c md5.c meta.c obj.c parsing.c \ - signals.c str.c sys.c terminfo.c callback.c weak.c finalise.c stacks.c \ - dynlink.c backtrace_prim.c backtrace.c spacetime.c afl.c + signals.c str.c sys.c callback.c weak.c finalise.c stacks.c \ + dynlink.c backtrace_prim.c backtrace.c spacetime.c afl.c \ + bigarray.c OBJS=$(addsuffix .$(O), \ interp misc stacks fix_code startup_aux startup \ freelist major_gc minor_gc memory alloc roots globroots \ fail signals signals_byt printexc backtrace_prim backtrace \ compare ints floats str array io extern intern \ - hash sys meta parsing gc_ctrl terminfo md5 obj \ + hash sys meta parsing gc_ctrl md5 obj \ lexing callback debugger weak compact finalise custom \ dynlink spacetime afl $(UNIX_OR_WIN32) bigarray main) @@ -106,18 +106,23 @@ IOBJS=$(OBJS:.$(O)=.i.$(O)) PICOBJS=$(OBJS:.$(O)=.pic.$(O)) .PHONY: all -all: $(LIBRARIES) $(PROGRAMS) +all: $(LIBRARIES) $(DYNLIBRARIES) $(PROGRAMS) ld.conf: ../config/Makefile echo "$(STUBLIBDIR)" > $@ echo "$(LIBDIR)" >> $@ +INSTALL_INCDIR=$(INSTALL_LIBDIR)/caml + .PHONY: install install: - cp $(PROGRAMS) "$(INSTALL_BINDIR)" - cp $(LIBRARIES) "$(INSTALL_LIBDIR)" + $(INSTALL_PROG) $(PROGRAMS) "$(INSTALL_BINDIR)" + $(INSTALL_DATA) $(LIBRARIES) "$(INSTALL_LIBDIR)" + if test -n "$(DYNLIBRARIES)"; then \ + $(INSTALL_PROG) $(DYNLIBRARIES) 
"$(INSTALL_LIBDIR)"; \ + fi mkdir -p "$(INSTALL_INCDIR)" - cp caml/*.h "$(INSTALL_INCDIR)" + $(INSTALL_DATA) caml/*.h "$(INSTALL_INCDIR)" # If primitives contain duplicated lines (e.g. because the code is defined # like @@ -156,24 +161,25 @@ prims.c : primitives echo ' 0 };') > prims.c caml/opnames.h : caml/instruct.h + cat $^ | tr -d '\r' | \ sed -e '/\/\*/d' \ -e '/^#/d' \ -e 's/enum /char * names_of_/' \ -e 's/{$$/[] = {/' \ - -e 's/\([[:upper:]][[:upper:]_0-9]*\)/"\1"/g' caml/instruct.h \ - > caml/opnames.h + -e 's/\([[:upper:]][[:upper:]_0-9]*\)/"\1"/g' > $@ # caml/jumptbl.h is required only if you have GCC 2.0 or later caml/jumptbl.h : caml/instruct.h + cat $^ | tr -d '\r' | \ sed -n -e '/^ /s/ \([A-Z]\)/ \&\&lbl_\1/gp' \ - -e '/^}/q' caml/instruct.h > caml/jumptbl.h + -e '/^}/q' > $@ caml/version.h : ../VERSION ../tools/make-version-header.sh ../tools/make-version-header.sh ../VERSION > caml/version.h .PHONY: clean clean: - rm -f $(LIBRARIES) $(PROGRAMS) *.$(O) *.$(A) *.$(SO) + rm -f $(LIBRARIES) $(DYNLIBRARIES) $(PROGRAMS) *.$(O) *.$(A) *.$(SO) rm -f primitives prims.c caml/opnames.h caml/jumptbl.h rm -f caml/version.h diff --git a/byterun/alloc.c b/byterun/alloc.c index e49fabd0..8924dbc0 100644 --- a/byterun/alloc.c +++ b/byterun/alloc.c @@ -159,7 +159,7 @@ CAMLexport value caml_alloc_array(value (*funct)(char const *), } /* [len] is a number of floats */ -CAMLprim value caml_alloc_float_array(mlsize_t len) +value caml_alloc_float_array(mlsize_t len) { #ifdef FLAT_FLOAT_ARRAY mlsize_t wosize = len * Double_wosize; diff --git a/byterun/bigarray.c b/byterun/bigarray.c index a8991d0e..3e376799 100644 --- a/byterun/bigarray.c +++ b/byterun/bigarray.c @@ -13,18 +13,11 @@ /* */ /**************************************************************************/ -/* This file is an intermediate step in making the bigarray library - (in otherlibs/bigarray) a part of the standard library. 
- This file defines the basic allocation functions for bigarrays, - as well as the comparison, hashing and marshaling methods for - bigarrays. The other bigarray primitives are still defined - in otherlibs/bigarray. Memory-mapping a file as a bigarray - is being migrated to otherlibs/unix and otherlibs/win32unix. */ - #define CAML_INTERNALS #include #include +#include #include "caml/alloc.h" #include "caml/bigarray.h" #include "caml/custom.h" @@ -33,6 +26,12 @@ #include "caml/hash.h" #include "caml/memory.h" #include "caml/mlvalues.h" +#include "caml/signals.h" + +#define int8 caml_ba_int8 +#define uint8 caml_ba_uint8 +#define int16 caml_ba_int16 +#define uint16 caml_ba_uint16 /* Compute the number of elements of a big array */ @@ -440,22 +439,31 @@ static void caml_ba_deserialize_longarray(void * dest, intnat num_elts) CAMLexport uintnat caml_ba_deserialize(void * dst) { struct caml_ba_array * b = dst; - int i, elt_size; - uintnat num_elts; + int i; + uintnat num_elts, size; /* Read back header information */ b->num_dims = caml_deserialize_uint_4(); + if (b->num_dims < 0 || b->num_dims > CAML_BA_MAX_NUM_DIMS) + caml_deserialize_error("input_value: wrong number of bigarray dimensions"); b->flags = caml_deserialize_uint_4() | CAML_BA_MANAGED; b->proxy = NULL; for (i = 0; i < b->num_dims; i++) b->dim[i] = caml_deserialize_uint_4(); - /* Compute total number of elements */ - num_elts = caml_ba_num_elts(b); - /* Determine element size in bytes */ + /* Compute total number of elements. Watch out for overflows (MPR#7765). */ + num_elts = 1; + for (i = 0; i < b->num_dims; i++) { + if (caml_umul_overflow(num_elts, b->dim[i], &num_elts)) + caml_deserialize_error("input_value: size overflow for bigarray"); + } + /* Determine array size in bytes. Watch out for overflows (MPR#7765). 
*/ if ((b->flags & CAML_BA_KIND_MASK) > CAML_BA_CHAR) caml_deserialize_error("input_value: bad bigarray kind"); - elt_size = caml_ba_element_size[b->flags & CAML_BA_KIND_MASK]; + if (caml_umul_overflow(num_elts, + caml_ba_element_size[b->flags & CAML_BA_KIND_MASK], + &size)) + caml_deserialize_error("input_value: size overflow for bigarray"); /* Allocate room for data */ - b->data = malloc(elt_size * num_elts); + b->data = malloc(size); if (b->data == NULL) caml_deserialize_error("input_value: out of memory for bigarray"); /* Read data */ @@ -484,3 +492,732 @@ CAMLexport uintnat caml_ba_deserialize(void * dst) /* PR#5516: use C99's flexible array types if possible */ return SIZEOF_BA_ARRAY + b->num_dims * sizeof(intnat); } + +/* Allocate a bigarray from OCaml */ + +CAMLprim value caml_ba_create(value vkind, value vlayout, value vdim) +{ + intnat dim[CAML_BA_MAX_NUM_DIMS]; + mlsize_t num_dims; + int i, flags; + + num_dims = Wosize_val(vdim); + /* here num_dims is unsigned (mlsize_t) so no need to check (num_dims >= 0) */ + if (num_dims > CAML_BA_MAX_NUM_DIMS) + caml_invalid_argument("Bigarray.create: bad number of dimensions"); + for (i = 0; i < num_dims; i++) { + dim[i] = Long_val(Field(vdim, i)); + if (dim[i] < 0) + caml_invalid_argument("Bigarray.create: negative dimension"); + } + flags = Caml_ba_kind_val(vkind) | Caml_ba_layout_val(vlayout); + return caml_ba_alloc(flags, num_dims, NULL, dim); +} + +/* Given a big array and a vector of indices, check that the indices + are within the bounds and return the offset of the corresponding + array element in the data part of the array. 
*/ + +static long caml_ba_offset(struct caml_ba_array * b, intnat * index) +{ + intnat offset; + int i; + + offset = 0; + if ((b->flags & CAML_BA_LAYOUT_MASK) == CAML_BA_C_LAYOUT) { + /* C-style layout: row major, indices start at 0 */ + for (i = 0; i < b->num_dims; i++) { + if ((uintnat) index[i] >= (uintnat) b->dim[i]) + caml_array_bound_error(); + offset = offset * b->dim[i] + index[i]; + } + } else { + /* Fortran-style layout: column major, indices start at 1 */ + for (i = b->num_dims - 1; i >= 0; i--) { + if ((uintnat) (index[i] - 1) >= (uintnat) b->dim[i]) + caml_array_bound_error(); + offset = offset * b->dim[i] + (index[i] - 1); + } + } + return offset; +} + +/* Helper function to allocate a record of two double floats */ + +static value copy_two_doubles(double d0, double d1) +{ + value res = caml_alloc_small(2 * Double_wosize, Double_array_tag); + Store_double_field(res, 0, d0); + Store_double_field(res, 1, d1); + return res; +} + +/* Generic code to read from a big array */ + +value caml_ba_get_N(value vb, value * vind, int nind) +{ + struct caml_ba_array * b = Caml_ba_array_val(vb); + intnat index[CAML_BA_MAX_NUM_DIMS]; + int i; + intnat offset; + + /* Check number of indices = number of dimensions of array + (maybe not necessary if ML typing guarantees this) */ + if (nind != b->num_dims) + caml_invalid_argument("Bigarray.get: wrong number of indices"); + /* Compute offset and check bounds */ + for (i = 0; i < b->num_dims; i++) index[i] = Long_val(vind[i]); + offset = caml_ba_offset(b, index); + /* Perform read */ + switch ((b->flags) & CAML_BA_KIND_MASK) { + default: + CAMLassert(0); + case CAML_BA_FLOAT32: + return caml_copy_double(((float *) b->data)[offset]); + case CAML_BA_FLOAT64: + return caml_copy_double(((double *) b->data)[offset]); + case CAML_BA_SINT8: + return Val_int(((int8 *) b->data)[offset]); + case CAML_BA_UINT8: + return Val_int(((uint8 *) b->data)[offset]); + case CAML_BA_SINT16: + return Val_int(((int16 *) b->data)[offset]); + case 
CAML_BA_UINT16: + return Val_int(((uint16 *) b->data)[offset]); + case CAML_BA_INT32: + return caml_copy_int32(((int32_t *) b->data)[offset]); + case CAML_BA_INT64: + return caml_copy_int64(((int64_t *) b->data)[offset]); + case CAML_BA_NATIVE_INT: + return caml_copy_nativeint(((intnat *) b->data)[offset]); + case CAML_BA_CAML_INT: + return Val_long(((intnat *) b->data)[offset]); + case CAML_BA_COMPLEX32: + { float * p = ((float *) b->data) + offset * 2; + return copy_two_doubles(p[0], p[1]); } + case CAML_BA_COMPLEX64: + { double * p = ((double *) b->data) + offset * 2; + return copy_two_doubles(p[0], p[1]); } + case CAML_BA_CHAR: + return Val_int(((unsigned char *) b->data)[offset]); + } +} + +CAMLprim value caml_ba_get_1(value vb, value vind1) +{ + return caml_ba_get_N(vb, &vind1, 1); +} + +CAMLprim value caml_ba_get_2(value vb, value vind1, value vind2) +{ + value vind[2]; + vind[0] = vind1; vind[1] = vind2; + return caml_ba_get_N(vb, vind, 2); +} + +CAMLprim value caml_ba_get_3(value vb, value vind1, value vind2, value vind3) +{ + value vind[3]; + vind[0] = vind1; vind[1] = vind2; vind[2] = vind3; + return caml_ba_get_N(vb, vind, 3); +} + +CAMLprim value caml_ba_get_generic(value vb, value vind) +{ + return caml_ba_get_N(vb, &Field(vind, 0), Wosize_val(vind)); +} + + +CAMLprim value caml_ba_uint8_get16(value vb, value vind) +{ + intnat res; + unsigned char b1, b2; + intnat idx = Long_val(vind); + struct caml_ba_array * b = Caml_ba_array_val(vb); + if (idx < 0 || idx >= b->dim[0] - 1) caml_array_bound_error(); + b1 = ((unsigned char*) b->data)[idx]; + b2 = ((unsigned char*) b->data)[idx+1]; +#ifdef ARCH_BIG_ENDIAN + res = b1 << 8 | b2; +#else + res = b2 << 8 | b1; +#endif + return Val_int(res); +} + +CAMLprim value caml_ba_uint8_get32(value vb, value vind) +{ + intnat res; + unsigned char b1, b2, b3, b4; + intnat idx = Long_val(vind); + struct caml_ba_array * b = Caml_ba_array_val(vb); + if (idx < 0 || idx >= b->dim[0] - 3) caml_array_bound_error(); + b1 = 
((unsigned char*) b->data)[idx]; + b2 = ((unsigned char*) b->data)[idx+1]; + b3 = ((unsigned char*) b->data)[idx+2]; + b4 = ((unsigned char*) b->data)[idx+3]; +#ifdef ARCH_BIG_ENDIAN + res = b1 << 24 | b2 << 16 | b3 << 8 | b4; +#else + res = b4 << 24 | b3 << 16 | b2 << 8 | b1; +#endif + return caml_copy_int32(res); +} + +CAMLprim value caml_ba_uint8_get64(value vb, value vind) +{ + uint64_t res; + unsigned char b1, b2, b3, b4, b5, b6, b7, b8; + intnat idx = Long_val(vind); + struct caml_ba_array * b = Caml_ba_array_val(vb); + if (idx < 0 || idx >= b->dim[0] - 7) caml_array_bound_error(); + b1 = ((unsigned char*) b->data)[idx]; + b2 = ((unsigned char*) b->data)[idx+1]; + b3 = ((unsigned char*) b->data)[idx+2]; + b4 = ((unsigned char*) b->data)[idx+3]; + b5 = ((unsigned char*) b->data)[idx+4]; + b6 = ((unsigned char*) b->data)[idx+5]; + b7 = ((unsigned char*) b->data)[idx+6]; + b8 = ((unsigned char*) b->data)[idx+7]; +#ifdef ARCH_BIG_ENDIAN + res = (uint64_t) b1 << 56 | (uint64_t) b2 << 48 + | (uint64_t) b3 << 40 | (uint64_t) b4 << 32 + | (uint64_t) b5 << 24 | (uint64_t) b6 << 16 + | (uint64_t) b7 << 8 | (uint64_t) b8; +#else + res = (uint64_t) b8 << 56 | (uint64_t) b7 << 48 + | (uint64_t) b6 << 40 | (uint64_t) b5 << 32 + | (uint64_t) b4 << 24 | (uint64_t) b3 << 16 + | (uint64_t) b2 << 8 | (uint64_t) b1; +#endif + return caml_copy_int64(res); +} + +/* Generic write to a big array */ + +static value caml_ba_set_aux(value vb, value * vind, intnat nind, value newval) +{ + struct caml_ba_array * b = Caml_ba_array_val(vb); + intnat index[CAML_BA_MAX_NUM_DIMS]; + int i; + intnat offset; + + /* Check number of indices = number of dimensions of array + (maybe not necessary if ML typing guarantees this) */ + if (nind != b->num_dims) + caml_invalid_argument("Bigarray.set: wrong number of indices"); + /* Compute offset and check bounds */ + for (i = 0; i < b->num_dims; i++) index[i] = Long_val(vind[i]); + offset = caml_ba_offset(b, index); + /* Perform write */ + switch 
(b->flags & CAML_BA_KIND_MASK) { + default: + CAMLassert(0); + case CAML_BA_FLOAT32: + ((float *) b->data)[offset] = Double_val(newval); break; + case CAML_BA_FLOAT64: + ((double *) b->data)[offset] = Double_val(newval); break; + case CAML_BA_CHAR: + case CAML_BA_SINT8: + case CAML_BA_UINT8: + ((int8 *) b->data)[offset] = Int_val(newval); break; + case CAML_BA_SINT16: + case CAML_BA_UINT16: + ((int16 *) b->data)[offset] = Int_val(newval); break; + case CAML_BA_INT32: + ((int32_t *) b->data)[offset] = Int32_val(newval); break; + case CAML_BA_INT64: + ((int64_t *) b->data)[offset] = Int64_val(newval); break; + case CAML_BA_NATIVE_INT: + ((intnat *) b->data)[offset] = Nativeint_val(newval); break; + case CAML_BA_CAML_INT: + ((intnat *) b->data)[offset] = Long_val(newval); break; + case CAML_BA_COMPLEX32: + { float * p = ((float *) b->data) + offset * 2; + p[0] = Double_field(newval, 0); + p[1] = Double_field(newval, 1); + break; } + case CAML_BA_COMPLEX64: + { double * p = ((double *) b->data) + offset * 2; + p[0] = Double_field(newval, 0); + p[1] = Double_field(newval, 1); + break; } + } + return Val_unit; +} + +CAMLprim value caml_ba_set_1(value vb, value vind1, value newval) +{ + return caml_ba_set_aux(vb, &vind1, 1, newval); +} + +CAMLprim value caml_ba_set_2(value vb, value vind1, value vind2, value newval) +{ + value vind[2]; + vind[0] = vind1; vind[1] = vind2; + return caml_ba_set_aux(vb, vind, 2, newval); +} + +CAMLprim value caml_ba_set_3(value vb, value vind1, value vind2, value vind3, + value newval) +{ + value vind[3]; + vind[0] = vind1; vind[1] = vind2; vind[2] = vind3; + return caml_ba_set_aux(vb, vind, 3, newval); +} + +value caml_ba_set_N(value vb, value * vind, int nargs) +{ + return caml_ba_set_aux(vb, vind, nargs - 1, vind[nargs - 1]); +} + +CAMLprim value caml_ba_set_generic(value vb, value vind, value newval) +{ + return caml_ba_set_aux(vb, &Field(vind, 0), Wosize_val(vind), newval); +} + +CAMLprim value caml_ba_uint8_set16(value vb, value vind, 
value newval) +{ + unsigned char b1, b2; + intnat val; + intnat idx = Long_val(vind); + struct caml_ba_array * b = Caml_ba_array_val(vb); + if (idx < 0 || idx >= b->dim[0] - 1) caml_array_bound_error(); + val = Long_val(newval); +#ifdef ARCH_BIG_ENDIAN + b1 = 0xFF & val >> 8; + b2 = 0xFF & val; +#else + b2 = 0xFF & val >> 8; + b1 = 0xFF & val; +#endif + ((unsigned char*) b->data)[idx] = b1; + ((unsigned char*) b->data)[idx+1] = b2; + return Val_unit; +} + +CAMLprim value caml_ba_uint8_set32(value vb, value vind, value newval) +{ + unsigned char b1, b2, b3, b4; + intnat idx = Long_val(vind); + intnat val; + struct caml_ba_array * b = Caml_ba_array_val(vb); + if (idx < 0 || idx >= b->dim[0] - 3) caml_array_bound_error(); + val = Int32_val(newval); +#ifdef ARCH_BIG_ENDIAN + b1 = 0xFF & val >> 24; + b2 = 0xFF & val >> 16; + b3 = 0xFF & val >> 8; + b4 = 0xFF & val; +#else + b4 = 0xFF & val >> 24; + b3 = 0xFF & val >> 16; + b2 = 0xFF & val >> 8; + b1 = 0xFF & val; +#endif + ((unsigned char*) b->data)[idx] = b1; + ((unsigned char*) b->data)[idx+1] = b2; + ((unsigned char*) b->data)[idx+2] = b3; + ((unsigned char*) b->data)[idx+3] = b4; + return Val_unit; +} + +CAMLprim value caml_ba_uint8_set64(value vb, value vind, value newval) +{ + unsigned char b1, b2, b3, b4, b5, b6, b7, b8; + intnat idx = Long_val(vind); + int64_t val; + struct caml_ba_array * b = Caml_ba_array_val(vb); + if (idx < 0 || idx >= b->dim[0] - 7) caml_array_bound_error(); + val = Int64_val(newval); +#ifdef ARCH_BIG_ENDIAN + b1 = 0xFF & val >> 56; + b2 = 0xFF & val >> 48; + b3 = 0xFF & val >> 40; + b4 = 0xFF & val >> 32; + b5 = 0xFF & val >> 24; + b6 = 0xFF & val >> 16; + b7 = 0xFF & val >> 8; + b8 = 0xFF & val; +#else + b8 = 0xFF & val >> 56; + b7 = 0xFF & val >> 48; + b6 = 0xFF & val >> 40; + b5 = 0xFF & val >> 32; + b4 = 0xFF & val >> 24; + b3 = 0xFF & val >> 16; + b2 = 0xFF & val >> 8; + b1 = 0xFF & val; +#endif + ((unsigned char*) b->data)[idx] = b1; + ((unsigned char*) b->data)[idx+1] = b2; + 
((unsigned char*) b->data)[idx+2] = b3; + ((unsigned char*) b->data)[idx+3] = b4; + ((unsigned char*) b->data)[idx+4] = b5; + ((unsigned char*) b->data)[idx+5] = b6; + ((unsigned char*) b->data)[idx+6] = b7; + ((unsigned char*) b->data)[idx+7] = b8; + return Val_unit; +} + +/* Return the number of dimensions of a big array */ + +CAMLprim value caml_ba_num_dims(value vb) +{ + struct caml_ba_array * b = Caml_ba_array_val(vb); + return Val_long(b->num_dims); +} + +/* Return the n-th dimension of a big array */ + +CAMLprim value caml_ba_dim(value vb, value vn) +{ + struct caml_ba_array * b = Caml_ba_array_val(vb); + intnat n = Long_val(vn); + if (n < 0 || n >= b->num_dims) caml_invalid_argument("Bigarray.dim"); + return Val_long(b->dim[n]); +} + +CAMLprim value caml_ba_dim_1(value vb) +{ + return caml_ba_dim(vb, Val_int(0)); +} + +CAMLprim value caml_ba_dim_2(value vb) +{ + return caml_ba_dim(vb, Val_int(1)); +} + +CAMLprim value caml_ba_dim_3(value vb) +{ + return caml_ba_dim(vb, Val_int(2)); +} + +/* Return the kind of a big array */ + +CAMLprim value caml_ba_kind(value vb) +{ + return Val_caml_ba_kind(Caml_ba_array_val(vb)->flags & CAML_BA_KIND_MASK); +} + +/* Return the layout of a big array */ + +CAMLprim value caml_ba_layout(value vb) +{ + int layout = Caml_ba_array_val(vb)->flags & CAML_BA_LAYOUT_MASK; + return Val_caml_ba_layout(layout); +} + +/* Create / update proxy to indicate that b2 is a sub-array of b1 */ + +static void caml_ba_update_proxy(struct caml_ba_array * b1, + struct caml_ba_array * b2) +{ + struct caml_ba_proxy * proxy; + /* Nothing to do for un-managed arrays */ + if ((b1->flags & CAML_BA_MANAGED_MASK) == CAML_BA_EXTERNAL) return; + if (b1->proxy != NULL) { + /* If b1 is already a proxy for a larger array, increment refcount of + proxy */ + b2->proxy = b1->proxy; + ++ b1->proxy->refcount; + } else { + /* Otherwise, create proxy and attach it to both b1 and b2 */ + proxy = malloc(sizeof(struct caml_ba_proxy)); + if (proxy == NULL) 
caml_raise_out_of_memory(); + proxy->refcount = 2; /* original array + sub array */ + proxy->data = b1->data; + proxy->size = + b1->flags & CAML_BA_MAPPED_FILE ? caml_ba_byte_size(b1) : 0; + b1->proxy = proxy; + b2->proxy = proxy; + } +} + +/* Slicing */ + +CAMLprim value caml_ba_slice(value vb, value vind) +{ + CAMLparam2 (vb, vind); + #define b ((struct caml_ba_array *) Caml_ba_array_val(vb)) + CAMLlocal1 (res); + intnat index[CAML_BA_MAX_NUM_DIMS]; + int num_inds, i; + intnat offset; + intnat * sub_dims; + char * sub_data; + + /* Check number of indices <= number of dimensions of array */ + num_inds = Wosize_val(vind); + if (num_inds > b->num_dims) + caml_invalid_argument("Bigarray.slice: too many indices"); + /* Compute offset and check bounds */ + if ((b->flags & CAML_BA_LAYOUT_MASK) == CAML_BA_C_LAYOUT) { + /* We slice from the left */ + for (i = 0; i < num_inds; i++) index[i] = Long_val(Field(vind, i)); + for (/*nothing*/; i < b->num_dims; i++) index[i] = 0; + offset = caml_ba_offset(b, index); + sub_dims = b->dim + num_inds; + } else { + /* We slice from the right */ + for (i = 0; i < num_inds; i++) + index[b->num_dims - num_inds + i] = Long_val(Field(vind, i)); + for (i = 0; i < b->num_dims - num_inds; i++) index[i] = 1; + offset = caml_ba_offset(b, index); + sub_dims = b->dim; + } + sub_data = + (char *) b->data + + offset * caml_ba_element_size[b->flags & CAML_BA_KIND_MASK]; + /* Allocate an OCaml bigarray to hold the result */ + res = caml_ba_alloc(b->flags, b->num_dims - num_inds, sub_data, sub_dims); + /* Create or update proxy in case of managed bigarray */ + caml_ba_update_proxy(b, Caml_ba_array_val(res)); + /* Return result */ + CAMLreturn (res); + + #undef b +} + +/* Changing the layout of an array (memory is shared) */ + +CAMLprim value caml_ba_change_layout(value vb, value vlayout) +{ + CAMLparam2 (vb, vlayout); + CAMLlocal1 (res); + #define b ((struct caml_ba_array *) Caml_ba_array_val(vb)) + /* if the layout is different, change the flags and 
reverse the dimensions */ + if (Caml_ba_layout_val(vlayout) != (b->flags & CAML_BA_LAYOUT_MASK)) { + /* change the flags to reflect the new layout */ + int flags = (b->flags & (CAML_BA_KIND_MASK | CAML_BA_MANAGED_MASK)) + | Caml_ba_layout_val(vlayout); + /* reverse the dimensions */ + intnat new_dim[CAML_BA_MAX_NUM_DIMS]; + unsigned int i; + for(i = 0; i < b->num_dims; i++) new_dim[i] = b->dim[b->num_dims - i - 1]; + res = caml_ba_alloc(flags, b->num_dims, b->data, new_dim); + caml_ba_update_proxy(b, Caml_ba_array_val(res)); + CAMLreturn(res); + } else { + /* otherwise, do nothing */ + CAMLreturn(vb); + } + #undef b +} + + +/* Extracting a sub-array of same number of dimensions */ + +CAMLprim value caml_ba_sub(value vb, value vofs, value vlen) +{ + CAMLparam3 (vb, vofs, vlen); + CAMLlocal1 (res); + #define b ((struct caml_ba_array *) Caml_ba_array_val(vb)) + intnat ofs = Long_val(vofs); + intnat len = Long_val(vlen); + int i, changed_dim; + intnat mul; + char * sub_data; + + /* Compute offset and check bounds */ + if ((b->flags & CAML_BA_LAYOUT_MASK) == CAML_BA_C_LAYOUT) { + /* We reduce the first dimension */ + mul = 1; + for (i = 1; i < b->num_dims; i++) mul *= b->dim[i]; + changed_dim = 0; + } else { + /* We reduce the last dimension */ + mul = 1; + for (i = 0; i < b->num_dims - 1; i++) mul *= b->dim[i]; + changed_dim = b->num_dims - 1; + ofs--; /* Fortran arrays start at 1 */ + } + if (ofs < 0 || len < 0 || ofs + len > b->dim[changed_dim]) + caml_invalid_argument("Bigarray.sub: bad sub-array"); + sub_data = + (char *) b->data + + ofs * mul * caml_ba_element_size[b->flags & CAML_BA_KIND_MASK]; + /* Allocate an OCaml bigarray to hold the result */ + res = caml_ba_alloc(b->flags, b->num_dims, sub_data, b->dim); + /* Doctor the changed dimension */ + Caml_ba_array_val(res)->dim[changed_dim] = len; + /* Create or update proxy in case of managed bigarray */ + caml_ba_update_proxy(b, Caml_ba_array_val(res)); + /* Return result */ + CAMLreturn (res); + + #undef b +} + 
+/* Copying a big array into another one */ + +#define LEAVE_RUNTIME_OP_CUTOFF 4096 +#define is_mmapped(ba) ((ba)->flags & CAML_BA_MAPPED_FILE) + +CAMLprim value caml_ba_blit(value vsrc, value vdst) +{ + CAMLparam2(vsrc, vdst); + struct caml_ba_array * src = Caml_ba_array_val(vsrc); + struct caml_ba_array * dst = Caml_ba_array_val(vdst); + void *src_data = src->data; + void *dst_data = dst->data; + int i; + intnat num_bytes; + int leave_runtime; + + /* Check same numbers of dimensions and same dimensions */ + if (src->num_dims != dst->num_dims) goto blit_error; + for (i = 0; i < src->num_dims; i++) + if (src->dim[i] != dst->dim[i]) goto blit_error; + /* Compute number of bytes in array data */ + num_bytes = + caml_ba_num_elts(src) + * caml_ba_element_size[src->flags & CAML_BA_KIND_MASK]; + leave_runtime = + ( + (num_bytes >= LEAVE_RUNTIME_OP_CUTOFF*sizeof(long)) + || is_mmapped(src) + || is_mmapped(dst) + ); + /* Do the copying */ + if (leave_runtime) caml_enter_blocking_section(); + memmove (dst_data, src_data, num_bytes); + if (leave_runtime) caml_leave_blocking_section(); + CAMLreturn (Val_unit); + blit_error: + caml_invalid_argument("Bigarray.blit: dimension mismatch"); + CAMLreturn (Val_unit); /* not reached */ +} + +/* Filling a big array with a given value */ + +#define FILL_GEN_LOOP(n_ops, loop) do{ \ + int leave_runtime = ((n_ops >= LEAVE_RUNTIME_OP_CUTOFF) || is_mmapped(b)); \ + if (leave_runtime) caml_enter_blocking_section(); \ + loop; \ + if (leave_runtime) caml_leave_blocking_section(); \ +}while(0) + +#define FILL_SCALAR_LOOP \ + FILL_GEN_LOOP(num_elts, \ + for (p = data; num_elts > 0; p++, num_elts--) *p = init) + +#define FILL_COMPLEX_LOOP \ + FILL_GEN_LOOP(num_elts + num_elts, \ + for (p = data; num_elts > 0; num_elts--) { *p++ = init0; *p++ = init1; }) + +CAMLprim value caml_ba_fill(value vb, value vinit) +{ + CAMLparam1(vb); + struct caml_ba_array * b = Caml_ba_array_val(vb); + void *data = b->data; + intnat num_elts = caml_ba_num_elts(b); + + 
switch (b->flags & CAML_BA_KIND_MASK) { + default: + CAMLassert(0); + case CAML_BA_FLOAT32: { + float init = Double_val(vinit); + float * p; + FILL_SCALAR_LOOP; + break; + } + case CAML_BA_FLOAT64: { + double init = Double_val(vinit); + double * p; + FILL_SCALAR_LOOP; + break; + } + case CAML_BA_CHAR: + case CAML_BA_SINT8: + case CAML_BA_UINT8: { + int init = Int_val(vinit); + unsigned char * p; + FILL_SCALAR_LOOP; + break; + } + case CAML_BA_SINT16: + case CAML_BA_UINT16: { + int init = Int_val(vinit); + int16 * p; + FILL_SCALAR_LOOP; + break; + } + case CAML_BA_INT32: { + int32_t init = Int32_val(vinit); + int32_t * p; + FILL_SCALAR_LOOP; + break; + } + case CAML_BA_INT64: { + int64_t init = Int64_val(vinit); + int64_t * p; + FILL_SCALAR_LOOP; + break; + } + case CAML_BA_NATIVE_INT: { + intnat init = Nativeint_val(vinit); + intnat * p; + FILL_SCALAR_LOOP; + break; + } + case CAML_BA_CAML_INT: { + intnat init = Long_val(vinit); + intnat * p; + FILL_SCALAR_LOOP; + break; + } + case CAML_BA_COMPLEX32: { + float init0 = Double_field(vinit, 0); + float init1 = Double_field(vinit, 1); + float * p; + FILL_COMPLEX_LOOP; + break; + } + case CAML_BA_COMPLEX64: { + double init0 = Double_field(vinit, 0); + double init1 = Double_field(vinit, 1); + double * p; + FILL_COMPLEX_LOOP; + break; + } + } + CAMLreturn (Val_unit); +} + +/* Reshape an array: change dimensions and number of dimensions, preserving + array contents */ + +CAMLprim value caml_ba_reshape(value vb, value vdim) +{ + CAMLparam2 (vb, vdim); + CAMLlocal1 (res); +#define b ((struct caml_ba_array *) Caml_ba_array_val(vb)) + intnat dim[CAML_BA_MAX_NUM_DIMS]; + mlsize_t num_dims; + uintnat num_elts; + int i; + + num_dims = Wosize_val(vdim); + /* here num_dims is unsigned (mlsize_t) so no need to check (num_dims >= 0) */ + if (num_dims > CAML_BA_MAX_NUM_DIMS) + caml_invalid_argument("Bigarray.reshape: bad number of dimensions"); + num_elts = 1; + for (i = 0; i < num_dims; i++) { + dim[i] = Long_val(Field(vdim, i)); + 
if (dim[i] < 0) + caml_invalid_argument("Bigarray.reshape: negative dimension"); + num_elts *= dim[i]; + } + /* Check that sizes agree */ + if (num_elts != caml_ba_num_elts(b)) + caml_invalid_argument("Bigarray.reshape: size mismatch"); + /* Create bigarray with same data and new dimensions */ + res = caml_ba_alloc(b->flags, num_dims, b->data, dim); + /* Create or update proxy in case of managed bigarray */ + caml_ba_update_proxy(b, Caml_ba_array_val(res)); + /* Return result */ + CAMLreturn (res); + +#undef b +} diff --git a/byterun/caml/config.h b/byterun/caml/config.h index cee3e8bf..0dba12ad 100644 --- a/byterun/caml/config.h +++ b/byterun/caml/config.h @@ -29,6 +29,8 @@ #include "compatibility.h" #endif +#include + #ifdef HAS_STDINT_H #include #endif diff --git a/byterun/caml/exec.h b/byterun/caml/exec.h index f39747ac..38ab7ae8 100644 --- a/byterun/caml/exec.h +++ b/byterun/caml/exec.h @@ -58,7 +58,7 @@ struct exec_trailer { /* Magic number for this release */ -#define EXEC_MAGIC "Caml1999X011" +#define EXEC_MAGIC "Caml1999X023" #endif /* CAML_INTERNALS */ diff --git a/byterun/caml/instruct.h b/byterun/caml/instruct.h index 73798333..5c10df4f 100644 --- a/byterun/caml/instruct.h +++ b/byterun/caml/instruct.h @@ -41,7 +41,7 @@ enum instructions { GETFIELD0, GETFIELD1, GETFIELD2, GETFIELD3, GETFIELD, GETFLOATFIELD, SETFIELD0, SETFIELD1, SETFIELD2, SETFIELD3, SETFIELD, SETFLOATFIELD, VECTLENGTH, GETVECTITEM, SETVECTITEM, - GETSTRINGCHAR, SETSTRINGCHAR, + GETBYTESCHAR, SETBYTESCHAR, BRANCH, BRANCHIF, BRANCHIFNOT, SWITCH, BOOLNOT, PUSHTRAP, POPTRAP, RAISE, CHECK_SIGNALS, @@ -60,6 +60,7 @@ enum instructions { STOP, EVENT, BREAK, RERAISE, RAISE_NOTRACE, + GETSTRINGCHAR, FIRST_UNIMPLEMENTED_OP}; #endif /* CAML_INTERNALS */ diff --git a/byterun/caml/intext.h b/byterun/caml/intext.h index 673c6fc0..f67c98b5 100644 --- a/byterun/caml/intext.h +++ b/byterun/caml/intext.h @@ -196,7 +196,7 @@ struct code_fragment { CAMLextern struct code_fragment * 
caml_extern_find_code(char *addr); -struct ext_table caml_code_fragments_table; +extern struct ext_table caml_code_fragments_table; #endif /* CAML_INTERNALS */ diff --git a/byterun/caml/major_gc.h b/byterun/caml/major_gc.h index a6c42d9e..813f8a78 100644 --- a/byterun/caml/major_gc.h +++ b/byterun/caml/major_gc.h @@ -64,9 +64,9 @@ extern uintnat total_heap_size; extern char *caml_gc_sweep_hp; extern int caml_major_window; -double caml_major_ring[Max_major_window]; -int caml_major_ring_index; -double caml_major_work_credit; +extern double caml_major_ring[Max_major_window]; +extern int caml_major_ring_index; +extern double caml_major_work_credit; extern double caml_gc_clock; /* [caml_major_gc_hook] is called just between the end of the mark diff --git a/byterun/caml/minor_gc.h b/byterun/caml/minor_gc.h index e0820752..6c48c761 100644 --- a/byterun/caml/minor_gc.h +++ b/byterun/caml/minor_gc.h @@ -26,6 +26,7 @@ CAMLextern value *caml_young_ptr, *caml_young_limit; CAMLextern value *caml_young_trigger; extern asize_t caml_minor_heap_wsz; extern int caml_in_minor_collection; +extern double caml_extra_heap_resources_minor; #define CAML_TABLE_STRUCT(t) { \ t *base; \ diff --git a/byterun/caml/misc.h b/byterun/caml/misc.h index d9a7b768..8244de93 100644 --- a/byterun/caml/misc.h +++ b/byterun/caml/misc.h @@ -180,10 +180,8 @@ typedef wchar_t char_os; #define rename_os caml_win32_rename #define chdir_os _wchdir #define getcwd_os _wgetcwd -#define getenv_os _wgetenv #define system_os _wsystem #define rmdir_os _wrmdir -#define utime_os _wutime #define putenv_os _wputenv #define chmod_os _wchmod #define execv_os _wexecv @@ -214,10 +212,8 @@ typedef char char_os; #define rename_os rename #define chdir_os chdir #define getcwd_os getcwd -#define getenv_os getenv #define system_os system #define rmdir_os rmdir -#define utime_os utime #define putenv_os putenv #define chmod_os chmod #define execv_os execv @@ -252,7 +248,7 @@ typedef char char_os; #define CAML_SYS_UNLINK(filename) 
unlink_os(filename) #define CAML_SYS_RENAME(old_name,new_name) rename_os(old_name, new_name) #define CAML_SYS_CHDIR(dirname) chdir_os(dirname) -#define CAML_SYS_GETENV(varname) getenv_os(varname) +#define CAML_SYS_GETENV(varname) getenv(varname) #define CAML_SYS_SYSTEM(command) system_os(command) #define CAML_SYS_READ_DIRECTORY(dirname,tbl) caml_read_directory(dirname,tbl) @@ -306,7 +302,7 @@ extern intnat (*caml_cplugins_prim)(int,intnat,intnat,intnat); #define CAML_SYS_CHDIR(dirname) \ CAML_SYS_PRIM_1(CAML_CPLUGINS_CHDIR,chdir_os,dirname) #define CAML_SYS_GETENV(varname) \ - CAML_SYS_STRING_PRIM_1(CAML_CPLUGINS_GETENV,getenv_os,varname) + CAML_SYS_STRING_PRIM_1(CAML_CPLUGINS_GETENV,getenv,varname) #define CAML_SYS_SYSTEM(command) \ CAML_SYS_PRIM_1(CAML_CPLUGINS_SYSTEM,system_os,command) #define CAML_SYS_READ_DIRECTORY(dirname,tbl) \ diff --git a/byterun/caml/osdeps.h b/byterun/caml/osdeps.h index bc75a7de..b65503d8 100644 --- a/byterun/caml/osdeps.h +++ b/byterun/caml/osdeps.h @@ -98,6 +98,11 @@ extern char_os * caml_executable_name(void); */ extern char_os *caml_secure_getenv(char_os const *var); +/* If [fd] refers to a terminal or console, return the number of rows + (lines) that it displays. Otherwise, or if the number of rows + cannot be determined, return -1. 
*/ +extern int caml_num_rows_fd(int fd); + #ifdef _WIN32 extern int caml_win32_rename(const wchar_t *, const wchar_t *); @@ -106,6 +111,8 @@ extern void caml_probe_win32_version(void); extern void caml_setup_win32_terminal(void); extern void caml_restore_win32_terminal(void); +extern wchar_t *caml_win32_getenv(wchar_t const *); + /* Windows Unicode support */ extern int win_multi_byte_to_wide_char(const char* s, int slen, wchar_t *out, int outlen); @@ -136,6 +143,8 @@ extern char* caml_stat_strdup_of_utf16(const wchar_t *s); */ extern value caml_copy_string_of_utf16(const wchar_t *s); +extern int caml_win32_isatty(int fd); + #endif /* _WIN32 */ #endif /* CAML_INTERNALS */ diff --git a/byterun/caml/spacetime.h b/byterun/caml/spacetime.h index df1193e2..5bcc9232 100644 --- a/byterun/caml/spacetime.h +++ b/byterun/caml/spacetime.h @@ -15,11 +15,9 @@ #ifndef CAML_SPACETIME_H #define CAML_SPACETIME_H -#ifdef NATIVE_CODE - -#include "caml/io.h" -#include "caml/misc.h" -#include "caml/stack.h" +#include "io.h" +#include "misc.h" +#include "stack.h" /* Runtime support for Spacetime profiling. * This header file is not intended for the casual user. 
@@ -202,7 +200,4 @@ extern void caml_spacetime_automatic_snapshot (void); #define Get_my_profinfo_with_cached_backtrace(profinfo, size) \ profinfo = (uintnat) 0; -#endif /* NATIVE_CODE */ - - #endif diff --git a/byterun/custom.c b/byterun/custom.c index f68eac95..b6a5c4e3 100644 --- a/byterun/custom.c +++ b/byterun/custom.c @@ -22,6 +22,7 @@ #include "caml/fail.h" #include "caml/memory.h" #include "caml/mlvalues.h" +#include "caml/signals.h" /* [size] is a number of bytes */ CAMLexport value caml_alloc_custom(struct custom_operations * ops, @@ -30,7 +31,8 @@ CAMLexport value caml_alloc_custom(struct custom_operations * ops, mlsize_t max) { mlsize_t wosize; - value result; + CAMLparam0(); + CAMLlocal1(result); wosize = 1 + (size + sizeof(value) - 1) / sizeof(value); if (wosize <= Max_young_wosize) { @@ -39,6 +41,16 @@ CAMLexport value caml_alloc_custom(struct custom_operations * ops, if (ops->finalize != NULL || mem != 0) { /* Remember that the block needs processing after minor GC. */ add_to_custom_table (&caml_custom_table, result, mem, max); + /* Keep track of extra resources held by custom block in + minor heap. 
*/ + if (mem != 0) { + if (max == 0) max = 1; + caml_extra_heap_resources_minor += (double) mem / (double) max; + if (caml_extra_heap_resources_minor > 1.0) { + caml_request_minor_gc (); + caml_gc_dispatch (); + } + } } } else { result = caml_alloc_shr(wosize, Custom_tag); @@ -46,7 +58,7 @@ CAMLexport value caml_alloc_custom(struct custom_operations * ops, caml_adjust_gc_speed(mem, max); result = caml_check_urgent_gc(result); } - return result; + CAMLreturn(result); } struct custom_operations_list { diff --git a/byterun/debugger.c b/byterun/debugger.c index 71536774..1c416cd6 100644 --- a/byterun/debugger.c +++ b/byterun/debugger.c @@ -165,6 +165,7 @@ void caml_debugger_init(void) { char * address; char_os * a; + size_t a_len; char * port, * p; struct hostent * host; int n; @@ -194,11 +195,16 @@ void caml_debugger_init(void) /* Unix domain */ sock_domain = PF_UNIX; sock_addr.s_unix.sun_family = AF_UNIX; + a_len = strlen(address); + if (a_len >= sizeof(sock_addr.s_unix.sun_path)) { + caml_fatal_error("Debug socket path length exceeds maximum permitted length"); + } strncpy(sock_addr.s_unix.sun_path, address, - sizeof(sock_addr.s_unix.sun_path)); + sizeof(sock_addr.s_unix.sun_path) - 1); + sock_addr.s_unix.sun_path[sizeof(sock_addr.s_unix.sun_path) - 1] = '\0'; sock_addr_len = ((char *)&(sock_addr.s_unix.sun_path) - (char *)&(sock_addr.s_unix)) - + strlen(address); + + a_len; #else caml_fatal_error("Unix sockets not supported"); #endif diff --git a/byterun/extern.c b/byterun/extern.c index adebc910..db7163cd 100644 --- a/byterun/extern.c +++ b/byterun/extern.c @@ -694,7 +694,7 @@ CAMLprim value caml_output_value(value vchan, value v, value flags) CAMLreturn (Val_unit); } -CAMLprim value caml_output_value_to_string(value v, value flags) +CAMLprim value caml_output_value_to_bytes(value v, value flags) { char header[32]; int header_len; @@ -722,6 +722,11 @@ CAMLprim value caml_output_value_to_string(value v, value flags) return res; } +CAMLprim value 
caml_output_value_to_string(value v, value flags) +{ + return caml_output_value_to_bytes(v,flags); +} + CAMLexport intnat caml_output_value_to_block(value v, value flags, char * buf, intnat len) { diff --git a/byterun/finalise.c b/byterun/finalise.c index 9a41ff78..d34913fb 100644 --- a/byterun/finalise.c +++ b/byterun/finalise.c @@ -314,7 +314,7 @@ static void generic_final_minor_update (struct finalisable * final) CAMLassert (Is_block (final->table[i].val)); CAMLassert (Is_in_heap_or_young (final->table[i].val)); CAMLassert (Tag_val (final->table[i].val) != Forward_tag); - if(Is_young(final->table[j].val) && Hd_val(final->table[i].val) != 0){ + if(Is_young(final->table[i].val) && Hd_val(final->table[i].val) != 0){ /** dead */ to_do_tl->item[k] = final->table[i]; /* The finalisation function is called with unit not with the value */ diff --git a/byterun/fix_code.c b/byterun/fix_code.c index 68f97c99..ec2f08cc 100644 --- a/byterun/fix_code.c +++ b/byterun/fix_code.c @@ -38,6 +38,7 @@ code_t caml_start_code; asize_t caml_code_size; unsigned char * caml_saved_code; +struct ext_table caml_code_fragments_table; /* Read the main bytecode block from a file */ diff --git a/byterun/floats.c b/byterun/floats.c index fe313c59..4d2494cf 100644 --- a/byterun/floats.c +++ b/byterun/floats.c @@ -226,7 +226,19 @@ static int caml_float_of_hex(const char * s, double * res) if (*s == 0) return -1; /* nothing after exponent mark */ e = strtol(s, &p, 10); if (*p != 0) return -1; /* ill-formed exponent */ - if (e < INT_MIN || e > INT_MAX) return -1; /* unreasonable exponent */ + /* Handle exponents larger than int by returning 0/∞ directly. + Mind that INT_MIN/INT_MAX are included in the test so as to capture + the overflow case of strtol on Win64 — long and int have the same + size there. */ + if (e <= INT_MIN) { + *res = 0.; + return 0; + } + else if (e >= INT_MAX) { + *res = m == 0 ? 0. 
: HUGE_VAL; + return 0; + } + /* regular exponent value */ exp = e; s = p; /* stop at next loop iteration */ break; @@ -261,8 +273,17 @@ static int caml_float_of_hex(const char * s, double * res) on several architectures. */ f = (double) (int64_t) m; /* Adjust exponent to take decimal point and extra digits into account */ - if (dec_point >= 0) exp = exp + (dec_point - n_bits); - exp = exp + x_bits; + { + int adj = x_bits; + if (dec_point >= 0) adj = adj + (dec_point - n_bits); + /* saturated addition exp + adj */ + if (adj > 0 && exp > INT_MAX - adj) + exp = INT_MAX; + else if (adj < 0 && exp < INT_MIN - adj) + exp = INT_MIN; + else + exp = exp + adj; + } /* Apply exponent if needed */ if (exp != 0) f = ldexp(f, exp); /* Done! */ diff --git a/byterun/freelist.c b/byterun/freelist.c index 26c1d9c4..915eb9f9 100644 --- a/byterun/freelist.c +++ b/byterun/freelist.c @@ -534,9 +534,13 @@ header_t *caml_fl_merge_block (value bp) */ void caml_fl_add_blocks (value bp) { + value cur = bp; CAMLassert (fl_last != Val_NULL); CAMLassert (Next (fl_last) == Val_NULL); - caml_fl_cur_wsz += Whsize_bp (bp); + do { + caml_fl_cur_wsz += Whsize_bp (cur); + cur = Field(cur, 0); + } while (cur != Val_NULL); if (bp > fl_last){ Next (fl_last) = bp; @@ -547,7 +551,7 @@ void caml_fl_add_blocks (value bp) flp [flp_size++] = fl_last; } }else{ - value cur, prev; + value prev; prev = Fl_head; cur = Next (prev); diff --git a/byterun/intern.c b/byterun/intern.c index ba78846f..565ed10d 100644 --- a/byterun/intern.c +++ b/byterun/intern.c @@ -777,7 +777,7 @@ CAMLprim value caml_input_value_to_outside_heap(value vchan) CAMLreturn (res); } -CAMLexport value caml_input_val_from_string(value str, intnat ofs) +CAMLexport value caml_input_val_from_bytes(value str, intnat ofs) { CAMLparam1 (str); CAMLlocal1 (obj); @@ -801,7 +801,12 @@ CAMLexport value caml_input_val_from_string(value str, intnat ofs) CAMLprim value caml_input_value_from_string(value str, value ofs) { - return 
caml_input_val_from_string(str, Long_val(ofs)); + return caml_input_val_from_bytes(str, Long_val(ofs)); +} + +CAMLprim value caml_input_value_from_bytes(value str, value ofs) +{ + return caml_input_val_from_bytes(str, Long_val(ofs)); } static value input_val_from_block(struct marshal_header * h) diff --git a/byterun/interp.c b/byterun/interp.c index 76e600c9..0b74df3d 100644 --- a/byterun/interp.c +++ b/byterun/interp.c @@ -778,13 +778,13 @@ value caml_interprete(code_t prog, asize_t prog_size) sp += 2; Next; -/* String operations */ - +/* Bytes/String operations */ Instruct(GETSTRINGCHAR): + Instruct(GETBYTESCHAR): accu = Val_int(Byte_u(accu, Long_val(sp[0]))); sp += 1; Next; - Instruct(SETSTRINGCHAR): + Instruct(SETBYTESCHAR): Byte_u(accu, Long_val(sp[0])) = Int_val(sp[1]); sp += 2; accu = Val_unit; @@ -1002,10 +1002,9 @@ value caml_interprete(code_t prog, asize_t prog_size) Instruct(LSLINT): accu = (value)((((intnat) accu - 1) << Long_val(*sp++)) + 1); Next; Instruct(LSRINT): - accu = (value)((((uintnat) accu - 1) >> Long_val(*sp++)) | 1); - Next; + accu = (value)((((uintnat) accu) >> Long_val(*sp++)) | 1); Next; Instruct(ASRINT): - accu = (value)((((intnat) accu - 1) >> Long_val(*sp++)) | 1); Next; + accu = (value)((((intnat) accu) >> Long_val(*sp++)) | 1); Next; #define Integer_comparison(typ,opname,tst) \ Instruct(opname): \ diff --git a/byterun/ints.c b/byterun/ints.c index a104a0ee..76ae11d4 100644 --- a/byterun/ints.c +++ b/byterun/ints.c @@ -687,7 +687,7 @@ static uintnat nativeint_deserialize(void * dst) default: caml_deserialize_error("input_value: ill-formed native integer"); } - return sizeof(long); + return sizeof(intnat); } CAMLexport struct custom_operations caml_nativeint_ops = { diff --git a/byterun/io.c b/byterun/io.c index 3d956019..d124b56a 100644 --- a/byterun/io.c +++ b/byterun/io.c @@ -821,3 +821,8 @@ CAMLprim value caml_ml_input_scan_line(value vchannel) Unlock(channel); CAMLreturn (Val_long(res)); } + +CAMLprim value 
caml_terminfo_rows(value vchannel) +{ + return Val_int(caml_num_rows_fd(Channel(vchannel)->fd)); +} diff --git a/byterun/memory.c b/byterun/memory.c index 69a81611..f92b23c4 100644 --- a/byterun/memory.c +++ b/byterun/memory.c @@ -728,10 +728,19 @@ struct pool_block { #endif struct pool_block *next; struct pool_block *prev; - union max_align data[1]; /* not allocated, used for alignment purposes */ + /* Use C99's flexible array types if possible */ +#if (__STDC_VERSION__ >= 199901L) + union max_align data[]; /* not allocated, used for alignment purposes */ +#else + union max_align data[1]; +#endif }; +#if (__STDC_VERSION__ >= 199901L) +#define SIZEOF_POOL_BLOCK sizeof(struct pool_block) +#else #define SIZEOF_POOL_BLOCK offsetof(struct pool_block, data) +#endif static struct pool_block *pool = NULL; diff --git a/byterun/minor_gc.c b/byterun/minor_gc.c index b59b055d..6aa5ed72 100644 --- a/byterun/minor_gc.c +++ b/byterun/minor_gc.c @@ -77,6 +77,8 @@ CAMLexport struct caml_custom_table int caml_in_minor_collection = 0; +double caml_extra_heap_resources_minor = 0; + /* [sz] and [rsv] are numbers of entries */ static void alloc_generic_table (struct generic_table *tbl, asize_t sz, asize_t rsv, asize_t element_size) @@ -394,6 +396,7 @@ void caml_empty_minor_heap (void) clear_table ((struct generic_table *) &caml_ref_table); clear_table ((struct generic_table *) &caml_ephe_ref_table); clear_table ((struct generic_table *) &caml_custom_table); + caml_extra_heap_resources_minor = 0; caml_gc_message (0x02, ">"); caml_in_minor_collection = 0; caml_final_empty_young (); diff --git a/byterun/signals.c b/byterun/signals.c index e092e8d0..15addf1b 100644 --- a/byterun/signals.c +++ b/byterun/signals.c @@ -144,12 +144,12 @@ void caml_execute_signal(int signal_number, int in_signal_handler) void* saved_spacetime_trie_node_ptr; #endif #ifdef POSIX_SIGNALS - sigset_t sigs; + sigset_t nsigs, sigs; /* Block the signal before executing the handler, and record in sigs the original 
signal mask */ - sigemptyset(&sigs); - sigaddset(&sigs, signal_number); - sigprocmask(SIG_BLOCK, &sigs, &sigs); + sigemptyset(&nsigs); + sigaddset(&nsigs, signal_number); + sigprocmask(SIG_BLOCK, &nsigs, &sigs); #endif #if defined(NATIVE_CODE) && defined(WITH_SPACETIME) /* We record the signal handler's execution separately, in the same diff --git a/byterun/str.c b/byterun/str.c index 2eeceb55..8e07cb03 100644 --- a/byterun/str.c +++ b/byterun/str.c @@ -126,6 +126,11 @@ CAMLprim value caml_string_get16(value str, value index) return Val_int(res); } +CAMLprim value caml_bytes_get16(value str, value index) +{ + return caml_string_get16(str,index); +} + CAMLprim value caml_string_get32(value str, value index) { intnat res; @@ -144,6 +149,11 @@ CAMLprim value caml_string_get32(value str, value index) return caml_copy_int32(res); } +CAMLprim value caml_bytes_get32(value str, value index) +{ + return caml_string_get32(str,index); +} + CAMLprim value caml_string_get64(value str, value index) { uint64_t res; @@ -172,7 +182,12 @@ CAMLprim value caml_string_get64(value str, value index) return caml_copy_int64(res); } -CAMLprim value caml_string_set16(value str, value index, value newval) +CAMLprim value caml_bytes_get64(value str, value index) +{ + return caml_string_get64(str,index); +} + +CAMLprim value caml_bytes_set16(value str, value index, value newval) { unsigned char b1, b2; intnat val; @@ -191,7 +206,7 @@ CAMLprim value caml_string_set16(value str, value index, value newval) return Val_unit; } -CAMLprim value caml_string_set32(value str, value index, value newval) +CAMLprim value caml_bytes_set32(value str, value index, value newval) { unsigned char b1, b2, b3, b4; intnat val; @@ -216,7 +231,7 @@ CAMLprim value caml_string_set32(value str, value index, value newval) return Val_unit; } -CAMLprim value caml_string_set64(value str, value index, value newval) +CAMLprim value caml_bytes_set64(value str, value index, value newval) { unsigned char b1, b2, b3, b4, b5, b6, 
b7, b8; int64_t val; @@ -372,12 +387,6 @@ CAMLprim value caml_fill_string(value s, value offset, value len, value init) return caml_fill_bytes (s, offset, len, init); } -CAMLprim value caml_bitvect_test(value bv, value n) -{ - intnat pos = Long_val(n); - return Val_int(Byte_u(bv, pos >> 3) & (1 << (pos & 7))); -} - CAMLexport value caml_alloc_sprintf(const char * format, ...) { va_list args; @@ -453,3 +462,13 @@ CAMLexport value caml_alloc_sprintf(const char * format, ...) return res; #endif } + +CAMLprim value caml_string_of_bytes(value bv) +{ + return bv; +} + +CAMLprim value caml_bytes_of_string(value bv) +{ + return bv; +} diff --git a/byterun/sys.c b/byterun/sys.c index a46b6be4..a428fd73 100644 --- a/byterun/sys.c +++ b/byterun/sys.c @@ -27,7 +27,6 @@ #include #include #ifdef _WIN32 -#include /* for isatty */ #include /* for _wchdir and _wgetcwd */ #else #include @@ -333,25 +332,43 @@ CAMLprim value caml_sys_getcwd(value unit) CAMLprim value caml_sys_unsafe_getenv(value var) { char_os * res, * p; + value val; if (! caml_string_is_c_safe(var)) caml_raise_not_found(); p = caml_stat_strdup_to_os(String_val(var)); +#ifdef _WIN32 + res = caml_win32_getenv(p); +#else res = CAML_SYS_GETENV(p); +#endif caml_stat_free(p); if (res == 0) caml_raise_not_found(); - return caml_copy_string_of_os(res); + val = caml_copy_string_of_os(res); +#ifdef _WIN32 + caml_stat_free(res); +#endif + return val; } CAMLprim value caml_sys_getenv(value var) { char_os * res, * p; + value val; if (! 
caml_string_is_c_safe(var)) caml_raise_not_found(); p = caml_stat_strdup_to_os(String_val(var)); +#ifdef _WIN32 + res = caml_win32_getenv(p); +#else res = caml_secure_getenv(p); +#endif caml_stat_free(p); if (res == 0) caml_raise_not_found(); - return caml_copy_string_of_os(res); + val = caml_copy_string_of_os(res); +#ifdef _WIN32 + caml_stat_free(res); +#endif + return val; } char_os * caml_exe_name; @@ -619,8 +636,7 @@ CAMLprim value caml_sys_isatty(value chan) fd = (Channel(chan))->fd; #ifdef _WIN32 - ret = Val_bool(_isatty(fd)); - /* https://msdn.microsoft.com/en-us/library/f4s0ddew.aspx */ + ret = Val_bool(caml_win32_isatty(fd)); #else ret = Val_bool(isatty(fd)); #endif diff --git a/byterun/terminfo.c b/byterun/terminfo.c deleted file mode 100644 index 3f3401ee..00000000 --- a/byterun/terminfo.c +++ /dev/null @@ -1,134 +0,0 @@ -/**************************************************************************/ -/* */ -/* OCaml */ -/* */ -/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ -/* */ -/* Copyright 1996 Institut National de Recherche en Informatique et */ -/* en Automatique. */ -/* */ -/* All rights reserved. This file is distributed under the terms of */ -/* the GNU Lesser General Public License version 2.1, with the */ -/* special exception on linking described in the file LICENSE. 
*/ -/* */ -/**************************************************************************/ - -#define CAML_INTERNALS - -/* Read and output terminal commands */ - -#include "caml/config.h" -#include "caml/alloc.h" -#include "caml/fail.h" -#include "caml/io.h" -#include "caml/mlvalues.h" - -#define Uninitialised (Val_int(0)) -#define Bad_term (Val_int(1)) -#define Good_term_tag 0 - -#if defined (HAS_TERMCAP) && !defined (NATIVE_CODE) - -extern int tgetent (char * buffer, char * name); -extern char * tgetstr (char * id, char ** area); -extern int tgetnum (char * id); -extern int tputs (char * str, int count, int (*outchar)(int c)); - -static struct channel *chan; -static char area [1024]; -static char *area_p = area; -static int num_lines; -static char *up = NULL; -static char *down = NULL; -static char *standout = NULL; -static char *standend = NULL; - -CAMLprim value caml_terminfo_setup (value vchan) -{ - value result; - static char buffer[1024]; - char *term; - - chan = Channel (vchan); - - term = getenv ("TERM"); - if (term == NULL) return Bad_term; - if (tgetent(buffer, term) != 1) return Bad_term; - - num_lines = tgetnum ("li"); - up = tgetstr ("up", &area_p); - down = tgetstr ("do", &area_p); - standout = tgetstr ("us", &area_p); - standend = tgetstr ("ue", &area_p); - if (standout == NULL || standend == NULL){ - standout = tgetstr ("so", &area_p); - standend = tgetstr ("se", &area_p); - } - CAMLassert (area_p <= area + 1024); - if (num_lines == -1 || up == NULL || down == NULL - || standout == NULL || standend == NULL){ - return Bad_term; - } - result = caml_alloc_small (1, Good_term_tag); - Field (result, 0) = Val_int (num_lines); - return result; -} - -static int terminfo_putc (int c) -{ - caml_putch (chan, c); - return c; -} - -CAMLprim value caml_terminfo_backup (value lines) -{ - int i; - - for (i = 0; i < Int_val (lines); i++){ - tputs (up, 1, terminfo_putc); - } - return Val_unit; -} - -CAMLprim value caml_terminfo_standout (value start) -{ - tputs 
(Bool_val (start) ? standout : standend, 1, terminfo_putc); - return Val_unit; -} - -CAMLprim value caml_terminfo_resume (value lines) -{ - int i; - - for (i = 0; i < Int_val (lines); i++){ - tputs (down, 1, terminfo_putc); - } - return Val_unit; -} - -#else /* defined (HAS_TERMCAP) && !defined (NATIVE_CODE) */ - -CAMLexport value caml_terminfo_setup (value vchan) -{ - return Bad_term; -} - -CAMLexport value caml_terminfo_backup (value lines) -{ - caml_invalid_argument("Terminfo.backup"); - return Val_unit; -} - -CAMLexport value caml_terminfo_standout (value start) -{ - caml_invalid_argument("Terminfo.standout"); - return Val_unit; -} - -CAMLexport value caml_terminfo_resume (value lines) -{ - caml_invalid_argument("Terminfo.resume"); - return Val_unit; -} - -#endif /* defined (HAS_TERMCAP) && !defined (NATIVE_CODE) */ diff --git a/byterun/unix.c b/byterun/unix.c index 59882e06..da139195 100644 --- a/byterun/unix.c +++ b/byterun/unix.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "caml/config.h" #ifdef SUPPORT_DYNAMIC_LINKING @@ -161,12 +162,14 @@ caml_stat_string caml_search_in_path(struct ext_table * path, const char * name) static int cygwin_file_exists(const char * name) { - int fd; + int fd, ret; + struct stat st; /* Cannot use stat() here because it adds ".exe" implicitly */ fd = open(name, O_RDONLY); if (fd == -1) return 0; + ret = fstat(fd, &st); close(fd); - return 1; + return ret == 0 && S_ISREG(st.st_mode); } static caml_stat_string cygwin_search_exe_in_path(struct ext_table * path, const char * name) @@ -426,3 +429,19 @@ char *caml_secure_getenv (char const *var) return NULL; #endif } + +int caml_num_rows_fd(int fd) +{ +#ifdef TIOCGWINSZ + struct winsize w; + w.ws_row = -1; + if (ioctl(fd, TIOCGWINSZ, &w) == 0) + return w.ws_row; + else + return -1; +#else + return -1; +#endif +} + + diff --git a/byterun/weak.c b/byterun/weak.c index 2f309f44..f430ef8f 100644 --- a/byterun/weak.c +++ b/byterun/weak.c @@ -68,8 +68,9 @@ 
CAMLprim value caml_ephe_create (value len) mlsize_t size, i; value res; - size = Long_val (len) + 1 /* weak_list */ + 1 /* the value */; - if (size <= 0 || size > Max_wosize) caml_invalid_argument ("Weak.create"); + size = Long_val (len) + CAML_EPHE_FIRST_KEY; + if (size < CAML_EPHE_FIRST_KEY || size > Max_wosize) + caml_invalid_argument ("Weak.create"); res = caml_alloc_shr (size, Abstract_tag); for (i = 1; i < size; i++) Field (res, i) = caml_ephe_none; Field (res, CAML_EPHE_LINK_OFFSET) = caml_ephe_list_head; @@ -119,7 +120,7 @@ CAMLprim value caml_weak_create (value len) that is going to disappear is dead and so should trigger a cleaning */ static void do_check_key_clean(value ar, mlsize_t offset){ - CAMLassert ( offset >= 2); + CAMLassert (offset >= CAML_EPHE_FIRST_KEY); if (caml_gc_phase == Phase_clean){ value elt = Field (ar, offset); if (elt != caml_ephe_none && Is_Dead_during_clean(elt)){ @@ -161,9 +162,9 @@ static void do_set (value ar, mlsize_t offset, value v) CAMLprim value caml_ephe_set_key (value ar, value n, value el) { - mlsize_t offset = Long_val (n) + 2; + mlsize_t offset = Long_val (n) + CAML_EPHE_FIRST_KEY; CAMLassert (Is_in_heap (ar)); - if (offset < 2 || offset >= Wosize_val (ar)){ + if (offset < CAML_EPHE_FIRST_KEY || offset >= Wosize_val (ar)){ caml_invalid_argument ("Weak.set"); } do_check_key_clean(ar,offset); @@ -173,9 +174,9 @@ CAMLprim value caml_ephe_set_key (value ar, value n, value el) CAMLprim value caml_ephe_unset_key (value ar, value n) { - mlsize_t offset = Long_val (n) + 2; + mlsize_t offset = Long_val (n) + CAML_EPHE_FIRST_KEY; CAMLassert (Is_in_heap (ar)); - if (offset < 2 || offset >= Wosize_val (ar)){ + if (offset < CAML_EPHE_FIRST_KEY || offset >= Wosize_val (ar)){ caml_invalid_argument ("Weak.set"); } do_check_key_clean(ar,offset); @@ -185,9 +186,9 @@ CAMLprim value caml_ephe_unset_key (value ar, value n) value caml_ephe_set_key_option (value ar, value n, value el) { - mlsize_t offset = Long_val (n) + 2; + mlsize_t 
offset = Long_val (n) + CAML_EPHE_FIRST_KEY; CAMLassert (Is_in_heap (ar)); - if (offset < 2 || offset >= Wosize_val (ar)){ + if (offset < CAML_EPHE_FIRST_KEY || offset >= Wosize_val (ar)){ caml_invalid_argument ("Weak.set"); } do_check_key_clean(ar,offset); @@ -212,7 +213,7 @@ CAMLprim value caml_ephe_set_data (value ar, value el) cleaned we always need to check it. */ caml_ephe_clean(ar); }; - do_set (ar, 1, el); + do_set (ar, CAML_EPHE_DATA_OFFSET, el); return Val_unit; } @@ -226,10 +227,10 @@ CAMLprim value caml_ephe_unset_data (value ar) CAMLprim value caml_ephe_get_key (value ar, value n) { CAMLparam2 (ar, n); - mlsize_t offset = Long_val (n) + 2; + mlsize_t offset = Long_val (n) + CAML_EPHE_FIRST_KEY; CAMLlocal2 (res, elt); CAMLassert (Is_in_heap (ar)); - if (offset < 2 || offset >= Wosize_val (ar)){ + if (offset < CAML_EPHE_FIRST_KEY || offset >= Wosize_val (ar)){ caml_invalid_argument ("Weak.get_key"); } if (is_ephe_key_none(ar, offset)){ @@ -252,10 +253,9 @@ CAMLprim value caml_weak_get (value ar, value n){ CAMLprim value caml_ephe_get_data (value ar) { CAMLparam1 (ar); - mlsize_t offset = 1; CAMLlocal2 (res, elt); CAMLassert (Is_in_heap (ar)); - elt = Field (ar, offset); + elt = Field (ar, CAML_EPHE_DATA_OFFSET); if(caml_gc_phase == Phase_clean) caml_ephe_clean(ar); if (elt == caml_ephe_none){ res = None_val; @@ -272,11 +272,11 @@ CAMLprim value caml_ephe_get_data (value ar) CAMLprim value caml_ephe_get_key_copy (value ar, value n) { CAMLparam2 (ar, n); - mlsize_t offset = Long_val (n) + 2; + mlsize_t offset = Long_val (n) + CAML_EPHE_FIRST_KEY; CAMLlocal2 (res, elt); value v; /* Caution: this is NOT a local root. 
*/ CAMLassert (Is_in_heap (ar)); - if (offset < 1 || offset >= Wosize_val (ar)){ + if (offset < CAML_EPHE_FIRST_KEY || offset >= Wosize_val (ar)){ caml_invalid_argument ("Weak.get_copy"); } @@ -319,7 +319,7 @@ CAMLprim value caml_weak_get_copy (value ar, value n){ CAMLprim value caml_ephe_get_data_copy (value ar) { CAMLparam1 (ar); - mlsize_t offset = 1; + mlsize_t offset = CAML_EPHE_DATA_OFFSET; CAMLlocal2 (res, elt); value v; /* Caution: this is NOT a local root. */ CAMLassert (Is_in_heap (ar)); @@ -360,9 +360,9 @@ CAMLprim value caml_ephe_get_data_copy (value ar) CAMLprim value caml_ephe_check_key (value ar, value n) { - mlsize_t offset = Long_val (n) + 2; + mlsize_t offset = Long_val (n) + CAML_EPHE_FIRST_KEY; CAMLassert (Is_in_heap (ar)); - if (offset < 2 || offset >= Wosize_val (ar)){ + if (offset < CAML_EPHE_FIRST_KEY || offset >= Wosize_val (ar)){ caml_invalid_argument ("Weak.check"); } return Val_bool (!is_ephe_key_none(ar, offset)); @@ -382,16 +382,16 @@ CAMLprim value caml_ephe_check_data (value ar) CAMLprim value caml_ephe_blit_key (value ars, value ofs, value ard, value ofd, value len) { - mlsize_t offset_s = Long_val (ofs) + 2; - mlsize_t offset_d = Long_val (ofd) + 2; + mlsize_t offset_s = Long_val (ofs) + CAML_EPHE_FIRST_KEY; + mlsize_t offset_d = Long_val (ofd) + CAML_EPHE_FIRST_KEY; mlsize_t length = Long_val (len); long i; CAMLassert (Is_in_heap (ars)); CAMLassert (Is_in_heap (ard)); - if (offset_s < 1 || offset_s + length > Wosize_val (ars)){ + if (offset_s < CAML_EPHE_FIRST_KEY || offset_s + length > Wosize_val (ars)){ caml_invalid_argument ("Weak.blit"); } - if (offset_d < 1 || offset_d + length > Wosize_val (ard)){ + if (offset_d < CAML_EPHE_FIRST_KEY || offset_d + length > Wosize_val (ard)){ caml_invalid_argument ("Weak.blit"); } if (caml_gc_phase == Phase_clean){ diff --git a/byterun/win32.c b/byterun/win32.c index 264ee201..1ce8ad5e 100644 --- a/byterun/win32.c +++ b/byterun/win32.c @@ -17,6 +17,11 @@ /* Win32-specific stuff */ +/* 
FILE_INFO_BY_HANDLE_CLASS and FILE_NAME_INFO are only available from Windows + Vista onwards */ +#undef _WIN32_WINNT +#define _WIN32_WINNT 0x0600 + #define WIN32_LEAN_AND_MEAN #include #include @@ -51,12 +56,6 @@ #define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG) #endif -/* Very old Microsoft headers don't include intptr_t */ -#if defined(_MSC_VER) && !defined(_UINTPTR_T_DEFINED) -typedef unsigned int uintptr_t; -#define _UINTPTR_T_DEFINED -#endif - unsigned short caml_win32_major = 0; unsigned short caml_win32_minor = 0; unsigned short caml_win32_build = 0; @@ -294,8 +293,6 @@ static volatile sighandler ctrl_handler_action = SIG_DFL; static BOOL WINAPI ctrl_handler(DWORD event) { - int saved_mode; - /* Only ctrl-C and ctrl-Break are handled */ if (event != CTRL_C_EVENT && event != CTRL_BREAK_EVENT) return FALSE; /* Default behavior is to exit, which we get by not handling the event */ @@ -367,7 +364,7 @@ static void expand_argument(wchar_t * arg) static void expand_pattern(wchar_t * pat) { wchar_t * prefix, * p, * name; - int handle; + intptr_t handle; struct _wfinddata_t ffblk; size_t i; @@ -380,7 +377,7 @@ static void expand_pattern(wchar_t * pat) /* We need to stop at the first directory or drive boundary, because the * _findata_t structure contains the filename, not the leading directory. */ for (i = wcslen(prefix); i > 0; i--) { - char c = prefix[i - 1]; + wchar_t c = prefix[i - 1]; if (c == L'\\' || c == L'/' || c == L':') { prefix[i] = 0; break; } } /* No separator was found, it's a filename pattern without a leading directory. */ @@ -416,11 +413,7 @@ int caml_read_directory(wchar_t * dirname, struct ext_table * contents) { size_t dirnamelen; wchar_t * template; -#if _MSC_VER <= 1200 - int h; -#else intptr_t h; -#endif struct _wfinddata_t fileinfo; dirnamelen = wcslen(dirname); @@ -731,7 +724,51 @@ int caml_snprintf(char * buf, size_t size, const char * format, ...) 
wchar_t *caml_secure_getenv (wchar_t const *var) { /* Win32 doesn't have a notion of setuid bit, so getenv is safe. */ - return CAML_SYS_GETENV (var); + return _wgetenv(var); +} + +/* caml_win32_getenv is used to implement Sys.getenv and Unix.getenv in such a + way that they get direct access to the Win32 environment rather than to the + copy that is cached by the C runtime system. The result of caml_win32_getenv + is dynamically allocated and must be explicitly deallocated. + + In contrast, the OCaml runtime system still calls _wgetenv from the C runtime + system, via caml_secure_getenv. The result is statically allocated and needs + no deallocation. */ +CAMLexport wchar_t *caml_win32_getenv(wchar_t const *lpName) +{ + wchar_t * lpBuffer; + DWORD nSize = 256, res; + + lpBuffer = caml_stat_alloc_noexc(nSize * sizeof(wchar_t)); + + if (lpBuffer == NULL) + return NULL; + + res = GetEnvironmentVariable(lpName, lpBuffer, nSize); + + if (res == 0) { + caml_stat_free(lpBuffer); + return NULL; + } + + if (res < nSize) + return lpBuffer; + + nSize = res; + lpBuffer = caml_stat_resize_noexc(lpBuffer, nSize * sizeof(wchar_t)); + + if (lpBuffer == NULL) + return NULL; + + res = GetEnvironmentVariable(lpName, lpBuffer, nSize); + + if (res == 0 || res >= nSize) { + caml_stat_free(lpBuffer); + return NULL; + } + + return lpBuffer; } /* The rename() implementation in MSVC's CRT is based on MoveFile() @@ -914,3 +951,69 @@ void caml_restore_win32_terminal(void) if (startup_codepage != 0) SetConsoleOutputCP(startup_codepage); } + +/* Detect if a named pipe corresponds to a Cygwin/MSYS pty: see + https://github.com/mirror/newlib-cygwin/blob/00e9bf2/winsup/cygwin/dtable.cc#L932 +*/ +typedef +BOOL (WINAPI *tGetFileInformationByHandleEx)(HANDLE, FILE_INFO_BY_HANDLE_CLASS, + LPVOID, DWORD); + +static int caml_win32_is_cygwin_pty(HANDLE hFile) +{ + char buffer[1024]; + FILE_NAME_INFO * nameinfo = (FILE_NAME_INFO *) buffer; + static tGetFileInformationByHandleEx 
pGetFileInformationByHandleEx = INVALID_HANDLE_VALUE; + + if (pGetFileInformationByHandleEx == INVALID_HANDLE_VALUE) + pGetFileInformationByHandleEx = + (tGetFileInformationByHandleEx)GetProcAddress(GetModuleHandle(L"KERNEL32.DLL"), + "GetFileInformationByHandleEx"); + + if (pGetFileInformationByHandleEx == NULL) + return 0; + + /* Get pipe name. GetFileInformationByHandleEx does not NULL-terminate the string, so reduce + the buffer size to allow for adding one. */ + if (! pGetFileInformationByHandleEx(hFile, FileNameInfo, buffer, sizeof(buffer) - sizeof(WCHAR))) + return 0; + + nameinfo->FileName[nameinfo->FileNameLength / sizeof(WCHAR)] = L'\0'; + + /* check if this could be a msys pty pipe ('msys-XXXX-ptyN-XX') + or a cygwin pty pipe ('cygwin-XXXX-ptyN-XX') */ + if ((wcsstr(nameinfo->FileName, L"msys-") || + wcsstr(nameinfo->FileName, L"cygwin-")) && wcsstr(nameinfo->FileName, L"-pty")) + return 1; + + return 0; +} + +CAMLexport int caml_win32_isatty(int fd) +{ + DWORD lpMode; + HANDLE hFile = (HANDLE)_get_osfhandle(fd); + + if (hFile == INVALID_HANDLE_VALUE) + return 0; + + switch (GetFileType(hFile)) { + case FILE_TYPE_CHAR: + /* Both console handles and the NUL device are FILE_TYPE_CHAR. The NUL + device returns FALSE for a GetConsoleMode call. _isatty incorrectly + only uses GetFileType (see GPR#1321). 
*/ + return GetConsoleMode(hFile, &lpMode); + case FILE_TYPE_PIPE: + /* Cygwin PTYs are implemented using named pipes */ + return caml_win32_is_cygwin_pty(hFile); + default: + break; + } + + return 0; +} + +int caml_num_rows_fd(int fd) +{ + return -1; +} diff --git a/config/Makefile-templ b/config/Makefile-templ index 4797a0dd..34af691e 100644 --- a/config/Makefile-templ +++ b/config/Makefile-templ @@ -172,14 +172,13 @@ RANLIBCMD=ranlib # Currently available: # unix Unix system calls # str Regular expressions and high-level string processing -# num Arbitrary-precision rational arithmetic # threads Lightweight concurrent processes # systhreads Same as threads, requires POSIX threads # graph Portable drawing primitives for X11 # dynlink Dynamic linking of bytecode # bigarray Large, multidimensional numerical arrays -OTHERLIBRARIES=unix str num threads graph dynlink bigarray +OTHERLIBRARIES=unix str threads graph dynlink bigarray ### Link-time options to ocamlc or ocamlopt for linking with POSIX threads # Needed for the "systhreads" package diff --git a/config/Makefile.mingw b/config/Makefile.mingw index 3fafb0fc..3f1eb9b4 100644 --- a/config/Makefile.mingw +++ b/config/Makefile.mingw @@ -96,6 +96,7 @@ UNIX_OR_WIN32=win32 UNIXLIB=win32unix GRAPHLIB=win32graph FLAMBDA=false +WITH_FLAMBDA_INVARIANTS=false WITH_SPACETIME=false ENABLE_CALL_COUNTS=false WITH_PROFINFO=false @@ -106,6 +107,7 @@ FORCE_SAFE_STRING=false DEFAULT_SAFE_STRING=true WINDOWS_UNICODE=1 AFL_INSTRUMENT=false +AWK=gawk ########## Configuration for the bytecode compiler @@ -133,7 +135,7 @@ FLEXLINK_CMD=flexlink FLEXDLL_CHAIN=mingw # FLEXLINK_FLAGS must be safe to insert in an OCaml string # (see ocamlmklibconfig.ml in tools/Makefile) -FLEXLINK_FLAGS=-chain $(FLEXDLL_CHAIN) -stack 16777216 -link -static-libgcc +FLEXLINK_FLAGS=-chain $(FLEXDLL_CHAIN) -stack 16777216 FLEXLINK=$(FLEXLINK_CMD) $(FLEXLINK_FLAGS) FLEXDIR:=$(shell $(FLEXLINK) -where 2>/dev/null) ifeq ($(FLEXDIR),) @@ -156,10 +158,10 @@ 
MKEXE_BOOT=$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUTEXE)$(1) $(2) MKEXE_ANSI=$(FLEXLINK) -exe ### How to build a static library -MKLIB=rm -f $(1); $(TOOLPREF)ar rc $(1) $(2); $(RANLIB) $(1) +MKLIB=rm -f $(1) && $(TOOLPREF)ar rcs $(1) $(2) #ml let mklib out files opts = -#ml Printf.sprintf "rm -f %s && %sar rcs %s %s %s" -#ml out toolpref opts out files;; +#ml Printf.sprintf "%sar rcs %s %s %s" +#ml toolpref opts out files;; ### Canonicalize the name of a system library SYSLIB=-l$(1) diff --git a/config/Makefile.mingw64 b/config/Makefile.mingw64 index 0466e63c..df605a9f 100644 --- a/config/Makefile.mingw64 +++ b/config/Makefile.mingw64 @@ -96,6 +96,7 @@ UNIX_OR_WIN32=win32 UNIXLIB=win32unix GRAPHLIB=win32graph FLAMBDA=false +WITH_FLAMBDA_INVARIANTS=false WITH_PROFINFO=false WITH_SPACETIME=false ENABLE_CALL_COUNTS=false @@ -106,6 +107,7 @@ FORCE_SAFE_STRING=false DEFAULT_SAFE_STRING=true WINDOWS_UNICODE=1 AFL_INSTRUMENT=false +AWK=gawk ########## Configuration for the bytecode compiler @@ -156,10 +158,10 @@ MKEXE_BOOT=$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUTEXE)$(1) $(2) MKEXE_ANSI=$(FLEXLINK) -exe ### How to build a static library -MKLIB=rm -f $(1); $(TOOLPREF)ar rc $(1) $(2); $(RANLIB) $(1) +MKLIB=rm -f $(1) && $(TOOLPREF)ar rcs $(1) $(2) #ml let mklib out files opts = -#ml Printf.sprintf "rm -f %s && %sar rcs %s %s %s" -#ml out toolpref opts out files;; +#ml Printf.sprintf "%sar rcs %s %s %s" +#ml toolpref opts out files;; ### Canonicalize the name of a system library SYSLIB=-l$(1) diff --git a/config/Makefile.msvc b/config/Makefile.msvc index 9f623684..743d3687 100644 --- a/config/Makefile.msvc +++ b/config/Makefile.msvc @@ -89,6 +89,7 @@ UNIX_OR_WIN32=win32 UNIXLIB=win32unix GRAPHLIB=win32graph FLAMBDA=false +WITH_FLAMBDA_INVARIANTS=false WITH_PROFINFO=false WITH_SPACETIME=false ENABLE_CALL_COUNTS=false @@ -99,6 +100,7 @@ FORCE_SAFE_STRING=false DEFAULT_SAFE_STRING=true WINDOWS_UNICODE=1 AFL_INSTRUMENT=false +AWK=gawk ########## Configuration for the bytecode compiler diff 
--git a/config/Makefile.msvc64 b/config/Makefile.msvc64 index f7b52033..67bf16a2 100644 --- a/config/Makefile.msvc64 +++ b/config/Makefile.msvc64 @@ -88,6 +88,7 @@ UNIX_OR_WIN32=win32 UNIXLIB=win32unix GRAPHLIB=win32graph FLAMBDA=false +WITH_FLAMBDA_INVARIANTS=false WITH_PROFINFO=false WITH_SPACETIME=false ENABLE_CALL_COUNTS=false @@ -98,6 +99,7 @@ FORCE_SAFE_STRING=false DEFAULT_SAFE_STRING=true WINDOWS_UNICODE=1 AFL_INSTRUMENT=false +AWK=gawk ########## Configuration for the bytecode compiler diff --git a/config/s-nt.h b/config/s-nt.h index c8de2cab..ab4046b3 100644 --- a/config/s-nt.h +++ b/config/s-nt.h @@ -17,7 +17,7 @@ #define OCAML_OS_TYPE "Win32" -#ifdef __MINGW32__ +#if defined(__MINGW32__) || _MSC_VER >= 1600 #define HAS_STDINT_H #endif #undef BSD_SIGNALS diff --git a/config/s-templ.h b/config/s-templ.h index 6eb971d2..1df4eb31 100644 --- a/config/s-templ.h +++ b/config/s-templ.h @@ -40,13 +40,6 @@ /* Define HAS_SIGSETMASK if you have sigsetmask(), as in BSD. */ -#define HAS_TERMCAP - -/* Define HAS_TERMCAP if you have the termcap functions to read the - terminal database, e.g. tgetent(), tgetstr(), tgetnum(), tputs(). - Also add the required libraries (e.g. 
-lcurses -ltermcap) to $(CCLIBS) - in ../Makefile.config */ - #define SUPPORT_DYNAMIC_LINKING /* Define SUPPORT_DYNAMIC_LINKING if dynamic loading of C stub code diff --git a/configure b/configure index 20339655..1316b3c1 100755 --- a/configure +++ b/configure @@ -33,7 +33,6 @@ cpp='cpp' asoption='' asppoption='' cclibs='' -curseslibs='' mathlib='-lm' dllib='' x11_include_dir='' @@ -46,9 +45,7 @@ graph_wanted=yes pthread_wanted=yes dl_defs='' verbose=false -with_curses=yes -debugruntime=false -with_instrumented_runtime=false +debugruntime=true with_sharedlibs=true partialld="ld -r" with_debugger=ocamldebugger @@ -70,6 +67,10 @@ max_testsuite_dir_retries=0 with_cplugins=false with_fpic=false flat_float_array=true +with_flambda_invariants=false + +# we distinguish '' (not set) from 'true' (explicitly set by the user) +with_instrumented_runtime='' # Try to turn internationalization off, can cause config.guess to malfunction! unset LANG @@ -152,7 +153,7 @@ while : ; do -lib*) cclibs="$2 $cclibs"; shift;; -no-curses|--no-curses) - with_curses=no;; + ;; # Ignored for backward compatibility -no-shared-libs|--no-shared-libs) with_sharedlibs=false;; -x11include*|--x11include*) @@ -173,9 +174,13 @@ while : ; do -verbose|--verbose) verbose=true;; -with-debug-runtime|--with-debug-runtime) - debugruntime=true;; + debugruntime=true;; # default + -no-debug-runtime|--no-debug-runtime) + debugruntime=false;; -with-instrumented-runtime|--with-instrumented-runtime) with_instrumented_runtime=true;; + -no-instrumented-runtime|--no-instrumented-runtime) + with_instrumented_runtime=false;; -no-debugger|--no-debugger) with_debugger="";; -no-ocamldoc|--no-ocamldoc) @@ -206,6 +211,8 @@ while : ; do native_compiler=false;; -flambda|--flambda) flambda=true;; + -with-flambda-invariants|--with-flambda-invariants) + with_flambda_invariants=true;; -with-cplugins|--with-cplugins) with_cplugins=true;; -no-cplugins|--no-cplugins) @@ -544,7 +551,6 @@ case "$cc,$target" in exe=".exe" ostype="Cygwin";; 
*,*-*-mingw*) - dllccompopt="-DCAML_DLL" if $with_sharedlibs; then case "$target" in i686-*-*) flexlink_chain="mingw";; @@ -974,7 +980,9 @@ case "$target" in powerpc-*-openbsd*) arch=power; model=ppc; system=bsd_elf;; s390x*-*-linux*) arch=s390x; model=z10; system=elf;; armv6*-*-linux-gnueabihf) arch=arm; model=armv6; system=linux_eabihf;; - arm*-*-linux-gnueabihf) arch=arm; system=linux_eabihf;; + armv7*-*-linux-gnueabihf) arch=arm; model=armv7; system=linux_eabihf;; + armv8*-*-linux-gnueabihf) arch=arm; model=armv8; system=linux_eabihf;; + armv8*-*-linux-gnueabi) arch=arm; model=armv8; system=linux_eabi;; armv7*-*-linux-gnueabi) arch=arm; model=armv7; system=linux_eabi;; armv6t2*-*-linux-gnueabi) arch=arm; model=armv6t2; system=linux_eabi;; armv6*-*-linux-gnueabi) arch=arm; model=armv6; system=linux_eabi;; @@ -1191,35 +1199,30 @@ if sh ./hasgot -i unistd.h issetugid; then echo "#define HAS_ISSETUGID" >> s.h fi -# For the terminfo module - -if test "$with_curses" = "yes"; then - for libs in "" "-lcurses" "-ltermcap" "-lcurses -ltermcap" "-lncurses"; do - if sh ./hasgot $libs tgetent tgetstr tgetnum tputs; then - inf "termcap functions found (with libraries '$libs')" - echo "#define HAS_TERMCAP" >> s.h - curseslibs="${libs}" - break - fi - done -fi - # For instrumented runtime # (clock_gettime needs -lrt for glibc before 2.17) -if $with_instrumented_runtime; then - with_instrumented_runtime=false #enabled it only if found +if test "$with_instrumented_runtime" != "false"; then + instrumented_runtime_support="nonsupported" for libs in "" "-lrt"; do if sh ./hasgot $libs clock_gettime; then inf "clock_gettime functions found (with libraries '$libs')" instrumented_runtime_libs="${libs}" - with_instrumented_runtime=true; + instrumented_runtime_support="supported"; break fi done - if ! $with_instrumented_runtime; then - err "clock_gettime functions not found. " \ - "Instrumented runtime can't be built." 
- fi + case "$with_instrumented_runtime,$instrumented_runtime_support" in + *,supported) + with_instrumented_runtime=true;; + true,nonsupported) + with_instrumented_runtime=false; + err "clock_gettime functions not found. " \ + "Instrumented runtime can't be built.";; + ,nonsupported) + with_instrumented_runtime=false; + inf "clock_gettime functions not found. " \ + "Instrumented runtime can't be built.";; + esac fi # Configuration for the libraries @@ -1783,6 +1786,8 @@ if test "$x11_include" = "not found"; then \ /usr/lib/i386-linux-gnu \ /usr/lib/x86_64-linux-gnu \ + \ + /usr/lib/`echo $target | sed -e "s/-[^-]*//"` \ ; \ do if test -f $dir/libX11.a || \ @@ -1793,7 +1798,6 @@ if test "$x11_include" = "not found"; then if test $dir = /usr/lib; then x11_link="-lX11" else - x11_libs="-L$dir" case "$target" in *-*-freebsd*|*-*-dragonfly*) x11_link="-L$dir -lX11";; *-kfreebsd*-gnu) x11_link="-L$dir -lX11";; @@ -1806,7 +1810,7 @@ if test "$x11_include" = "not found"; then done fi -if test "x11_include" != "not found"; then +if test "$x11_include" != "not found"; then if test "$x11_include" = "-I/usr/include"; then x11_include="" fi @@ -2057,8 +2061,7 @@ config CPPFLAGS "$common_cppflags $internal_cppflags" config OCAMLC_CFLAGS "$common_cflags $sharedcccompopts" config OCAMLC_CPPFLAGS "$common_cppflags" config LDFLAGS "$ldflags" -config BYTECCLIBS "$cclibs $dllib $curseslibs $pthread_link \ - $instrumented_runtime_libs" +config BYTECCLIBS "$cclibs $dllib $pthread_link $instrumented_runtime_libs" config RPATH "$rpath" config EXE "$exe" config EMPTY "" @@ -2072,9 +2075,9 @@ SYSLIB=-l\$(1) #ml let syslib x = "-l"^x;; ### How to build a static library -MKLIB=${TOOLPREF}ar rc \$(1) \$(2); ${TOOLPREF}ranlib \$(1) +MKLIB=rm -f \$(1) && ${TOOLPREF}ar rc \$(1) \$(2) && ${TOOLPREF}ranlib \$(1) #ml let mklib out files opts = (* "" *) -#ml Printf.sprintf "${TOOLPREF}ar rc %s %s %s; ${TOOLPREF}ranlib %s" +#ml Printf.sprintf "${TOOLPREF}ar rc %s %s %s && ${TOOLPREF}ranlib %s" #ml 
out opts files out;; EOF config ARCH "$arch" @@ -2132,12 +2135,14 @@ if [ "$ostype" = Cygwin ]; then config DIFF "diff -q --strip-trailing-cr" fi config FLAMBDA "$flambda" +config WITH_FLAMBDA_INVARIANTS "$with_flambda_invariants" config FORCE_SAFE_STRING "$force_safe_string" config DEFAULT_SAFE_STRING "$default_safe_string" config WINDOWS_UNICODE "0" config AFL_INSTRUMENT "$afl_instrument" config MAX_TESTSUITE_DIR_RETRIES "$max_testsuite_dir_retries" config FLAT_FLOAT_ARRAY "$flat_float_array" +config AWK "awk" rm -f tst hasgot.c @@ -2158,8 +2163,7 @@ inf " manual pages.............. $mandir (with extension .$programs_man_s inf "Configuration for the bytecode compiler:" inf " C compiler used........... $cc" inf " options for compiling..... $common_cflags" -inf " options for linking....... $ldflags $cclibs $dllib" \ - "$curseslibs $pthread_link" +inf " options for linking....... $ldflags $cclibs $dllib $pthread_link" if $shared_libraries_supported; then inf " shared libraries are supported" inf " options for compiling..... $sharedcccompopts $common_cflags" @@ -2203,7 +2207,7 @@ else fi if $with_spacetime; then inf " spacetime profiling....... yes" - if test "$with_spacetime_call_counts" = "true"; then + if test "$enable_call_counts" = "true"; then inf " ... with call counts.... yes" else inf " ... with call counts.... no" @@ -2251,6 +2255,11 @@ else fi if test "$flambda" = "true"; then inf " using flambda middle-end . yes" + if test "$with_flambda_invariants" = "true"; then + inf " ... with flambda invariants checks . yes" + else + inf " ... with flambda invariants checks . no" + fi else inf " using flambda middle-end . 
no" fi diff --git a/debugger/.depend b/debugger/.depend index 86b18ab6..5e1310c4 100644 --- a/debugger/.depend +++ b/debugger/.depend @@ -1,7 +1,9 @@ -breakpoints.cmo : symbols.cmi pos.cmi ../bytecomp/instruct.cmi exec.cmi \ - debugcom.cmi checkpoints.cmi breakpoints.cmi -breakpoints.cmx : symbols.cmx pos.cmx ../bytecomp/instruct.cmx exec.cmx \ - debugcom.cmx checkpoints.cmx breakpoints.cmi +breakpoints.cmo : symbols.cmi pos.cmi parameters.cmi \ + ../bytecomp/instruct.cmi exec.cmi debugcom.cmi checkpoints.cmi \ + breakpoints.cmi +breakpoints.cmx : symbols.cmx pos.cmx parameters.cmx \ + ../bytecomp/instruct.cmx exec.cmx debugcom.cmx checkpoints.cmx \ + breakpoints.cmi breakpoints.cmi : ../bytecomp/instruct.cmi checkpoints.cmo : primitives.cmi int64ops.cmi debugcom.cmi checkpoints.cmi checkpoints.cmx : primitives.cmx int64ops.cmx debugcom.cmx checkpoints.cmi @@ -67,9 +69,9 @@ history.cmx : primitives.cmx int64ops.cmx debugger_config.cmx \ checkpoints.cmx history.cmi history.cmi : input_handling.cmo : $(UNIXDIR)/unix.cmi primitives.cmi \ - input_handling.cmi + parameters.cmi input_handling.cmi input_handling.cmx : $(UNIXDIR)/unix.cmx primitives.cmx \ - input_handling.cmi + parameters.cmx input_handling.cmi input_handling.cmi : primitives.cmi int64ops.cmo : int64ops.cmi int64ops.cmx : int64ops.cmi @@ -78,28 +80,28 @@ lexer.cmo : parser.cmi lexer.cmi lexer.cmx : parser.cmx lexer.cmi lexer.cmi : parser.cmi loadprinter.cmo : ../typing/types.cmi ../bytecomp/symtable.cmi printval.cmi \ - ../typing/printtyp.cmi ../typing/path.cmi ../utils/misc.cmi \ - ../parsing/longident.cmi ../parsing/location.cmi ../typing/ident.cmi \ - ../typing/env.cmi ../typing/ctype.cmi ../utils/config.cmi \ - ../driver/compdynlink.cmi loadprinter.cmi + ../typing/printtyp.cmi ../typing/path.cmi parameters.cmi \ + ../utils/misc.cmi ../parsing/longident.cmi ../parsing/location.cmi \ + ../typing/ident.cmi ../typing/env.cmi ../typing/ctype.cmi \ + ../utils/config.cmi ../driver/compdynlink.cmi 
loadprinter.cmi loadprinter.cmx : ../typing/types.cmx ../bytecomp/symtable.cmx printval.cmx \ - ../typing/printtyp.cmx ../typing/path.cmx ../utils/misc.cmx \ - ../parsing/longident.cmx ../parsing/location.cmx ../typing/ident.cmx \ - ../typing/env.cmx ../typing/ctype.cmx ../utils/config.cmx \ - ../driver/compdynlink.cmi loadprinter.cmi + ../typing/printtyp.cmx ../typing/path.cmx parameters.cmx \ + ../utils/misc.cmx ../parsing/longident.cmx ../parsing/location.cmx \ + ../typing/ident.cmx ../typing/env.cmx ../typing/ctype.cmx \ + ../utils/config.cmx ../driver/compdynlink.cmi loadprinter.cmi loadprinter.cmi : ../parsing/longident.cmi ../driver/compdynlink.cmi main.cmo : unix_tools.cmi $(UNIXDIR)/unix.cmi time_travel.cmi \ show_information.cmi question.cmi program_management.cmi primitives.cmi \ - parameters.cmi ../utils/misc.cmi input_handling.cmi frames.cmi exec.cmi \ - ../typing/env.cmi debugger_config.cmi ../utils/config.cmi \ - command_line.cmi ../typing/cmi_format.cmi ../utils/clflags.cmi \ - checkpoints.cmi + parameters.cmi ../utils/misc.cmi loadprinter.cmi input_handling.cmi \ + frames.cmi exec.cmi ../typing/env.cmi debugger_config.cmi \ + ../utils/config.cmi command_line.cmi ../typing/cmi_format.cmi \ + ../utils/clflags.cmi checkpoints.cmi main.cmx : unix_tools.cmx $(UNIXDIR)/unix.cmx time_travel.cmx \ show_information.cmx question.cmx program_management.cmx primitives.cmx \ - parameters.cmx ../utils/misc.cmx input_handling.cmx frames.cmx exec.cmx \ - ../typing/env.cmx debugger_config.cmx ../utils/config.cmx \ - command_line.cmx ../typing/cmi_format.cmx ../utils/clflags.cmx \ - checkpoints.cmx + parameters.cmx ../utils/misc.cmx loadprinter.cmx input_handling.cmx \ + frames.cmx exec.cmx ../typing/env.cmx debugger_config.cmx \ + ../utils/config.cmx command_line.cmx ../typing/cmi_format.cmx \ + ../utils/clflags.cmx checkpoints.cmx parameters.cmo : primitives.cmi ../typing/envaux.cmi debugger_config.cmi \ ../utils/config.cmi parameters.cmi parameters.cmx : 
primitives.cmx ../typing/envaux.cmx debugger_config.cmx \ diff --git a/debugger/Makefile b/debugger/Makefile index 2c130dd1..571e91bb 100644 --- a/debugger/Makefile +++ b/debugger/Makefile @@ -14,6 +14,8 @@ #************************************************************************** include ../config/Makefile +include ../Makefile.common + UNIXDIR=../otherlibs/$(UNIXLIB) CAMLRUN ?= ../boot/ocamlrun CAMLYACC ?= ../boot/ocamlyacc @@ -27,8 +29,6 @@ CAMLLEX=$(CAMLRUN) ../boot/ocamllex CAMLDEP=$(CAMLRUN) ../tools/ocamldep DEPFLAGS=$(INCLUDES) -INSTALL_BINDIR=$(DESTDIR)$(BINDIR) - INCLUDES=\ -I ../utils -I ../parsing -I ../typing -I ../bytecomp -I ../toplevel \ -I ../driver -I $(UNIXDIR) @@ -39,6 +39,7 @@ OTHEROBJS=\ ../utils/identifiable.cmo ../utils/numbers.cmo \ ../utils/arg_helper.cmo ../utils/clflags.cmo \ ../utils/consistbl.cmo ../utils/warnings.cmo \ + ../utils/build_path_prefix_map.cmo \ ../utils/terminfo.cmo \ ../parsing/location.cmo ../parsing/longident.cmo ../parsing/docstrings.cmo \ ../parsing/syntaxerr.cmo \ @@ -96,7 +97,7 @@ ocamldebug$(EXE): $(OBJS) $(OTHEROBJS) $(CAMLC) $(LINKFLAGS) -o ocamldebug$(EXE) -linkall $(OTHEROBJS) $(OBJS) install: - cp ocamldebug$(EXE) "$(INSTALL_BINDIR)/ocamldebug$(EXE)" + $(INSTALL_PROG) ocamldebug$(EXE) "$(INSTALL_BINDIR)/ocamldebug$(EXE)" clean:: rm -f ocamldebug$(EXE) diff --git a/debugger/breakpoints.ml b/debugger/breakpoints.ml index 5d5b6ced..62e8ecfb 100644 --- a/debugger/breakpoints.ml +++ b/debugger/breakpoints.ml @@ -170,9 +170,11 @@ let rec new_breakpoint = incr breakpoint_number; insert_position event.ev_pos; breakpoints := (!breakpoint_number, event) :: !breakpoints); - printf "Breakpoint %d at %d: %s" !breakpoint_number event.ev_pos - (Pos.get_desc event); - print_newline () + if !Parameters.breakpoint then begin + printf "Breakpoint %d at %d: %s" !breakpoint_number event.ev_pos + (Pos.get_desc event); + print_newline () + end (* Remove a breakpoint from lists. 
*) let remove_breakpoint number = @@ -183,9 +185,11 @@ let remove_breakpoint number = (function () -> breakpoints := List.remove_assoc number !breakpoints; remove_position pos; - printf "Removed breakpoint %d at %d: %s" number ev.ev_pos - (Pos.get_desc ev); - print_newline () + if !Parameters.breakpoint then begin + printf "Removed breakpoint %d at %d: %s" number ev.ev_pos + (Pos.get_desc ev); + print_newline () + end ) with Not_found -> diff --git a/debugger/input_handling.ml b/debugger/input_handling.ml index 91f4cc50..e69c5f4b 100644 --- a/debugger/input_handling.ml +++ b/debugger/input_handling.ml @@ -108,7 +108,7 @@ let stop_user_input () = (* Resume reading user input. *) let resume_user_input () = if not (List.mem_assoc !user_channel.io_fd !active_files) then begin - if !interactif then begin + if !interactif && !Parameters.prompt then begin print_string !current_prompt; flush Pervasives.stdout end; diff --git a/debugger/loadprinter.ml b/debugger/loadprinter.ml index 54a2c167..b657331b 100644 --- a/debugger/loadprinter.ml +++ b/debugger/loadprinter.ml @@ -106,11 +106,9 @@ let eval_path path = (* since 4.00, "topdirs.cmi" is not in the same directory as the standard library, so we load it beforehand as it cannot be found in the search path. 
*) -let () = - let compiler_libs = - Filename.concat Config.standard_library "compiler-libs" in +let init () = let topdirs = - Filename.concat compiler_libs "topdirs.cmi" in + Filename.concat !Parameters.topdirs_path "topdirs.cmi" in ignore (Env.read_signature "Topdirs" topdirs) let match_printer_type desc typename = @@ -124,7 +122,7 @@ let match_printer_type desc typename = let ty_arg = Ctype.newvar() in Ctype.unify Env.empty (Ctype.newconstr printer_type [ty_arg]) - (Ctype.instance Env.empty desc.val_type); + (Ctype.instance desc.val_type); Ctype.end_def(); Ctype.generalize ty_arg; ty_arg diff --git a/debugger/loadprinter.mli b/debugger/loadprinter.mli index c645e8d2..81e4814e 100644 --- a/debugger/loadprinter.mli +++ b/debugger/loadprinter.mli @@ -17,6 +17,8 @@ open Format +val init : unit -> unit + val loadfile : formatter -> string -> unit val install_printer : formatter -> Longident.t -> unit val remove_printer : Longident.t -> unit diff --git a/debugger/main.ml b/debugger/main.ml index 4f2b830f..87027596 100644 --- a/debugger/main.ml +++ b/debugger/main.ml @@ -152,6 +152,8 @@ let add_include d = Misc.expand_directory Config.standard_library d :: !default_load_path let set_socket s = socket_name := s +let set_topdirs_path s = + topdirs_path := s let set_checkpoints n = checkpoint_max_count := n let set_directory dir = @@ -182,6 +184,16 @@ let speclist = [ " Print version and exit"; "-vnum", Arg.Unit print_version_num, " Print version number and exit"; + "-no-version", Arg.Clear Parameters.version, + " Do not print version at startup"; + "-no-prompt", Arg.Clear Parameters.prompt, + " Suppress all prompts"; + "-no-time", Arg.Clear Parameters.time, + " Do not print times"; + "-no-breakpoint-message", Arg.Clear Parameters.breakpoint, + " Do not print message at breakpoint setup and removal"; + "-topdirs-path", Arg.String set_topdirs_path, + " Set path to the directory containing topdirs.cmi"; ] let function_placeholder () = @@ -211,7 +223,9 @@ let main () = 
arguments := !arguments ^ " " ^ (Filename.quote Sys.argv.(j)) done end; - printf "\tOCaml Debugger version %s@.@." Config.version; + if !Parameters.version + then printf "\tOCaml Debugger version %s@.@." Config.version; + Loadprinter.init(); Config.load_path := !default_load_path; Clflags.recursive_types := true; (* Allow recursive types. *) toplevel_loop (); (* Toplevel. *) diff --git a/debugger/parameters.ml b/debugger/parameters.ml index a4d647c4..ea11698a 100644 --- a/debugger/parameters.ml +++ b/debugger/parameters.ml @@ -27,6 +27,13 @@ let arguments = ref "" let default_load_path = ref [ Filename.current_dir_name; Config.standard_library ] +let breakpoint = ref true +let prompt = ref true +let time = ref true +let version = ref true + +let topdirs_path = ref (Filename.concat Config.standard_library "compiler-libs") + let add_path dir = load_path := dir :: except dir !load_path; Envaux.reset_cache() diff --git a/debugger/parameters.mli b/debugger/parameters.mli index 388fb94d..d680e7f1 100644 --- a/debugger/parameters.mli +++ b/debugger/parameters.mli @@ -20,6 +20,11 @@ val program_name : string ref val socket_name : string ref val arguments : string ref val default_load_path : string list ref +val breakpoint : bool ref +val prompt : bool ref +val time : bool ref +val version : bool ref +val topdirs_path : string ref val add_path : string -> unit val add_path_for : string -> string -> unit diff --git a/debugger/show_information.ml b/debugger/show_information.ml index 30d7774e..29fe1fb6 100644 --- a/debugger/show_information.ml +++ b/debugger/show_information.ml @@ -28,20 +28,23 @@ open Parameters (* Display information about the current event. 
*) let show_current_event ppf = - fprintf ppf "Time: %Li" (current_time ()); - (match current_pc () with - | Some pc -> - fprintf ppf " - pc: %i" pc - | _ -> ()); + if !Parameters.time then begin + fprintf ppf "Time: %Li" (current_time ()); + (match current_pc () with + | Some pc -> + fprintf ppf " - pc: %i" pc + | _ -> ()); + end; update_current_event (); reset_frame (); match current_report () with | None -> - fprintf ppf "@.Beginning of program.@."; + if !Parameters.time then fprintf ppf "@."; + fprintf ppf "Beginning of program.@."; show_no_point () | Some {rep_type = (Event | Breakpoint); rep_program_pointer = pc} -> let ev = get_current_event () in - fprintf ppf " - module %s@." ev.ev_module; + if !Parameters.time then fprintf ppf " - module %s@." ev.ev_module; (match breakpoints_at_pc pc with | [] -> () @@ -55,17 +58,20 @@ let show_current_event ppf = (List.sort compare breakpoints)); show_point ev true | Some {rep_type = Exited} -> - fprintf ppf "@.Program exit.@."; + if !Parameters.time then fprintf ppf "@."; + fprintf ppf "Program exit.@."; show_no_point () | Some {rep_type = Uncaught_exc} -> + if !Parameters.time then fprintf ppf "@."; fprintf ppf - "@.Program end.@.\ + "Program end.@.\ @[Uncaught exception:@ %a@]@." Printval.print_exception (Debugcom.Remote_value.accu ()); show_no_point () | Some {rep_type = Trap_barrier} -> (* Trap_barrier not visible outside *) (* of module `time_travel'. *) + if !Parameters.time then fprintf ppf "@."; Misc.fatal_error "Show_information.show_current_event" (* Display short information about one frame. *) diff --git a/debugger/symbols.ml b/debugger/symbols.ml index 31124974..2318c10f 100644 --- a/debugger/symbols.ml +++ b/debugger/symbols.ml @@ -167,7 +167,10 @@ let find_event ev char = else bsearch (pivot + 1) hi end in - bsearch 0 (Array.length ev - 1) + if Array.length ev = 0 then + raise Not_found + else + bsearch 0 (Array.length ev - 1) (* Return first event after the given position. 
*) (* Raise [Not_found] if module is unknown or no event is found. *) diff --git a/driver/compenv.ml b/driver/compenv.ml index 194fa617..704a65f5 100644 --- a/driver/compenv.ml +++ b/driver/compenv.ml @@ -101,19 +101,30 @@ type filename = string type readenv_position = Before_args | Before_compile of filename | Before_link -(* Syntax of OCAMLPARAM: (name=VALUE,)* _ (,name=VALUE)* - where VALUE should not contain ',' *) +(* Syntax of OCAMLPARAM: SEP?(name=VALUE SEP)* _ (SEP name=VALUE)* + where VALUE should not contain SEP, and SEP is ',' if unspecified, + or ':', '|', ';', ' ' or ',' *) exception SyntaxError of string let parse_args s = - let args = String.split_on_char ',' s in + let args = + let len = String.length s in + if len = 0 then [] + else + (* allow first char to specify an alternative separator in ":|; ," *) + match s.[0] with + | ( ':' | '|' | ';' | ' ' | ',' ) as c -> + List.tl (String.split_on_char c s) + | _ -> String.split_on_char ',' s + in let rec iter is_after args before after = match args with [] -> if not is_after then raise (SyntaxError "no '_' separator found") else - (List.rev before, List.rev after) + (List.rev before, List.rev after) + | "" :: tail -> iter is_after tail before after | "_" :: _ when is_after -> raise (SyntaxError "too many '_' separators") | "_" :: tail -> iter true tail before after | arg :: tail -> @@ -329,6 +340,8 @@ let read_one_param ppf position name v = set "flambda-verbose" [ dump_flambda_verbose ] v | "flambda-invariants" -> set "flambda-invariants" [ flambda_invariant_checks ] v + | "linscan" -> + set "linscan" [ use_linscan ] v (* color output *) | "color" -> @@ -358,7 +371,9 @@ let read_one_param ppf position name v = ccobjs := Misc.rev_split_words v @ !ccobjs end - | "ccopts" -> + | "ccopt" + | "ccopts" + -> begin match position with | Before_link | Before_compile _ -> diff --git a/driver/compile.ml b/driver/compile.ml index 98ac5be4..75a2470f 100644 --- a/driver/compile.ml +++ b/driver/compile.ml @@ -41,7 
+41,7 @@ let interface ppf sourcefile outputprefix = if !Clflags.dump_typedtree then fprintf ppf "%a@." Printtyped.interface tsg; let sg = tsg.sig_type in if !Clflags.print_types then - Printtyp.wrap_printing_env initial_env (fun () -> + Printtyp.wrap_printing_env ~error:false initial_env (fun () -> fprintf std_formatter "%a@." Printtyp.signature (Typemod.simplify_signature sg)); ignore (Includemod.signatures initial_env sg sg); diff --git a/driver/compmisc.ml b/driver/compmisc.ml index a0839f34..b1bed14b 100644 --- a/driver/compmisc.ml +++ b/driver/compmisc.ml @@ -43,27 +43,19 @@ let init_path ?(dir="") native = (* Note: do not do init_path() in initial_env, this breaks toplevel initialization (PR#1775) *) -let open_implicit_module m env = - let open Asttypes in - let lid = {loc = Location.in_file "command line"; - txt = Longident.parse m } in - snd (Typemod.type_open_ Override env lid.loc lid) - let initial_env () = Ident.reinit(); - let initial = - if Config.safe_string then Env.initial_safe_string - else if !Clflags.unsafe_string then Env.initial_unsafe_string - else Env.initial_safe_string - in - let env = - if !Clflags.nopervasives then initial else - open_implicit_module "Pervasives" initial + let initially_opened_module = + if !Clflags.nopervasives then + None + else + Some "Stdlib" in - List.fold_left (fun env m -> - open_implicit_module m env - ) env (!implicit_modules @ List.rev !Clflags.open_modules) - + Typemod.initial_env + ~loc:(Location.in_file "command line") + ~safe_string:(Config.safe_string || not !Clflags.unsafe_string) + ~initially_opened_module + ~open_implicit_modules:(!implicit_modules @ List.rev !Clflags.open_modules) let read_color_env ppf = try diff --git a/driver/main.ml b/driver/main.ml index 110ea3cf..5fc9387b 100644 --- a/driver/main.ml +++ b/driver/main.ml @@ -111,6 +111,8 @@ module Options = Main_args.Make_bytecomp_options (struct let _where = print_standard_library let _verbose = set verbose let _nopervasives = set nopervasives + 
let _dno_unique_ids = unset unique_ids + let _dunique_ids = set unique_ids let _dsource = set dump_source let _dparsetree = set dump_parsetree let _dtypedtree = set dump_typedtree diff --git a/driver/main_args.ml b/driver/main_args.ml index 757c7ac5..47e427eb 100644 --- a/driver/main_args.ml +++ b/driver/main_args.ml @@ -457,7 +457,7 @@ let mk_strict_sequence f = let mk_thread f = "-thread", Arg.Unit f, - " Generate code that supports the system threads library" + " (deprecated) same as -I +threads" ;; let mk_dtimings f = @@ -613,6 +613,14 @@ let mk_drawlambda f = "-drawlambda", Arg.Unit f, " (undocumented)" ;; +let mk_dno_unique_ids f = + "-dno-unique-ids", Arg.Unit f, " (undocumented)" +;; + +let mk_dunique_ids f = + "-dunique-ids", Arg.Unit f, " (undocumented)" +;; + let mk_dsource f = "-dsource", Arg.Unit f, " (undocumented)" ;; @@ -637,6 +645,11 @@ let mk_drawflambda f = "-drawflambda", Arg.Unit f, " Print Flambda terms after closure conversion" ;; +let mk_dflambda_invariants f = + "-dflambda-invariants", Arg.Unit f, " Check Flambda invariants \ + around each pass" +;; + let mk_dflambda_no_invariants f = "-dflambda-no-invariants", Arg.Unit f, " Do not Check Flambda invariants \ around each pass" @@ -809,6 +822,8 @@ module type Common_options = sig val _warn_error : string -> unit val _warn_help : unit -> unit + val _dno_unique_ids : unit -> unit + val _dunique_ids : unit -> unit val _dsource : unit -> unit val _dparsetree : unit -> unit val _dtypedtree : unit -> unit @@ -931,6 +946,7 @@ module type Optcommon_options = sig val _clambda_checks : unit -> unit val _dflambda : unit -> unit val _drawflambda : unit -> unit + val _dflambda_invariants : unit -> unit val _dflambda_no_invariants : unit -> unit val _dflambda_let : int -> unit val _dflambda_verbose : unit -> unit @@ -1082,6 +1098,8 @@ struct mk_nopervasives F._nopervasives; mk_use_prims F._use_prims; + mk_dno_unique_ids F._dno_unique_ids; + mk_dunique_ids F._dunique_ids; mk_dsource F._dsource; 
mk_dparsetree F._dparsetree; mk_dtypedtree F._dtypedtree; @@ -1139,6 +1157,8 @@ struct mk_warn_help F._warn_help; mk__ F.anonymous; + mk_dno_unique_ids F._dno_unique_ids; + mk_dunique_ids F._dunique_ids; mk_dsource F._dsource; mk_dparsetree F._dparsetree; mk_dtypedtree F._dtypedtree; @@ -1254,6 +1274,8 @@ struct mk__ F.anonymous; mk_nopervasives F._nopervasives; + mk_dno_unique_ids F._dno_unique_ids; + mk_dunique_ids F._dunique_ids; mk_dsource F._dsource; mk_dparsetree F._dparsetree; mk_dtypedtree F._dtypedtree; @@ -1263,6 +1285,7 @@ struct mk_dclambda F._dclambda; mk_dflambda F._dflambda; mk_drawflambda F._drawflambda; + mk_dflambda_invariants F._dflambda_invariants; mk_dflambda_no_invariants F._dflambda_no_invariants; mk_dflambda_let F._dflambda_let; mk_dflambda_verbose F._dflambda_verbose; diff --git a/driver/main_args.mli b/driver/main_args.mli index 3d6db535..4777f6b4 100644 --- a/driver/main_args.mli +++ b/driver/main_args.mli @@ -49,6 +49,8 @@ module type Common_options = sig val _warn_error : string -> unit val _warn_help : unit -> unit + val _dno_unique_ids : unit -> unit + val _dunique_ids : unit -> unit val _dsource : unit -> unit val _dparsetree : unit -> unit val _dtypedtree : unit -> unit @@ -171,6 +173,7 @@ module type Optcommon_options = sig val _clambda_checks : unit -> unit val _dflambda : unit -> unit val _drawflambda : unit -> unit + val _dflambda_invariants : unit -> unit val _dflambda_no_invariants : unit -> unit val _dflambda_let : int -> unit val _dflambda_verbose : unit -> unit diff --git a/driver/makedepend.ml b/driver/makedepend.ml index 6b888a0c..32d6e9d7 100644 --- a/driver/makedepend.ml +++ b/driver/makedepend.ml @@ -464,7 +464,8 @@ let sort_files_by_dependencies files = if !worklist <> [] then begin Format.fprintf Format.err_formatter - "@[Warning: cycle in dependencies. End of list is not sorted.@]@."; + "@[%t: cycle in dependencies. End of list is not sorted.@]@." 
+ Location.print_error_prefix; let sorted_deps = let li = ref [] in Hashtbl.iter (fun _ file_deps -> li := file_deps :: !li) h; @@ -478,6 +479,7 @@ let sort_files_by_dependencies files = ) !deps; Format.fprintf Format.err_formatter "@]@."; Printf.printf "%s " file) sorted_deps; + error_occurred := true end; Printf.printf "\n%!"; () diff --git a/driver/optcompile.ml b/driver/optcompile.ml index c450b5f6..d5b64c0f 100644 --- a/driver/optcompile.ml +++ b/driver/optcompile.ml @@ -41,7 +41,7 @@ let interface ppf sourcefile outputprefix = if !Clflags.dump_typedtree then fprintf ppf "%a@." Printtyped.interface tsg; let sg = tsg.sig_type in if !Clflags.print_types then - Printtyp.wrap_printing_env initial_env (fun () -> + Printtyp.wrap_printing_env ~error:false initial_env (fun () -> fprintf std_formatter "%a@." Printtyp.signature (Typemod.simplify_signature sg)); ignore (Includemod.signatures initial_env sg sg); diff --git a/driver/optmain.ml b/driver/optmain.ml index 33fc848d..44b49304 100644 --- a/driver/optmain.ml +++ b/driver/optmain.ml @@ -195,6 +195,8 @@ module Options = Main_args.Make_optcomp_options (struct let _where () = print_standard_library () let _nopervasives = set nopervasives + let _dno_unique_ids = clear unique_ids + let _dunique_ids = set unique_ids let _dsource = set dump_source let _dparsetree = set dump_parsetree let _dtypedtree = set dump_typedtree @@ -208,6 +210,7 @@ module Options = Main_args.Make_optcomp_options (struct let _dflambda_verbose () = set dump_flambda (); set dump_flambda_verbose () + let _dflambda_invariants = set flambda_invariant_checks let _dflambda_no_invariants = clear flambda_invariant_checks let _dcmm = set dump_cmm let _dsel = set dump_selection diff --git a/emacs/Makefile b/emacs/Makefile index 93b2d7d7..dcb7a958 100644 --- a/emacs/Makefile +++ b/emacs/Makefile @@ -13,6 +13,7 @@ #************************************************************************** include ../config/Makefile +include ../Makefile.common # Files to 
install FILES= caml-font.el caml-hilit.el caml.el camldebug.el \ @@ -64,7 +65,7 @@ install-el: simple-install: @echo "Installing in $(EMACSDIR)..." if test -d $(EMACSDIR); then : ; else mkdir -p $(EMACSDIR); fi - cp $(FILES) $(EMACSDIR) + $(INSTALL_DATA) $(FILES) $(EMACSDIR) if [ -z "$(NOCOMPILE)" ]; then \ cd $(EMACSDIR); $(EMACS) --batch --eval '$(COMPILECMD)'; \ fi @@ -74,7 +75,7 @@ ocamltags: ocamltags.in chmod a+x ocamltags install-ocamltags: ocamltags - cp ocamltags $(SCRIPTDIR)/ocamltags + $(INSTALL_DATA) ocamltags $(SCRIPTDIR)/ocamltags # This is for testing purposes compile-only: diff --git a/emacs/caml-font-old.el b/emacs/caml-font-old.el index 674beb68..fb39c223 100644 --- a/emacs/caml-font-old.el +++ b/emacs/caml-font-old.el @@ -42,7 +42,7 @@ (setq font-lock-variable-name-face 'DarkGoldenRod) (setq font-lock-type-face 'DarkOliveGreen) (setq font-lock-reference-face 'CadetBlue))) - ; extra faces for documention + ; extra faces for documentation (make-face 'Stop) (set-face-foreground 'Stop "White") (set-face-background 'Stop "Red") diff --git a/emacs/caml-font.el b/emacs/caml-font.el index ac339744..91c6fce9 100644 --- a/emacs/caml-font.el +++ b/emacs/caml-font.el @@ -62,7 +62,7 @@ "when" "while" "with") 'words)) . font-lock-constant-face) - ("\\" + ("\\<\\(raise\\|failwith\\|invalid_arg\\)\\>" . font-lock-comment-face) ;labels (and open) ("\\(\\([~?]\\|\\<\\)[a-z][a-zA-Z0-9_']*:\\)[^:=]" diff --git a/emacs/caml-help.el b/emacs/caml-help.el index 6b1abc65..9e3a221b 100644 --- a/emacs/caml-help.el +++ b/emacs/caml-help.el @@ -25,7 +25,7 @@ ;; This is a preliminary version. ;; ;; Possible improvements? -;; - dump some databaes: Info, Lib, ... +;; - dump some databases: Info, Lib, ... 
;; - accept a search path for local libraries instead of current dir ;; (then distinguish between different modules lying in different ;; directories) @@ -33,8 +33,8 @@ ;; ;; Abstract over ;; - the viewing method and the database, so that the documentation for -;; and identifier could be search in -;; * info / html / man / mli's sources +;; an identifier could be +;; * searched in info / html / man / mli's sources ;; * viewed in Emacs or using an external previewer. ;; ;; Take all identifiers (labels, Constructors, exceptions, etc.) @@ -56,7 +56,7 @@ (defvar ocaml-lib-path 'lazy "Path list for ocaml lib sources (mli files). -`lazy' means ask ocaml to find it for your at first use.") +`lazy' means ask ocaml to find it for you at first use.") (defun ocaml-lib-path () "Compute if necessary and return the path for ocaml libs." (if (listp ocaml-lib-path) nil @@ -233,7 +233,7 @@ ocaml-visible-modules) (defun ocaml-open-module (arg) - "*Make module of name ARG visible whe ARG is a string. + "*Make module of name ARG visible when ARG is a string. When call interactively, make completion over known modules." (interactive "P") (if (not (stringp arg)) @@ -335,7 +335,7 @@ with an optional non-nil argument." (defun caml-complete (arg) "Does completion for OCaml identifiers qualified. -It attemps to recognize an qualified identifier Module . entry +It attemps to recognize a qualified identifier Module . entry around point using function \\[ocaml-qualified-identifier]. If Module is defined, it does completion for identifier in Module. @@ -647,14 +647,14 @@ current buffer using \\[ocaml-qualified-identifier]." (defun caml-help (arg) "Find documentation for OCaml qualified identifiers. -It attemps to recognize an qualified identifier of the form +It attempts to recognize a qualified identifier of the form ``Module . entry'' around point using function `ocaml-qualified-identifier'. 
If Module is undetermined it is temptatively guessed from the identifier name -and according to visible modules. If this is still unsucessful, the user is +and according to visible modules. If this is still unsuccessful, the user is then prompted for a Module name. -The documentation for Module is first seach in the info manual if available, +The documentation for Module is first searched in the info manual, if available, then in the ``module.mli'' source file. The entry is then searched in the documentation. @@ -666,7 +666,7 @@ Prefix arg 0 forces recompilation of visible modules (and their content) from the file content. Prefix arg 4 prompts for Module and identifier instead of guessing values -from the possition of point in the current buffer." +from the position of point in the current buffer." (interactive "p") (delete-overlay ocaml-help-ovl) (let ((module) (entry) (module-entry)) @@ -726,9 +726,9 @@ from the possition of point in the current buffer." (defvar ocaml-links nil "Local links in the current of last info node or interface file. -The car of the list is a key that indentifies the module to prevent +The car of the list is a key that identifies the module to prevent recompilation when next help command is relative to the same module. -The cdr is a list of elments, each of which is an string and a pair of +The cdr is a list of elements, each of which is a string and a pair of buffer positions." ) (make-variable-buffer-local 'ocaml-links) diff --git a/emacs/caml-types.el b/emacs/caml-types.el index cc5d9152..dc01af64 100644 --- a/emacs/caml-types.el +++ b/emacs/caml-types.el @@ -669,7 +669,7 @@ The function uses two overlays. 
(error (message "End of buffer!"))))) (setq speed (* speed speed))))) ;; main action, when the motion is inside the window - ;; or on orginal button down event + ;; or on original button down event ((or (caml-mouse-movement-p event) (equal original-event event)) (setq cnum (caml-event-point-end event)) @@ -732,7 +732,7 @@ The function uses two overlays. ;; However, it could also be a key stroke before mouse release. ;; Emacs does not allow to test whether mouse is up or down. ;; Not sure it is robust to loop for mouse release after an error - ;; occured, as is done for exploration. + ;; occurred, as is done for exploration. ;; So far, we just ignore next event. (Next line also be uncommenting.) (if event (caml-read-event))))) diff --git a/emacs/caml.el b/emacs/caml.el index def64b91..1945fbad 100644 --- a/emacs/caml.el +++ b/emacs/caml.el @@ -75,179 +75,179 @@ Priorities are assigned to `interesting' caml operators as follows: (make-variable-buffer-local 'caml-apply-extra-indent) (defvar caml-begin-indent 2 - "*How many spaces to indent from a begin keyword in caml mode.") + "*How many spaces to indent from a \"begin\" keyword in caml mode.") (make-variable-buffer-local 'caml-begin-indent) (defvar caml-class-indent 2 - "*How many spaces to indent from a class keyword in caml mode.") + "*How many spaces to indent from a \"class\" keyword in caml mode.") (make-variable-buffer-local 'caml-class-indent) (defvar caml-exception-indent 2 - "*How many spaces to indent from a exception keyword in caml mode.") + "*How many spaces to indent from an \"exception\" keyword in caml mode.") (make-variable-buffer-local 'caml-exception-indent) (defvar caml-for-indent 2 - "*How many spaces to indent from a for keyword in caml mode.") + "*How many spaces to indent from a \"for\" keyword in caml mode.") (make-variable-buffer-local 'caml-for-indent) (defvar caml-fun-indent 2 - "*How many spaces to indent from a fun keyword in caml mode.") + "*How many spaces to indent from a \"fun\" 
keyword in caml mode.") (make-variable-buffer-local 'caml-fun-indent) (defvar caml-function-indent 4 - "*How many spaces to indent from a function keyword in caml mode.") + "*How many spaces to indent from a \"function\" keyword in caml mode.") (make-variable-buffer-local 'caml-function-indent) (defvar caml-if-indent 2 - "*How many spaces to indent from a if keyword in caml mode.") + "*How many spaces to indent from an \"if\" keyword in caml mode.") (make-variable-buffer-local 'caml-if-indent) (defvar caml-if-else-indent 0 - "*How many spaces to indent from an if .. else line in caml mode.") + "*How many spaces to indent from an \"if .. else\" line in caml mode.") (make-variable-buffer-local 'caml-if-else-indent) (defvar caml-inherit-indent 2 - "*How many spaces to indent from a inherit keyword in caml mode.") + "*How many spaces to indent from an \"inherit\" keyword in caml mode.") (make-variable-buffer-local 'caml-inherit-indent) (defvar caml-initializer-indent 2 - "*How many spaces to indent from a initializer keyword in caml mode.") + "*How many spaces to indent from an \"initializer\" keyword in caml mode.") (make-variable-buffer-local 'caml-initializer-indent) (defvar caml-include-indent 2 - "*How many spaces to indent from a include keyword in caml mode.") + "*How many spaces to indent from an \"include\" keyword in caml mode.") (make-variable-buffer-local 'caml-include-indent) (defvar caml-let-indent 2 - "*How many spaces to indent from a let keyword in caml mode.") + "*How many spaces to indent from a \"let\" keyword in caml mode.") (make-variable-buffer-local 'caml-let-indent) (defvar caml-let-in-indent 0 - "*How many spaces to indent from a let .. in keyword in caml mode.") + "*How many spaces to indent from a \"let .. 
in\" keyword in caml mode.") (make-variable-buffer-local 'caml-let-in-indent) (defvar caml-match-indent 2 - "*How many spaces to indent from a match keyword in caml mode.") + "*How many spaces to indent from a \"match\" keyword in caml mode.") (make-variable-buffer-local 'caml-match-indent) (defvar caml-method-indent 2 - "*How many spaces to indent from a method keyword in caml mode.") + "*How many spaces to indent from a \"method\" keyword in caml mode.") (make-variable-buffer-local 'caml-method-indent) (defvar caml-module-indent 2 - "*How many spaces to indent from a module keyword in caml mode.") + "*How many spaces to indent from a \"module\" keyword in caml mode.") (make-variable-buffer-local 'caml-module-indent) (defvar caml-object-indent 2 - "*How many spaces to indent from a object keyword in caml mode.") + "*How many spaces to indent from an \"object\" keyword in caml mode.") (make-variable-buffer-local 'caml-object-indent) (defvar caml-of-indent 2 - "*How many spaces to indent from a of keyword in caml mode.") + "*How many spaces to indent from an \"of\" keyword in caml mode.") (make-variable-buffer-local 'caml-of-indent) (defvar caml-parser-indent 4 - "*How many spaces to indent from a parser keyword in caml mode.") + "*How many spaces to indent from a \"parser\" keyword in caml mode.") (make-variable-buffer-local 'caml-parser-indent) (defvar caml-sig-indent 2 - "*How many spaces to indent from a sig keyword in caml mode.") + "*How many spaces to indent from a \"sig\" keyword in caml mode.") (make-variable-buffer-local 'caml-sig-indent) (defvar caml-struct-indent 2 - "*How many spaces to indent from a struct keyword in caml mode.") + "*How many spaces to indent from a \"struct\" keyword in caml mode.") (make-variable-buffer-local 'caml-struct-indent) (defvar caml-try-indent 2 - "*How many spaces to indent from a try keyword in caml mode.") + "*How many spaces to indent from a \"try\" keyword in caml mode.") (make-variable-buffer-local 'caml-try-indent) 
(defvar caml-type-indent 4 - "*How many spaces to indent from a type keyword in caml mode.") + "*How many spaces to indent from a \"type\" keyword in caml mode.") (make-variable-buffer-local 'caml-type-indent) (defvar caml-val-indent 2 - "*How many spaces to indent from a val keyword in caml mode.") + "*How many spaces to indent from a \"val\" keyword in caml mode.") (make-variable-buffer-local 'caml-val-indent) (defvar caml-while-indent 2 - "*How many spaces to indent from a while keyword in caml mode.") + "*How many spaces to indent from a \"while\" keyword in caml mode.") (make-variable-buffer-local 'caml-while-indent) (defvar caml-::-indent 2 - "*How many spaces to indent from a :: operator in caml mode.") + "*How many spaces to indent from a \"::\" operator in caml mode.") (make-variable-buffer-local 'caml-::-indent) (defvar caml-@-indent 2 - "*How many spaces to indent from a @ operator in caml mode.") + "*How many spaces to indent from a \"@\" operator in caml mode.") (make-variable-buffer-local 'caml-@-indent) (defvar caml-:=-indent 2 - "*How many spaces to indent from a := operator in caml mode.") + "*How many spaces to indent from a \":=\" operator in caml mode.") (make-variable-buffer-local 'caml-:=-indent) (defvar caml-<--indent 2 - "*How many spaces to indent from a <- operator in caml mode.") + "*How many spaces to indent from a \"<-\" operator in caml mode.") (make-variable-buffer-local 'caml-<--indent) (defvar caml-->-indent 2 - "*How many spaces to indent from a -> operator in caml mode.") + "*How many spaces to indent from a \"->\" operator in caml mode.") (make-variable-buffer-local 'caml-->-indent) (defvar caml-lb-indent 2 - "*How many spaces to indent from a \[ operator in caml mode.") + "*How many spaces to indent from a \"\[\" operator in caml mode.") (make-variable-buffer-local 'caml-lb-indent) (defvar caml-lc-indent 2 - "*How many spaces to indent from a \{ operator in caml mode.") + "*How many spaces to indent from a \"\{\" operator in 
caml mode.") (make-variable-buffer-local 'caml-lc-indent) (defvar caml-lp-indent 1 - "*How many spaces to indent from a \( operator in caml mode.") + "*How many spaces to indent from a \"\(\" operator in caml mode.") (make-variable-buffer-local 'caml-lp-indent) (defvar caml-and-extra-indent nil - "*Extra indent for caml lines starting with the and keyword. + "*Extra indent for caml lines starting with the \"and\" keyword. Usually negative. nil is align on master.") (make-variable-buffer-local 'caml-and-extra-indent) (defvar caml-do-extra-indent nil - "*Extra indent for caml lines starting with the do keyword. + "*Extra indent for caml lines starting with the \"do\" keyword. Usually negative. nil is align on master.") (make-variable-buffer-local 'caml-do-extra-indent) (defvar caml-done-extra-indent nil - "*Extra indent for caml lines starting with the done keyword. + "*Extra indent for caml lines starting with the \"done\" keyword. Usually negative. nil is align on master.") (make-variable-buffer-local 'caml-done-extra-indent) (defvar caml-else-extra-indent nil - "*Extra indent for caml lines starting with the else keyword. + "*Extra indent for caml lines starting with the \"else\" keyword. Usually negative. nil is align on master.") (make-variable-buffer-local 'caml-else-extra-indent) (defvar caml-end-extra-indent nil - "*Extra indent for caml lines starting with the end keyword. + "*Extra indent for caml lines starting with the \"end\" keyword. Usually negative. nil is align on master.") (make-variable-buffer-local 'caml-end-extra-indent) (defvar caml-in-extra-indent nil - "*Extra indent for caml lines starting with the in keyword. + "*Extra indent for caml lines starting with the \"in\" keyword. Usually negative. nil is align on master.") (make-variable-buffer-local 'caml-in-extra-indent) (defvar caml-then-extra-indent nil - "*Extra indent for caml lines starting with the then keyword. + "*Extra indent for caml lines starting with the \"then\" keyword. 
Usually negative. nil is align on master.") (make-variable-buffer-local 'caml-then-extra-indent) (defvar caml-to-extra-indent -1 - "*Extra indent for caml lines starting with the to keyword. + "*Extra indent for caml lines starting with the \"to\" keyword. Usually negative. nil is align on master.") (make-variable-buffer-local 'caml-to-extra-indent) (defvar caml-with-extra-indent nil - "*Extra indent for caml lines starting with the with keyword. + "*Extra indent for caml lines starting with the \"with\" keyword. Usually negative. nil is align on master.") (make-variable-buffer-local 'caml-with-extra-indent) @@ -261,7 +261,7 @@ Usually negative. nil is align on master.") (make-variable-buffer-local 'caml-|-extra-indent) (defvar caml-rb-extra-indent -2 - "*Extra indent for caml lines statring with ]. + "*Extra indent for caml lines starting with ]. Usually negative. nil is align on master.") (defvar caml-rc-extra-indent -2 @@ -275,13 +275,13 @@ Usually negative. nil is align on master.") (defvar caml-electric-indent t "*Non-nil means electrically indent lines starting with |, ] or }. -Many people find eletric keys irritating, so you can disable them if +Many people find electric keys irritating, so you can disable them if you are one.") (defvar caml-electric-close-vector t "*Non-nil means electrically insert a | before a vector-closing ]. -Many people find eletric keys irritating, so you can disable them if +Many people find electric keys irritating, so you can disable them if you are one. You should probably have this on, though, if you also have caml-electric-indent on, which see.") @@ -623,8 +623,8 @@ have caml-electric-indent on, which see.") (defun caml-eval-phrase (arg &optional min max) "Send the phrase containing the point to the CAML process. With prefix-arg send as many phrases as its numeric value, -If an error occurs during evalutaion, stop at this phrase and -repport the error. 
+If an error occurs during evaluation, stop at this phrase and +report the error. Return nil if noerror and position of error if any. @@ -1136,7 +1136,7 @@ to the end. (defun caml-in-comment-p () "Returns non-nil if point is inside a caml comment. -Returns nil for the parenthesis openning a comment." +Returns nil for the parenthesis opening a comment." ;;we look for comments differently than literals. there are two ;;reasons for this. first, caml has nested comments and it is not so ;;clear that parse-partial-sexp supports them; second, if proper @@ -1266,7 +1266,7 @@ Used to distinguish it from toplevel let construct.") "Look back for a caml keyword or operator matching KWOP-REGEXP. Second optional argument MIN-POS bounds the search. -Ignore occurences inside literals. If found, return a list of two +Ignore occurrences inside literals. If found, return a list of two values: the actual text of the keyword or operator, and a boolean indicating whether the keyword was one we looked for explicitly {non-nil}, or on the other hand one of the block-terminating @@ -1971,7 +1971,7 @@ with prefix arg, indent that many phrases starting with the current phrase." "Explore type annotations by mouse dragging." t) (autoload 'caml-help "caml-help" - "Show documentation for qualilifed OCaml identifier." t) + "Show documentation for qualified OCaml identifier." t) (autoload 'caml-complete "caml-help" "Does completion for documented qualified OCaml identifier." t) (autoload 'ocaml-open-module "caml-help" diff --git a/emacs/camldebug.el b/emacs/camldebug.el index 674cd320..8b59942f 100644 --- a/emacs/camldebug.el +++ b/emacs/camldebug.el @@ -99,7 +99,7 @@ The following commands are available: the last line referred to in the camldebug buffer. 
\\[camldebug-step], \\[camldebug-back] and \\[camldebug-next], in the camldebug -window,call camldebug to step, backstep or next and then update the other window +window, call camldebug to step, backstep or next and then update the other window with the current file and position. If you are in a source file, you may select a point to break diff --git a/emacs/inf-caml.el b/emacs/inf-caml.el index 05bf318f..3d2b2dc9 100644 --- a/emacs/inf-caml.el +++ b/emacs/inf-caml.el @@ -23,7 +23,7 @@ ;; User modifiable variables -;; Whether you want the output buffer to be diplayed when you send a phrase +;; Whether you want the output buffer to be displayed when you send a phrase (defvar caml-display-when-eval t "*If true, display the inferior caml buffer when evaluating expressions.") @@ -205,7 +205,7 @@ Input and output via buffer `*inferior-caml*'." (goto-char loc))) -;;; orgininal inf-caml.el ended here +;;; original inf-caml.el ended here ;; as eval-phrase, but ignores errors. @@ -225,16 +225,16 @@ should lies." beg)) (defvar caml-previous-output nil - "tells the beginning of output in the shell-output buffer, so that the -output can be retreived later, asynchronously.") + "Tells the beginning of output in the shell-output buffer, so that the +output can be retrieved later, asynchronously.") -;; enriched version of eval-phrase, to repport errors. +;; enriched version of eval-phrase, to report errors. (defun inferior-caml-eval-phrase (arg &optional min max) "Send the phrase containing the point to the CAML process. With prefix-arg send as many phrases as its numeric value, -If an error occurs during evalutaion, stop at this phrase and -repport the error. +If an error occurs during evaluation, stop at this phrase and +report the error. Return nil if noerror and position of error if any. 
diff --git a/lex/outputbis.ml b/lex/outputbis.ml index fc8dfac8..37ff25b0 100644 --- a/lex/outputbis.ml +++ b/lex/outputbis.ml @@ -19,89 +19,129 @@ open Printf open Lexgen open Common -let output_auto_defs oc has_refill = - output_string oc - "let __ocaml_lex_init_lexbuf lexbuf mem_size =\ -\n let pos = lexbuf.Lexing.lex_curr_pos in\ -\n lexbuf.Lexing.lex_mem <- Array.make mem_size (-1) ;\ -\n lexbuf.Lexing.lex_start_pos <- pos ;\ -\n lexbuf.Lexing.lex_last_pos <- pos ;\ -\n lexbuf.Lexing.lex_last_action <- -1\ -\n\n\ -" ; - - if has_refill then - output_string oc - "let rec __ocaml_lex_next_char lexbuf state k =\ -\n if lexbuf.Lexing.lex_curr_pos >= lexbuf.Lexing.lex_buffer_len then begin\ -\n if lexbuf.Lexing.lex_eof_reached then\ -\n state lexbuf k 256\ -\n else begin\ -\n __ocaml_lex_refill (fun lexbuf ->\ -\n lexbuf.Lexing.refill_buff lexbuf ;\ -\n __ocaml_lex_next_char lexbuf state k)\ -\n lexbuf\ -\n end\ -\n end else begin\ -\n let i = lexbuf.Lexing.lex_curr_pos in\ -\n let c = Bytes.get lexbuf.Lexing.lex_buffer i in\ -\n lexbuf.Lexing.lex_curr_pos <- i+1 ;\ -\n state lexbuf k (Char.code c)\ -\n end\ -\n\n" - else - output_string oc - "let rec __ocaml_lex_next_char lexbuf =\ -\n if lexbuf.Lexing.lex_curr_pos >= lexbuf.Lexing.lex_buffer_len then begin\ -\n if lexbuf.Lexing.lex_eof_reached then\ -\n 256\ -\n else begin\ -\n lexbuf.Lexing.refill_buff lexbuf ;\ -\n __ocaml_lex_next_char lexbuf\ -\n end\ -\n end else begin\ -\n let i = lexbuf.Lexing.lex_curr_pos in\ -\n let c = Bytes.get lexbuf.Lexing.lex_buffer i in\ -\n lexbuf.Lexing.lex_curr_pos <- i+1 ;\ -\n Char.code c\ -\n end\ -\n\n" - - -let output_pats oc pats = List.iter (fun p -> fprintf oc "|%d" p) pats - -let output_action oc has_refill mems r = - output_memory_actions " " oc mems ; +type ctx = { + oc: out_channel; + has_refill: bool; + goto_state: (ctx -> string -> int -> unit); + last_action: int option; +} + +let pr ctx = fprintf ctx.oc + +let output_auto_defs ctx = + if ctx.has_refill then begin 
+ pr ctx "\n"; + pr ctx "let rec __ocaml_lex_refill_buf lexbuf _buf _len _curr _last _last_action state k =\n"; + pr ctx " if lexbuf.Lexing.lex_eof_reached then\n"; + pr ctx " state lexbuf _last_action _buf _len _curr _last k 256\n"; + pr ctx " else begin\n"; + pr ctx " lexbuf.Lexing.lex_curr_pos <- _curr;\n"; + pr ctx " lexbuf.Lexing.lex_last_pos <- _last;\n"; + pr ctx " __ocaml_lex_refill\n"; + pr ctx " (fun lexbuf ->\n"; + pr ctx " let _curr = lexbuf.Lexing.lex_curr_pos in\n"; + pr ctx " let _last = lexbuf.Lexing.lex_last_pos in\n"; + pr ctx " let _len = lexbuf.Lexing.lex_buffer_len in\n"; + pr ctx " let _buf = lexbuf.Lexing.lex_buffer in\n"; + pr ctx " if _curr < _len then\n"; + pr ctx " state lexbuf _last_action _buf _len (_curr + 1) _last k\n"; + pr ctx " (Char.code (Bytes.unsafe_get _buf _curr))\n"; + pr ctx " else\n"; + pr ctx " __ocaml_lex_refill_buf lexbuf _buf _len _curr _last _last_action\n"; + pr ctx " state k\n"; + pr ctx " )\n"; + pr ctx " lexbuf\n"; + pr ctx " end\n"; + pr ctx "\n"; + end else begin + pr ctx "\n"; + pr ctx "let rec __ocaml_lex_refill_buf lexbuf _buf _len _curr _last =\n"; + pr ctx " if lexbuf.Lexing.lex_eof_reached then\n"; + pr ctx " 256, _buf, _len, _curr, _last\n"; + pr ctx " else begin\n"; + pr ctx " lexbuf.Lexing.lex_curr_pos <- _curr;\n"; + pr ctx " lexbuf.Lexing.lex_last_pos <- _last;\n"; + pr ctx " lexbuf.Lexing.refill_buff lexbuf;\n"; + pr ctx " let _curr = lexbuf.Lexing.lex_curr_pos in\n"; + pr ctx " let _last = lexbuf.Lexing.lex_last_pos in\n"; + pr ctx " let _len = lexbuf.Lexing.lex_buffer_len in\n"; + pr ctx " let _buf = lexbuf.Lexing.lex_buffer in\n"; + pr ctx " if _curr < _len then\n"; + pr ctx " Char.code (Bytes.unsafe_get _buf _curr), _buf, _len, (_curr + 1), _last\n"; + pr ctx " else\n"; + pr ctx " __ocaml_lex_refill_buf lexbuf _buf _len _curr _last\n"; + pr ctx " end\n"; + pr ctx "\n"; + end + +let output_memory_actions pref oc = function + | [] -> () + | mvs -> + output_string oc pref; + output_string oc "(* " ; 
+ fprintf oc "L=%d " (List.length mvs) ; + List.iter + (fun mv -> match mv with + | Copy (tgt, src) -> + fprintf oc "[%d] <- [%d] ;" tgt src + | Set tgt -> + fprintf oc "[%d] <- p ; " tgt) + mvs ; + output_string oc " *)\n" ; + List.iter + (fun mv -> match mv with + | Copy (tgt, src) -> + fprintf oc + "%s%a <- %a ;\n" + pref output_mem_access tgt output_mem_access src + | Set tgt -> + fprintf oc "%s%a <- _curr;\n" + pref output_mem_access tgt) + mvs + +let output_pats ctx = function + | [x] -> pr ctx "| %d" x + | pats -> List.iter (fun p -> pr ctx "|%d" p) pats + +let last_action ctx = + match ctx.last_action with + | None -> "_last_action" + | Some i -> Printf.sprintf "%i (* = last_action *)" i + +let output_action ctx pref mems r = + output_memory_actions pref ctx.oc mems; match r with | Backtrack -> - fprintf oc - " lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_last_pos ;\n" ; - if has_refill then - fprintf oc " k lexbuf lexbuf.Lexing.lex_last_action\n" - else - fprintf oc " lexbuf.Lexing.lex_last_action\n" + pr ctx "%slet _curr = _last in\n\ + %slexbuf.Lexing.lex_curr_pos <- _curr;\n\ + %slexbuf.Lexing.lex_last_pos <- _last;\n" + pref pref pref; + if ctx.has_refill then + pr ctx "%sk lexbuf %s\n" pref (last_action ctx) + else + pr ctx "%s%s\n" pref (last_action ctx) | Goto n -> - fprintf oc " __ocaml_lex_state%d lexbuf%s\n" n - (if has_refill then " k" else "") + ctx.goto_state ctx pref n -let output_pat oc i = +let output_pat ctx i = if i >= 256 then - fprintf oc "|eof" + pr ctx "|eof" else - fprintf oc "|'%s'" (Char.escaped (Char.chr i)) - -let output_clause oc has_refill pats mems r = - fprintf oc "(* " ; - List.iter (output_pat oc) pats ; - fprintf oc " *)\n" ; - fprintf oc " %a ->\n" output_pats pats ; - output_action oc has_refill mems r + pr ctx "|'%s'" (Char.escaped (Char.chr i)) -let output_default_clause oc has_refill mems r = - fprintf oc " | _ ->\n" ; output_action oc has_refill mems r +let output_clause ctx pref pats mems r = + pr ctx "%s(* " 
pref; + List.iter (output_pat ctx) pats; + pr ctx " *)\n%s" pref; + output_pats ctx pats; + pr ctx " ->\n"; + output_action ctx (" "^pref) mems r +let output_default_clause ctx pref mems r = + pr ctx "%s| _ ->\n" pref; + output_action ctx (" "^pref) mems r -let output_moves oc has_refill moves = +let output_moves ctx pref moves = let t = Hashtbl.create 17 in let add_move i (m,mems) = let mems,r = try Hashtbl.find t m with Not_found -> mems,[] in @@ -126,98 +166,187 @@ let output_moves oc has_refill moves = Hashtbl.iter (fun m (mems,pats) -> if m <> !most_frequent then - output_clause oc has_refill (List.rev pats) mems m) + output_clause ctx pref (List.rev pats) mems m) t ; - output_default_clause oc has_refill !most_mems !most_frequent + output_default_clause ctx pref !most_mems !most_frequent -let output_tag_actions pref oc mvs = - output_string oc "(*" ; +let output_tag_actions pref ctx mvs = + pr ctx "%s(*" pref; List.iter (fun i -> match i with - | SetTag (t,m) -> fprintf oc " t%d <- [%d] ;" t m - | EraseTag t -> fprintf oc " t%d <- -1 ;" t) + | SetTag (t,m) -> pr ctx " t%d <- [%d] ;" t m + | EraseTag t -> pr ctx " t%d <- -1 ;" t) mvs ; - output_string oc " *)\n" ; + pr ctx " *)\n" ; List.iter (fun i -> match i with | SetTag (t,m) -> - fprintf oc "%s%a <- %a ;\n" + pr ctx "%s%a <- %a ;\n" pref output_mem_access t output_mem_access m | EraseTag t -> - fprintf oc "%s%a <- -1 ;\n" + pr ctx "%s%a <- -1 ;\n" pref output_mem_access t) mvs -let output_trans pref oc has_refill i trans = - let entry = sprintf "__ocaml_lex_state%d" i in - fprintf oc "%s %s lexbuf %s= " pref entry - (if has_refill then "k " else ""); - match trans with +let output_trans_body pref ctx = function | Perform (n,mvs) -> - output_tag_actions " " oc mvs ; - fprintf oc " %s%d\n" - (if has_refill then "k lexbuf " else "") - n + output_tag_actions pref ctx mvs ; + pr ctx "%slexbuf.Lexing.lex_curr_pos <- _curr;\n" pref; + pr ctx "%slexbuf.Lexing.lex_last_pos <- _last;\n" pref; + pr ctx "%s%s%d\n" 
pref (if ctx.has_refill then "k lexbuf " else "") n | Shift (trans, move) -> - begin match trans with - | Remember (n,mvs) -> - output_tag_actions " " oc mvs ; - fprintf oc - " lexbuf.Lexing.lex_last_pos <- lexbuf.Lexing.lex_curr_pos ;\n" ; - fprintf oc " lexbuf.Lexing.lex_last_action <- %d ;\n" n; - | No_remember -> () - end; - if has_refill then - let next = entry ^ "_next" in - fprintf oc " __ocaml_lex_next_char lexbuf %s k\n" next; - fprintf oc "and %s lexbuf k = function " next - else - output_string oc "match __ocaml_lex_next_char lexbuf with\n"; - output_moves oc has_refill move - -let output_automata oc has_refill auto = - output_auto_defs oc has_refill; + let ctx = + match trans with + | Remember (n,mvs) -> + output_tag_actions pref ctx mvs ; + pr ctx "%slet _last = _curr in\n" pref; + begin match ctx.last_action with + | Some i when i = n -> + pr ctx "%s(* let _last_action = %d in*)\n" pref n; + ctx + | _ -> + pr ctx "%slet _last_action = %d in\n" pref n; + {ctx with last_action = Some n} + end + | No_remember -> + ctx + in + if ctx.has_refill then begin + (* TODO: bind this 'state' function at toplevel instead *) + pr ctx + "%slet state lexbuf _last_action _buf _len _curr _last k = function\n" + pref; + output_moves ctx pref move; + pr ctx "%sin\n\ + %sif _curr >= _len then\n\ + %s __ocaml_lex_refill_buf lexbuf _buf _len _curr _last _last_action state k\n\ + %selse\n\ + %s state lexbuf _last_action _buf _len (_curr + 1) _last k\n\ + %s (Char.code (Bytes.unsafe_get _buf _curr))\n" + pref pref pref pref pref pref + end + else begin + pr ctx "%slet next_char, _buf, _len, _curr, _last =\n\ + %s if _curr >= _len then\n\ + %s __ocaml_lex_refill_buf lexbuf _buf _len _curr _last\n\ + %s else\n\ + %s Char.code (Bytes.unsafe_get _buf _curr),\n\ + %s _buf, _len, (_curr + 1), _last\n\ + %sin\n\ + %sbegin match next_char with\n" + pref pref pref pref pref pref pref pref; + output_moves ctx (pref ^ " ") move; + pr ctx "%send\n" pref + end + +let output_automata ctx 
auto inline = + output_auto_defs ctx; let n = Array.length auto in - output_trans "let rec" oc has_refill 0 auto.(0) ; - for i = 1 to n-1 do - output_trans "\nand" oc has_refill i auto.(i) - done ; - output_char oc '\n' + let first = ref true in + for i = 0 to n-1 do + if not inline.(i) then begin + pr ctx + "%s __ocaml_lex_state%d lexbuf _last_action _buf _len _curr _last %s=\n" + (if !first then "let rec" else "\nand") + i + (if ctx.has_refill then "k " else ""); + output_trans_body " " ctx auto.(i); + first := false; + end + done; + pr ctx "\n\n" (* Output the entries *) -let output_entry ic oc has_refill tr e = - let init_num, init_moves = e.auto_initial_state in - fprintf oc "%s %alexbuf =\n __ocaml_lex_init_lexbuf lexbuf %d; %a" - e.auto_name output_args e.auto_args - e.auto_mem_size - (output_memory_actions " ") init_moves; - fprintf oc - (if has_refill - then "\n __ocaml_lex_state%d lexbuf (fun lexbuf __ocaml_lex_result ->" - else "\n let __ocaml_lex_result = __ocaml_lex_state%d lexbuf in") - init_num; - output_string oc "\ -\n lexbuf.Lexing.lex_start_p <- lexbuf.Lexing.lex_curr_p;\ -\n lexbuf.Lexing.lex_curr_p <- {lexbuf.Lexing.lex_curr_p with\ -\n Lexing.pos_cnum = lexbuf.Lexing.lex_abs_pos+lexbuf.Lexing.lex_curr_pos};\ -\n match __ocaml_lex_result with\n"; +let output_init ctx pref e init_moves = + if e.auto_mem_size > 0 then + pr ctx "%slexbuf.Lexing.lex_mem <- Array.make %d (-1);\n" + pref e.auto_mem_size; + pr ctx "%slet _curr = lexbuf.Lexing.lex_curr_pos in\n" pref; + pr ctx "%slet _last = _curr in\n" pref; + pr ctx "%slet _len = lexbuf.Lexing.lex_buffer_len in\n" pref; + pr ctx "%slet _buf = lexbuf.Lexing.lex_buffer in\n" pref; + pr ctx "%slet _last_action = -1 in\n" pref; + pr ctx "%slexbuf.Lexing.lex_start_pos <- _curr;\n" pref; + output_memory_actions pref ctx.oc init_moves + +let output_rules ic ctx pref tr e = + pr ctx "%sbegin\n" pref; + pr ctx "%s let _curr_p = lexbuf.Lexing.lex_curr_p in\n" pref; + pr ctx "%s if _curr_p != Lexing.dummy_pos 
then begin\n" pref; + pr ctx "%s lexbuf.Lexing.lex_start_p <- _curr_p;\n" pref; + pr ctx "%s lexbuf.Lexing.lex_curr_p <-\n" pref; + pr ctx "%s {_curr_p with Lexing.pos_cnum =\n" pref; + pr ctx "%s lexbuf.Lexing.lex_abs_pos+lexbuf.Lexing.lex_curr_pos}\n" pref; + pr ctx "%s end\n" pref; + pr ctx "%send;\n" pref; + pr ctx "%smatch __ocaml_lex_result with\n" pref; List.iter (fun (num, env, loc) -> - fprintf oc " | "; - fprintf oc "%d ->\n" num; - output_env ic oc tr env ; - copy_chunk ic oc tr loc true; - fprintf oc "\n") + pr ctx "%s| %d ->\n" pref num; + output_env ic ctx.oc tr env; + copy_chunk ic ctx.oc tr loc true; + pr ctx "\n") e.auto_actions; - fprintf oc " | _ -> raise (Failure \"lexing: empty token\")\n"; - if has_refill then - output_string oc " )\n\n" - else - output_string oc "\n\n" + pr ctx "%s| _ -> raise (Failure \"lexing: empty token\")\n" pref + +let output_entry ic ctx tr e = + let init_num, init_moves = e.auto_initial_state in + pr ctx "%s %alexbuf =\n" e.auto_name output_args e.auto_args; + + if ctx.has_refill then begin + pr ctx " let k lexbuf __ocaml_lex_result =\n"; + output_rules ic ctx " " tr e; + pr ctx " in\n"; + output_init ctx " " e init_moves; + ctx.goto_state ctx " " init_num + end else begin + pr ctx " let __ocaml_lex_result =\n"; + output_init ctx " " e init_moves; + ctx.goto_state ctx " " init_num; + pr ctx " in\n"; + output_rules ic ctx " " tr e + end; + pr ctx "\n\n" + + +(* Determine which states to inline *) +let choose_inlining entry_points transitions = + let counters = Array.make (Array.length transitions) 0 in + let count i = counters.(i) <- counters.(i) + 1 in + List.iter (fun e -> count (fst e.auto_initial_state)) entry_points; + Array.iter + (function + | Shift (_, a) -> + let tbl = Hashtbl.create 8 in + Array.iter + (function + | (Goto i, _) when not (Hashtbl.mem tbl i) -> + Hashtbl.add tbl i (); count i + | _ -> () + ) + a + | Perform _ -> () + ) + transitions; + Array.mapi + (fun i -> function + | Perform _ -> true + | 
Shift _ -> counters.(i) = 1 + ) + transitions + +let goto_state inline transitions ctx pref n = + if inline.(n) then + output_trans_body pref ctx transitions.(n) + else + pr ctx "%s__ocaml_lex_state%d lexbuf %s _buf _len _curr _last%s\n" + pref n + (last_action ctx) + (if ctx.has_refill then " k" else "") (* Main output function *) @@ -226,15 +355,24 @@ let output_lexdef ic oc tr header rh copy_chunk ic oc tr header false; let has_refill = output_refill_handler ic oc tr rh in - output_automata oc has_refill transitions; + let inline = choose_inlining entry_points transitions in + let ctx = + { + has_refill; + oc; + goto_state = goto_state inline transitions; + last_action = None; + } + in + output_automata ctx transitions inline; begin match entry_points with [] -> () | entry1 :: entries -> output_string oc "let rec "; - output_entry ic oc has_refill tr entry1; + output_entry ic ctx tr entry1; List.iter (fun e -> output_string oc "and "; - output_entry ic oc has_refill tr e) + output_entry ic ctx tr e) entries; output_string oc ";;\n\n"; end; diff --git a/man/ocaml.m b/man/ocaml.m index 3b20ada0..edfc2b6b 100644 --- a/man/ocaml.m +++ b/man/ocaml.m @@ -170,8 +170,7 @@ are supported. .B \-safe\-string Enforce the separation between types .BR string \ and\ bytes , -thereby making strings read-only. This will become the default in -a future version of OCaml. +thereby making strings read-only. This is the default. .TP .B \-short\-paths When a type is visible under several module-paths, use the shortest @@ -207,9 +206,9 @@ accesses an array or string outside of its bounds. .B \-unsafe\-string Identify the types .BR string \ and\ bytes , -thereby making strings writable. For reasons of backward compatibility, -this is the default setting for the moment, but this will change in a future -version of OCaml. +thereby making strings writable. +This is intended for compatibility with old source code and should not +be used with new software. 
.TP .B \-version Print version string and exit. diff --git a/man/ocamlc.m b/man/ocamlc.m index 532397e5..810e821a 100644 --- a/man/ocamlc.m +++ b/man/ocamlc.m @@ -611,8 +611,7 @@ suffix is supported and gives a debug version of the runtime. .B \-safe\-string Enforce the separation between types .BR string \ and\ bytes , -thereby making strings read-only. This will become the default in -a future version of OCaml. +thereby making strings read-only. This is the default. .TP .B \-short\-paths When a type is visible under several module-paths, use the shortest @@ -622,11 +621,6 @@ warning messages. .B \-strict\-sequence Force the left-hand part of each sequence to have type unit. .TP -.B \-thread -Compile or link multithreaded programs, in combination with the -system "threads" library described in -.IR The\ OCaml\ user's\ manual . -.TP .B \-unboxed\-types When a type is unboxable (i.e. a record with a single argument or a concrete datatype with a single constructor of one argument) it will @@ -650,9 +644,9 @@ accesses an array or string outside of its bounds. .B \-unsafe\-string Identify the types .BR string \ and\ bytes , -thereby making strings writable. For reasons of backward compatibility, -this is the default setting for the moment, but this will change in a future -version of OCaml. +thereby making strings writable. +This is intended for compatibility with old source code and should not +be used with new software. .TP .BI \-use\-runtime \ runtime\-name Generate a bytecode executable file that can be executed on the custom diff --git a/man/ocamlopt.m b/man/ocamlopt.m index f2a2b208..9bd34f4f 100644 --- a/man/ocamlopt.m +++ b/man/ocamlopt.m @@ -360,6 +360,12 @@ setting the option ensures that this module will always be linked if it is put in a library and this library is linked. .TP +.B \-linscan +Use linear scan register allocation. 
Compiling with this allocator is faster +than with the usual graph coloring allocator, sometimes quite drastically so for +long functions and modules. On the other hand, the generated code can be a bit +slower. +.TP .B \-no-alias-deps Do not record dependencies for module aliases. .TP @@ -554,8 +560,7 @@ is saved in the file .B \-safe\-string Enforce the separation between types .BR string \ and\ bytes , -thereby making strings read-only. This will become the default in -a future version of OCaml. +thereby making strings read-only. This is the default. .TP .B \-shared Build a plugin (usually .cmxs) that can be dynamically loaded with @@ -583,11 +588,6 @@ warning messages. .B \-strict\-sequence The left-hand part of a sequence must have type unit. .TP -.B \-thread -Compile or link multithreaded programs, in combination with the -system threads library described in -.IR "The OCaml user's manual" . -.TP .B \-unboxed\-types When a type is unboxable (i.e. a record with a single argument or a concrete datatype with a single constructor of one argument) it will @@ -618,9 +618,9 @@ exception. .B \-unsafe\-string Identify the types .BR string \ and\ bytes , -thereby making strings writable. For reasons of backward compatibility, -this is the default setting for the moment, but this will change in a future -version of OCaml. +thereby making strings writable. +This is intended for compatibility with old source code and should not +be used with new software. .TP .B \-v Print the version number of the compiler and the location of the diff --git a/manual/LICENSE-for-the-manual b/manual/LICENSE-for-the-manual new file mode 100644 index 00000000..c104a053 --- /dev/null +++ b/manual/LICENSE-for-the-manual @@ -0,0 +1,20 @@ +The present documentation is copyright Institut National de Recherche +en Informatique et en Automatique (INRIA). 
+ +The OCaml documentation and user's manual may be reproduced and +distributed in whole or in part, subject to the following conditions: + +- The copyright notice above and this permission notice must be + preserved complete on all complete or partial copies. + +- Any translation or derivative work of the OCaml documentation and + user's manual must be approved by the authors in writing before + distribution. + +- If you distribute the OCaml documentation and user's manual in part, + instructions for obtaining the complete version of this manual must + be included, and a means for obtaining a complete version provided. + +- Small portions may be reproduced as illustrations for reviews or + quotes in other works without this permission notice if proper + citation is given. diff --git a/manual/Makefile b/manual/Makefile new file mode 100644 index 00000000..d866494d --- /dev/null +++ b/manual/Makefile @@ -0,0 +1,32 @@ +all: tools + cd manual; ${MAKE} all + ${MAKE} tests +# cd fpcl; ${MAKE} all + +clean: + cd manual; ${MAKE} clean + cd tools; ${MAKE} clean +# cd fpcl; ${MAKE} clean + +release: + cd manual; ${MAKE} release +# cd fpcl; ${MAKE} release + +.PHONY: tools +tools: + cd tools; ${MAKE} clean; ${MAKE} all + +# The pregen-etex target generates the latex files from the .etex +# files to ensure that this phase of the manual build process, which +# may execute OCaml fragments and expect certain outputs, is correct +pregen-etex: tools + cd manual; $(MAKE) etex-files + +# pregen builds both .etex files and the documentation of the standard library +pregen: tools + cd manual; $(MAKE) files + +# test the consistency of the manual and the compiler source +.PHONY:tests +tests: + ${MAKE} -C tests all diff --git a/manual/README.md b/manual/README.md new file mode 100644 index 00000000..26f818c0 --- /dev/null +++ b/manual/README.md @@ -0,0 +1,252 @@ +OCAML DOCUMENTATION +=================== + +Prerequisites +------------- + +- Any prerequisites required to build OCaml from 
sources. + +- The Unix editor 'ed', no longer installed by default on some systems. + +- A LaTeX installation. + +- The HeVeA LaTeX-to-HTML convertor (available in OPAM): + + +Note that you must make sure `hevea.sty` is installed into TeX properly. Your +package manager may not do this for you. Run `kpsewhich hevea.sty` to check. + + +Building +-------- + +0. Install the OCaml distribution. + +1. Run `make` in the manual. + +NB: If you already set `LD_LIBRARY_PATH` (OS X: `DYLD_LIBRARY_PATH`) + in your environment don't forget to add + `otherlibs/unix:otherlibs/str` to it in an absolute way. + +Outputs +------- + +In the manual: + +- The HTML Manual is in directory `htmlman`. The main file is `index.html`. + +- The plain text manual is in directory `textman` as file `manual.txt`. + +- The Info manual is in directory `infoman`. + +- The DVI manual is in directory `texstuff` as file `manual.dvi`. + +- The PDF manual is in directory `texstuff` as file `pdfmanual.pdf`. + +Source files +------------ +The manual is written in an extended dialect of latex and is split in many +source files. During the build process, the source files are converted into +classical latex files using the tools available in `tools`. These files are +then converted to the different output formats using either latex or hevea. + +Each part of the manual corresponds to a specific directory, and each distinct +chapter (or sometimes section) is mapped to a distinct `.etex` file: + +- Part I, Introduction to OCaml: `tutorials` + - The core language: `coreexamples.etex` + - The module system: `moduleexamples.etex` + - Objects in OCaml: `objectexamples.etex` + - Labels and variants: `lablexamples.etex` + - Advanced examples with classes and modules: `advexamples.etex` + +- Part II, The OCaml language: `refman` + This part is separated in two very distinct chapters; the + `OCaml language` chapter and the `Language extensions` chapter. 
+ + - The OCaml language: `refman.etex` + This chapter consists of a technical description of the OCaml language. + Each section of this chapter is mapped to a separate latex file: + - `lex.etex`, `values.etex`, `names.etex`, `types.etex`, `const.etex`, + `patterns.etex`, `expr.etex`, `typedecl.etex`, `classes.etex`, + `modtypes.etex`, `compunit.etex` + + - Language extensions: `exten.etex` + This chapter contains a description of all recent features of the OCaml + language. + +- Part III, The OCaml tools: 'cmds' + - Batch compilation (ocamlc): `comp.etex` + - The toplevel system (ocaml): `top.etex` + - The runtime system (ocamlrun): `runtime.etex` + - Native-code compilation (ocamlopt): `native.etex` + - Lexer and parser generators (ocamllex, ocamlyacc): `lexyacc.etex` + - Dependency generator (ocamldep): `depend.etex` + - The browser/editor (ocamlbrowser): `browser.etex` + - The documentation generator (ocamldoc): `ocamldoc.etex` + - The debugger (ocamldebug): `debugger.etex` + - Profiling (ocamlprof): `profil.etex` + - The ocamlbuild compilation manager: `ocamlbuild.etex` + - Interfacing C with OCaml: `intf-c.etex` + - Optimisation with Flambda: `flambda.etex` + - Memory profiling with Spacetime: `spacetime.etex` + - Fuzzing with afl-fuzz: `afl-fuzz.etex` + +Note that ocamlc, ocamlopt and the toplevel options overlap a lot. +Consequently, these options are described together in the file +`unified-options.etex` and then included from `comp.etex`, `native.etex`, +and `top.etex`. If you need to update this list of options, the top comment +of `unified-options.etex` contains the relevant information. + +- Part IV, The OCaml library: 'libref' + This part contains a brief presentation of all libraries bundled with the + compilers and the API documentation generated for these libraries. 
+ - The core library: `core.etex` + - The standard library: `stdlib.etex` + - The compiler front-end: `compilerlibs.etex` + - The unix library: Unix system calls: `libunix.etex` + - The legacy num library: this library has been removed from the core + distribution, see `libnum.etex` + - The str library: regular expressions and string processing: `libstr.etex` + - The threads library: `libthreads.etex` + - The graphics library: `libgraph.etex` + - The dynlink library: dynamic loading and linking of object files: + `libdynlink.etex` + - The bigarray library: `libbigarray.etex` + +Latex extensions +---------------- + +### Caml environments + +The tool `tools/caml-tex2` is used to generate the latex code for the examples +in the introduction and language extension parts of the manual. It implements +two pseudo-environments: `caml_example` and `caml_eval`. + +The pseudo-environment `caml_example` evaluates its contents using an ocaml +interpreter and then translates both the input code and the interpreter output +to latex code, e.g. +```latex +\begin{caml_example}{toplevel} +let f x = x;; +\end{caml_example} +``` +Note that the toplevel output can be suppressed by using a `*` suffix: +```latex +\begin{caml_example*}{verbatim} +let f x = x +\end{caml_example*} +``` + +The `{verbatim}` or `{toplevel}` argument of the environment corresponds +to the mode of the example; two modes are available: `toplevel` and +`verbatim`. +The `toplevel` mode mimics the appearance and behavior of the toplevel. +In particular, toplevel examples must end with a double semi-colon `;;`, +otherwise an error would be raised. +The `verbatim` mode does not require a final `;;` and is intended to be +a lighter mode for code examples. + +By default, `caml_tex2` raises an error and stops if the output of one of +the `caml_example` environments contains an unexpected error or warning. 
+If such an error or warning is, in fact, expected, it is necessary to +indicate the expected output status to `caml_tex2` by adding either +an option to the `caml_example` environment: +```latex +\begin{caml_example}{toplevel}[error] +1 + 2. ;; +\end{caml_example} + or for warning +\begin{caml_example}[warning=8] +let f None = None;; +\end{caml_example} +``` +or an annotation to the concerned phrase: + +```latex +\begin{caml_example}{toplevel} +1 + 2. [@@expect error] ;; +let f None = None [@@expect warning 8];; +3 + 4 [@@expect ok];; +\end{caml_example} +``` + +It is also possible to elide a code fragment by annotating it with +an `[@ellipsis]` attribute + +```latex +\begin{caml_example}{toplevel} +let f: type a. a list -> int = List.length[@ellipsis] ;; +\end{caml_example} +``` +For module components, it might be easier to hide them by using +`[@@@ellipsis.start]` and `[@@@ellipsis.stop]`: +```latex +\begin{caml_example*}{verbatim} +module M = struct + [@@@ellipsis.start] + type t = T + let x = 0 + [@@@ellipsis.stop] + end +\end{caml_example*} +``` + +Another possibility to avoid displaying distracting code is to use +the `caml_eval` environment. This environment is a companion environment +to `caml_example` and can be used to evaluate OCaml expressions in the +toplevel without printing anything: +```latex +\begin{caml_eval} +let pi = 4. *. atan 1.;; +\end{caml_eval} +\begin{caml_example}{toplevel} +let f x = x +. pi;; +\end{caml_example} +``` +Beware that the detection code for these pseudo-environments is quite brittle +and the environments must start and end at the beginning of the line. + +### Quoting + +The tool `tools/texquote2` provides support for verbatim-like quotes using +`\"` delimiters. More precisely, outside of caml environments and verbatim +environments, `texquote2` translates double quotes `"text"` to +`\machine{escaped_text}`. 
+ +### BNF grammar notation + +The tool `tools/transf` provides support for BNF grammar notations and special +quotes for non-terminals. When transf is used, the environment `syntax` can +be used to describe grammars using BNF notation: +```latex +\begin{syntax} +expr: + value-path + | constant + | '(' expr ')' + | 'begin' expr 'end' + | '(' expr ':' typexpr ')' + | expr {{',' expr}} + | constr expr + | "`"tag-name expr + | expr '::' expr + | '[' expr { ';' expr } [';'] ']' + | '[|' expr { ';' expr } [';'] '|]' + | '{' field [':' typexpr] '=' expr% + { ';' field [':' typexpr] '=' expr } [';'] '}' +\end{syntax} +``` +Notice that terminal symbols are quoted using `'` delimiters. +Moreover, outside of the syntax environment, `@`-quotes can be used +to introduce fragments of grammar: `@'(' module-expr ')'@`. As a consequence, +when this extension is used `@` characters must be escaped as `\@`. +This extension is used mainly in the language reference part of the manual, +and a more complete description of the notation used is available in the +first subsection of `refman/refman.etex`. + +Consistency tests +----------------- + +The `tests` folder contains consistency tests that check that the manual +and the rest of the compiler sources stay synced. 
diff --git a/manual/manual/.gitignore b/manual/manual/.gitignore new file mode 100644 index 00000000..71605a70 --- /dev/null +++ b/manual/manual/.gitignore @@ -0,0 +1,8 @@ +allfiles.tex +biblio.tex +foreword.tex +version.tex +warnings.etex +warnings.tex +foreword.htex +manual.html diff --git a/manual/manual/Makefile b/manual/manual/Makefile new file mode 100644 index 00000000..9cc3d819 --- /dev/null +++ b/manual/manual/Makefile @@ -0,0 +1,143 @@ +# $Id$ + +FILES=allfiles.tex biblio.tex foreword.tex version.tex warnings-help.etex +TEXINPUTS=.:..:../refman:../library:../cmds:../tutorials:../../styles: +TEXFONTS=../../styles: +RELEASE=$$HOME/release/$${RELEASENAME} +HEVEA=hevea +HACHA=hacha +INFO=-fix -exec xxdate.exe -info -w 79 +HTML=-fix -exec xxdate.exe -O +TEXT=-fix -exec xxdate.exe -text -w 79 +SRC = $(abspath ../../) + +export LD_LIBRARY_PATH ?= $(SRC)/otherlibs/unix/:$(SRC)/otherlibs/str/ +export DYLD_LIBRARY_PATH ?= $(SRC)/otherlibs/unix/:$(SRC)/otherlibs/str/ +SET_LD_PATH=CAML_LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) + +OCAMLDOC=$(if $(wildcard $(SRC)/ocamldoc/ocamldoc.opt),\ + $(SRC)/ocamldoc/ocamldoc.opt,\ + $(SET_LD_PATH) $(SRC)/byterun/ocamlrun $(SRC)/ocamldoc/ocamldoc)\ + -hide Pervasives -nostdlib -initially-opened-module Pervasives + +manual: files + cd texstuff; \ + TEXINPUTS=$(TEXINPUTS) latex manual.tex + +index:: + cd texstuff && \ + sh ../../tools/fix_index.sh manual.idx && \ + makeindex manual.idx + cd texstuff; makeindex manual.kwd.idx + +pdfmanual: files + cd texstuff; \ + TEXINPUTS=$(TEXINPUTS) pdflatex pdfmanual.tex + +index:: + cd texstuff && \ + sh ../../tools/fix_index.sh pdfmanual.idx && \ + makeindex pdfmanual.idx + cd texstuff; makeindex pdfmanual.kwd.idx + + +# Copy and unprefix the standard library when needed +include $(SRC)/ocamldoc/Makefile.unprefix + +html: files $(STDLIB_CMIS) + cd htmlman; \ + mkdir -p libref ; \ + $(OCAMLDOC) -colorize-code -sort -html \ + -d libref \ + -I $(STDLIB_UNPREFIXED) \ + $(STDLIB_MLIS) ; \ + cp -f 
../style.css libref ; \ + ${HEVEA} ${HTML} -I .. -I ../refman -I ../library -I ../cmds \ + -I ../tutorials -I ../../styles -I ../texstuff manual.hva \ + -e macros.tex ../manual.tex ; \ + ${HACHA} -tocter manual.html ; \ + +info: files + cd infoman; rm -f ocaml.info*; \ + ${HEVEA} ${INFO} -o ocaml.info.body -I .. -I ../refman -I ../library \ + -I ../cmds -I ../tutorials -I ../../styles -I ../texstuff \ + ../manual.inf -e macros.tex ../manual.tex + cat manual.info.header infoman/ocaml.info.body > infoman/ocaml.info + cd infoman; rm -f ocaml.info.tmp ocaml.info.body ; gzip -9 ocaml.info* + +text: files + cd textman; \ + ${HEVEA} ${TEXT} -I .. -I ../refman -I ../library -I ../cmds \ + -I ../tutorials -I ../../styles -I ../texstuff \ + ../manual.inf -e macros.tex ../manual.tex + +etex-files: $(FILES) + cd refman; $(MAKE) etex-files RELEASEDIR=$(SRC) + cd library; $(MAKE) etex-files RELEASEDIR=$(SRC) + cd cmds; $(MAKE) etex-files RELEASEDIR=$(SRC) + cd tutorials; $(MAKE) etex-files RELEASEDIR=$(SRC) + +files: $(FILES) + cd refman; $(MAKE) all RELEASEDIR=$(SRC) + cd library; $(MAKE) all RELEASEDIR=$(SRC) + cd cmds; $(MAKE) all RELEASEDIR=$(SRC) + cd tutorials; $(MAKE) all RELEASEDIR=$(SRC) + +all: + $(MAKE) manual pdfmanual RELEASEDIR=$(SRC) + $(MAKE) manual pdfmanual RELEASEDIR=$(SRC) + $(MAKE) index RELEASEDIR=$(SRC) + $(MAKE) manual pdfmanual RELEASEDIR=$(SRC) + $(MAKE) html text info RELEASEDIR=$(SRC) + +clean: + rm -f $(FILES) + cd refman; $(MAKE) clean + cd library; $(MAKE) clean + cd cmds; $(MAKE) clean + cd tutorials; $(MAKE) clean + -rm -f texstuff/* + cd htmlman; rm -rf libref index.html manual*.html *.haux *.hind + cd textman; rm -f manual.txt *.haux *.hind + cd infoman; rm -f ocaml.info ocaml.info-* *.haux *.hind + rm -f warnings-help.etex + +release: + gzip < texstuff/manual.dvi > $(RELEASE)refman.dvi.gz + dvips -o '!gzip > $(RELEASE)refman.ps.gz' texstuff/manual.dvi + cp htmlman/manual.html $(RELEASE)refman.html + rm -f 
htmlman/manual.{html,haux,hmanual*,htoc} + tar zcf $(RELEASE)refman-html.tar.gz htmlman/*.* htmlman/libref + zip -8 $(RELEASE)refman-html.zip htmlman/*.* htmlman/libref/*.* + cp texstuff/pdfmanual.pdf $(RELEASE)refman.pdf + cp textman/manual.txt $(RELEASE)refman.txt + tar cf - infoman/ocaml.info* | gzip > $(RELEASE)refman.info.tar.gz + +.SUFFIXES: +.SUFFIXES: .tex .etex .htex + + +.etex.tex: + ../tools/texquote2 < $*.etex > $*.tex + +version.tex: $(SRC)/VERSION + sed -n -e '1s/^\([0-9]*\.[0-9]*\).*$$/\\def\\ocamlversion{\1}/p' \ + $(SRC)/VERSION > version.tex + +warnings-help.etex: $(SRC)/utils/warnings.ml $(SRC)/ocamlc + (echo "% This file is generated from (ocamlc -warn-help)";\ + echo "% according to a rule in manual/manual/Makefile.";\ + echo "% In particular, the reference to documentation sections";\ + echo "% are inserted through the Makefile, which should be updated";\ + echo "% when a new warning is documented.";\ + echo "%";\ + $(SET_LD_PATH) $(SRC)/boot/ocamlrun $(SRC)/ocamlc -warn-help \ + | sed -e 's/^ *\([0-9A-Z][0-9]*\)\(.*\)/\\item[\1] \2/'\ + ) >$@ +# sed --inplace is not portable, emulate + for i in 52 57; do\ + sed\ + s'/\\item\['$$i'\]/\\item\['$$i' (see \\ref{ss:warn'$$i'})\]/'\ + $@ > $@.tmp;\ + mv $@.tmp $@;\ + done diff --git a/manual/manual/allfiles.etex b/manual/manual/allfiles.etex new file mode 100644 index 00000000..7380648f --- /dev/null +++ b/manual/manual/allfiles.etex @@ -0,0 +1,110 @@ +\makeindex{\jobname} +\makeindex{\jobname.kwd} + +\setlength{\emergencystretch}{50pt} % pour que TeX resolve les overfull hbox lui-meme + +\begin{document} + +\thispagestyle{empty} +\begin{maintitle} +~\vfill +\Huge The OCaml system \\ + release \ocamlversion \\[1cm] +\Large Documentation and user's manual \\[1cm] +\large Xavier Leroy, \\ + Damien Doligez, Alain Frisch, Jacques Garrigue, Didier Rémy and Jérôme Vouillon \\[1cm] + \today \\ + ~ +\vfill +\normalsize Copyright \copyright\ \number\year\ Institut National de + Recherche en Informatique et 
en Automatique +\end{maintitle} +\cleardoublepage +\setcounter{page}{1} + + +\begin{htmlonly} +\begin{quote} +\rule{}{} +This manual is also available in +\ahref{http://caml.inria.fr/distrib/ocaml-\ocamlversion/ocaml-\ocamlversion-refman.pdf}{PDF}, +\ahref{http://caml.inria.fr/distrib/ocaml-\ocamlversion/ocaml-\ocamlversion-refman.ps.gz}{Postscript}, +\ahref{http://caml.inria.fr/distrib/ocaml-\ocamlversion/ocaml-\ocamlversion-refman.dvi.gz}{DVI}, +\ahref{http://caml.inria.fr/distrib/ocaml-\ocamlversion/ocaml-\ocamlversion-refman.txt}{plain text}, +as a +\ahref{http://caml.inria.fr/distrib/ocaml-\ocamlversion/ocaml-\ocamlversion-refman-html.tar.gz}{bundle of HTML files}, +and as a +\ahref{http://caml.inria.fr/distrib/ocaml-\ocamlversion/ocaml-\ocamlversion-refman.info.tar.gz}{bundle of Emacs Info files}. +\rule{}{} +\end{quote} +\end{htmlonly} + +\tableofcontents + +\input{foreword.tex} + +\part{An introduction to OCaml} +\label{p:tutorials} +\input{coreexamples.tex} +\input{moduleexamples.tex} +\input{objectexamples.tex} +\input{lablexamples.tex} +\input{polymorphism.tex} +\input{advexamples.tex} + +\part{The OCaml language} +\label{p:refman} +\input{refman.tex} +\input{exten.tex} + +\part{The OCaml tools} +\label{p:commands} + +\input{comp.tex} +\input{top.tex} +\input{runtime.tex} +\input{native.tex} +\input{lexyacc.tex} +\input{depend.tex} +\input{browser.tex} +\input{ocamldoc.tex} +\input{debugger.tex} +\input{profil.tex} +\input{ocamlbuild.tex} +% \input emacs.tex +\input{intf-c.tex} +\input{flambda.tex} +\input{spacetime.tex} +\input{afl-fuzz.tex} +\input{plugins} + +\part{The OCaml library} +\label{p:library} +\input{core.tex} +\input{stdlib.tex} +\input{compilerlibs.tex} +\input{libunix.tex} +\input{libnum.tex} +\input{libstr.tex} +\input{libthreads.tex} +\input{libgraph.tex} +\input{libdynlink.tex} +\input{libbigarray.tex} + +\part{Appendix} +\label{p:appendix} + +\ifouthtml +\begin{links} +\item \ahref{libref/index_modules.html}{Index of modules} +\item 
\ahref{libref/index_module_types.html}{Index of module types} +\item \ahref{libref/index_types.html}{Index of types} +\item \ahref{libref/index_exceptions.html}{Index of exceptions} +\item \ahref{libref/index_values.html}{Index of values} +\end{links} +\else +\printindex{\jobname}{Index to the library} +\fi +\printindex{\jobname.kwd}{Index of keywords} + +\end{document} diff --git a/manual/manual/biblio.etex b/manual/manual/biblio.etex new file mode 100644 index 00000000..dd4c26dd --- /dev/null +++ b/manual/manual/biblio.etex @@ -0,0 +1,240 @@ +\chapter{Further reading} + +For the interested reader, we list below some references to books and +reports related (sometimes loosely) to Caml Light. + +\section{Programming in ML} + +The books below are programming courses taught in ML. Their main goal +is to teach programming, not to describe ML in full details --- though +most contain fairly good introductions to the ML language. Some of +those books use the Standard ML dialect instead of the Caml dialect, +so you will have to keep in mind the differences in syntax and in +semantics. + +\begin{itemize} + +\item Pierre Weis and Xavier Leroy. {\it Le langage Caml.} +InterÉditions, 1993. + +The natural companion to this manual, provided you read French. This +book is a step-by-step introduction to programming in Caml, and +presents many realistic examples of Caml programs. + +\item Guy Cousineau and Michel Mauny. {\it Approche fonctionnelle de +la programmation}. Ediscience, 1995. + +Another Caml programming course written in French, with many original +examples. + +\item Lawrence C.\ Paulson. {\it ML for the working programmer.} +Cambridge University Press, 1991. + +A good introduction to programming in Standard ML. Develops a +theorem prover as a complete example. Contains a presentation of +the module system of Standard ML. + +\item Jeffrey D.\ Ullman. {\it Elements of ML programming.} +Prentice Hall, 1993. + +Another good introduction to programming in Standard ML. 
No realistic +examples, but a very detailed presentation of the language constructs. + +\item Ryan Stansifer. {\em ML primer.} Prentice-Hall, 1992. + +A short, but nice introduction to programming in Standard ML. + +\item Thérèse Accart Hardin and Véronique Donzeau-Gouge Viguié. {\em +Concepts et outils de la programmation. Du fonctionnel à +l'impératif avec Caml et Ada.} InterÉditions, 1992. + +A first course in programming, that first introduces the main programming +notions in Caml, then shows them underlying Ada. Intended for +beginners; slow-paced for the others. + +\item Rachel Harrison. {\em Abstract Data Types in Standard ML}. +John Wiley \& Sons, 1993. + +A presentation of Standard ML from the standpoint of abstract data +types. Uses intensively the Standard ML module system. + +\item Harold Abelson and Gerald Jay Sussman. +{\em Structure and Interpretation of Computer Programs.} The MIT +press, 1985. (French translation: {\em Structure et interprétation +des programmes informatiques}, InterÉditions, 1989.) + +An outstanding course on programming, taught in Scheme, the modern +dialect of Lisp. Well worth reading, even if you are more interested +in ML than in Lisp. + +\end{itemize} + +\section{Descriptions of ML dialects} + +The books and reports below are descriptions of various programming +languages from the ML family. They assume some familiarity with ML. + +\begin{itemize} + +\item Xavier Leroy and Pierre Weis. {\em Manuel de référence du +langage Caml.} InterÉditions, 1993. + +The French edition of the present reference manual and user's manual. + +\item Robert Harper. {\em Introduction to Standard ML.} Technical +report ECS-LFCS-86-14, University of Edinburgh, 1986. + +An overview of Standard ML, including the module system. Terse, but +still readable. + +\item Robin Milner, Mads Tofte and Robert Harper. {\em The definition +of Standard ML.} The MIT press, 1990. 
+ +A complete formal definition of Standard ML, in the framework of +structured operational semantics. This book is probably the most +mathematically precise definition of a programming language ever +written. It is heavy on formalism and extremely terse, so +even readers who are thoroughly familiar with ML will have +major difficulties with it. + +\item Robin Milner and Mads Tofte. {\em Commentary on Standard ML.} +The MIT Press, 1991. + +A commentary on the book above, that attempts to explain the most +delicate parts and motivate the design choices. Easier to read than the +Definition, but still rather involving. + +\item Guy Cousineau and Gérard Huet. {\em The CAML primer.} Technical +report~122, INRIA, 1990. + +A short description of the original Caml system, from which Caml Light +has evolved. Some familiarity with Lisp is assumed. + +\item Pierre Weis et al. {\em The CAML reference manual, version +2.6.1.} Technical report~121, INRIA, 1990. + +The manual for the original Caml system, from which Caml Light +has evolved. + +\item Michael J.\ Gordon, Arthur J.\ Milner and Christopher P.\ Wadsworth. +{\em Edinburgh LCF.} Lecture Notes in Computer Science +volume~78, Springer-Verlag, 1979. + +This is the first published description of the ML language, at the +time when it was nothing more than the control language for the LCF +system, a theorem prover. This book is now obsolete, since the ML +language has much evolved since then; but it is still of historical +interest. + +\item Paul Hudak, Simon Peyton-Jones and Philip Wadler. {\em +Report on the programming language Haskell, version 1.1.} Technical +report, Yale University, 1991. + +Haskell is a purely functional language with lazy semantics that +shares many important points with ML (full functionality, polymorphic +typing), but has interesting features of its own (dynamic overloading, +also called type classes). 
+ +\end{itemize} + +\section{Implementing functional programming languages} + +The references below are intended for those who are curious to learn +how a language like Caml Light is compiled and implemented. + +\begin{itemize} + +\item Xavier Leroy. {\em The ZINC experiment: an economical +implementation of the ML language.} Technical report~117, INRIA, 1990. +(Available by anonymous FTP on "ftp.inria.fr".) + +A description of the ZINC implementation, the prototype ML +implementation that has evolved into Caml Light. Large parts of this +report still apply to the current Caml Light system, in particular the +description of the execution model and abstract machine. Other parts +are now obsolete. Yet this report still gives a complete overview of the +implementation techniques used in Caml Light. + +\item Simon Peyton-Jones. {\em The implementation of functional +programming languages.} Prentice-Hall, 1987. (French translation: +{\em Mise en \oe uvre des langages fonctionnels de programmation}, +Masson, 1990.) + +An excellent description of the implementation of purely functional +languages with lazy semantics, using the technique known as graph +reduction. The part of the book that deals with the transformation +from ML to enriched lambda-calculus directly applies to Caml Light. +You will find a good description of how pattern-matching is compiled +and how types are inferred. The remainder of the book does not apply +directly to Caml Light, since Caml Light is not purely functional (it +has side-effects), has strict semantics, and does not use graph +reduction at all. + +\item Andrew W.\ Appel. {\em Compiling with continuations.} Cambridge +University Press, 1992. + +A complete description of an optimizing compiler for Standard ML, +based on an intermediate representation called continuation-passing +style. Shows how many advanced program optimizations can be applied to +ML. 
Not directly relevant to the Caml Light system, since Caml Light +does not use continuation-passing style at all, and makes little +attempt at optimizing programs. + +\end{itemize} + +\section{Applications of ML} + +The following reports show ML at work in various, sometimes +unexpected, areas. + +\begin{itemize} + +\item Emmanuel Chailloux and Guy Cousineau. {\em The MLgraph primer.} +Technical report 92-15, École Normale Supérieure, 1992. (Available by +anonymous FTP on "ftp.ens.fr".) +%, répertoire "biblio", fichier +% "liens-92-15.A4.300dpi.ps.Z".) + +Describes a Caml Light library that produces Postscript pictures +through high-level drawing functions. + +\item Xavier Leroy. {\em Programmation du système Unix en Caml Light.} +Technical report~147, INRIA, 1992. (Available by anonymous FTP on +"ftp.inria.fr".) +%, répertoire "INRIA/publication", fichier "RT-0147.ps.Z".) + +A Unix systems programming course, demonstrating the use of the Caml +Light library that gives access to Unix system calls. + +\item John H.\ Reppy. {\em Concurrent programming with events --- The +concurrent ML manual.} Cornell University, 1990. +(Available by anonymous FTP on "research.att.com".) +%, répertoire "dist/ml", fichier "CML-0.9.8.tar.Z".) + +Concurrent ML extends Standard ML of New Jersey with concurrent +processes that communicate through channels and events. + +\item Jeannette M. Wing, Manuel Faehndrich, J.\ Gregory Morrisett and +Scott Nettles. {\em Extensions to Standard ML to support +transactions.} Technical report CMU-CS-92-132, Carnegie-Mellon +University, 1992. (Available by anonymous FTP on +"reports.adm.cs.cmu.edu".) +% , répertoire "1992", fichier "CMU-CS-92-132.ps".) + +How to integrate the basic database operations to Standard ML. + +\item Emden R.\ Gansner and John H.\ Reppy. {\em eXene.} Bell Labs, +1991. (Available by anonymous FTP on "research.att.com".) +%, répertoire "dist/ml", fichier "eXene-0.4.tar.Z".) 
+ +An interface between Standard ML of New Jersey and the X Windows +windowing system. + +%% \item Daniel de Rauglaudre. {\em X toolkit in Caml Light.} INRIA, +%% 1992. (Included in the Caml Light distribution.) +%% % Disponible par FTP anonyme sur +%% % "ftp.inria.fr", répertoire "lang/caml-light", fichier "rt5.tar.Z".) +%% +%% An interface between Caml Light and the X Windows windowing system. + +\end{itemize} diff --git a/manual/manual/cmds/.gitignore b/manual/manual/cmds/.gitignore new file mode 100644 index 00000000..0d45900b --- /dev/null +++ b/manual/manual/cmds/.gitignore @@ -0,0 +1,3 @@ +*.tex +*.htex +warnings.etex diff --git a/manual/manual/cmds/Makefile b/manual/manual/cmds/Makefile new file mode 100644 index 00000000..3ff916ed --- /dev/null +++ b/manual/manual/cmds/Makefile @@ -0,0 +1,60 @@ +FILES=comp.tex top.tex runtime.tex native.tex lexyacc.tex intf-c.tex \ + depend.tex profil.tex debugger.tex browser.tex ocamldoc.tex \ + warnings-help.tex ocamlbuild.tex flambda.tex spacetime.tex \ + afl-fuzz.tex plugins.tex unified-options.tex + +TOPDIR=../../.. 
+include $(TOPDIR)/Makefile.tools + +LD_PATH="$(TOPDIR)/otherlibs/str:$(TOPDIR)/otherlibs/unix" + +TRANSF=$(SET_LD_PATH) $(OCAMLRUN) ../../tools/transf +TEXQUOTE=../../tools/texquote2 +FORMAT=../../tools/format-intf + +CAMLLATEX=$(SET_LD_PATH) $(OCAMLRUN) ../../tools/caml-tex2 \ +-caml "TERM=norepeat $(OCAML)" -n 80 -v false + +WITH_TRANSF= top.tex intf-c.tex flambda.tex spacetime.tex \ + afl-fuzz.tex lexyacc.tex debugger.tex + +WITH_CAMLEXAMPLE = ocamldoc.tex + +etex-files: $(FILES) + +all: $(FILES) + +clean:: + rm -f $(FILES) + rm -f *~ #*# + +.SUFFIXES: +.SUFFIXES: .tex .etex + +.etex.tex: + @$(TEXQUOTE) < $*.etex > $*.texquote_error.tex\ + && mv $*.texquote_error.tex $*.tex\ + || printf "Failure when generating %s\n" $*.tex + +$(WITH_TRANSF):%.tex:%.etex + @$(TRANSF) < $*.etex > $*.transf_error.tex \ + && mv $*.transf_error.tex $*.transf_gen.tex \ + && $(TEXQUOTE) < $*.transf_gen.tex > $*.texquote_error.tex \ + && mv $*.texquote_error.tex $*.tex \ + || printf "Failure when generating %s\n" $*.tex + + +$(WITH_CAMLEXAMPLE):%.tex:%.etex + @$(CAMLLATEX) -o $*.caml_tex_error.tex $*.etex \ + && mv $*.caml_tex_error.tex $*.gen.tex \ + && $(TRANSF) < $*.gen.tex > $*.transf_error.tex \ + && mv $*.transf_error.tex $*.gen.tex\ + && $(TEXQUOTE) < $*.gen.tex > $*.texquote_error.tex\ + && mv $*.texquote_error.tex $*.tex\ + || printf "Failure when generating %s\n" $*.tex + +warnings-help.etex: ../warnings-help.etex + cp ../warnings-help.etex . 
+ +clean:: + rm -f warnings-help.etex diff --git a/manual/manual/cmds/afl-fuzz.etex b/manual/manual/cmds/afl-fuzz.etex new file mode 100644 index 00000000..5e8a4beb --- /dev/null +++ b/manual/manual/cmds/afl-fuzz.etex @@ -0,0 +1,74 @@ +\chapter{Fuzzing with afl-fuzz} +\pdfchapterfold{-9}{Fuzzing with afl-fuzz} +%HEVEA\cutname{afl-fuzz.html} + +\section{Overview} + +American fuzzy lop (``afl-fuzz'') is a {\em fuzzer}, a tool for +testing software by providing randomly-generated inputs, searching for +those inputs which cause the program to crash. + +Unlike most fuzzers, afl-fuzz observes the internal behaviour of the +program being tested, and adjusts the test cases it generates to +trigger unexplored execution paths. As a result, test cases generated +by afl-fuzz cover more of the possible behaviours of the tested +program than other fuzzers. + +This requires that programs to be tested are instrumented to +communicate with afl-fuzz. The native-code compiler ``ocamlopt'' can +generate such instrumentation, allowing afl-fuzz to be used against +programs written in OCaml. + +For more information on afl-fuzz, see the website at +\ifouthtml +\ahref{http://lcamtuf.coredump.cx/afl/}{http://lcamtuf.coredump.cx/afl/}. +\else +{\tt http://lcamtuf.coredump.cx/afl/} +\fi + +\section{Generating instrumentation} + +The instrumentation that afl-fuzz requires is not generated by +default, and must be explicitly enabled, by passing the {\tt + -afl-instrument} option to {\tt ocamlopt}. + +To fuzz a large system without modifying build tools, OCaml's {\tt + configure} script also accepts the {\tt afl-instrument} option. If +OCaml is configured with {\tt afl-instrument}, then all programs +compiled by {\tt ocamlopt} will be instrumented. + +\subsection{Advanced options} + +In rare cases, it is useful to control the amount of instrumentation +generated. 
By passing the {\tt -afl-inst-ratio N} argument to {\tt + ocamlopt} with {\tt N} less than 100, instrumentation can be +generated for only N\% of branches. (See the afl-fuzz documentation on +the parameter {\tt AFL\_INST\_RATIO} for the precise effect of this). + +\section{Example} + +As an example, we fuzz-test the following program, {\tt readline.ml}: + +\begin{verbatim} +let _ = + let s = read_line () in + match Array.to_list (Array.init (String.length s) (String.get s)) with + ['s'; 'e'; 'c'; 'r'; 'e'; 't'; ' '; 'c'; 'o'; 'd'; 'e'] -> failwith "uh oh" + | _ -> () +\end{verbatim} + +There is a single input (the string ``secret code'') which causes this +program to crash, but finding it by blind random search is infeasible. + +Instead, we compile with afl-fuzz instrumentation enabled: +\begin{verbatim} +ocamlopt -afl-instrument readline.ml -o readline +\end{verbatim} +Next, we run the program under afl-fuzz: +\begin{verbatim} +mkdir input +echo asdf > input/testcase +mkdir output +afl-fuzz -i input -o output ./readline +\end{verbatim} +By inspecting instrumentation output, the fuzzer finds the crashing input quickly. diff --git a/manual/manual/cmds/browser.etex b/manual/manual/cmds/browser.etex new file mode 100644 index 00000000..5eb5a4ff --- /dev/null +++ b/manual/manual/cmds/browser.etex @@ -0,0 +1,7 @@ +\chapter{The browser/editor (ocamlbrowser)} \label{c:browser} +\pdfchapter{The browser/editor (ocamlbrowser)} +%HEVEA\cutname{browser.html} + +Since OCaml version 4.02, the OCamlBrowser tool and the Labltk library +are distributed separately from the OCaml compiler. The project is now +hosted at \url{https://forge.ocamlcore.org/projects/labltk/}. 
diff --git a/manual/manual/cmds/comp.etex b/manual/manual/cmds/comp.etex new file mode 100644 index 00000000..3e601014 --- /dev/null +++ b/manual/manual/cmds/comp.etex @@ -0,0 +1,512 @@ +\chapter{Batch compilation (ocamlc)} \label{c:camlc} +\pdfchapter{Batch compilation (ocamlc)} +%HEVEA\cutname{comp.html} + +This chapter describes the OCaml batch compiler "ocamlc", +which compiles OCaml source files to bytecode object files and links +these object files to produce standalone bytecode executable files. +These executable files are then run by the bytecode interpreter +"ocamlrun". + +\section{Overview of the compiler} + +The "ocamlc" command has a command-line interface similar to the one of +most C compilers. It accepts several types of arguments and processes them +sequentially, after all options have been processed: + +\begin{itemize} +\item +Arguments ending in ".mli" are taken to be source files for +compilation unit interfaces. Interfaces specify the names exported by +compilation units: they declare value names with their types, define +public data types, declare abstract data types, and so on. From the +file \var{x}".mli", the "ocamlc" compiler produces a compiled interface +in the file \var{x}".cmi". + +\item +Arguments ending in ".ml" are taken to be source files for compilation +unit implementations. Implementations provide definitions for the +names exported by the unit, and also contain expressions to be +evaluated for their side-effects. From the file \var{x}".ml", the "ocamlc" +compiler produces compiled object bytecode in the file \var{x}".cmo". + +If the interface file \var{x}".mli" exists, the implementation +\var{x}".ml" is checked against the corresponding compiled interface +\var{x}".cmi", which is assumed to exist. If no interface +\var{x}".mli" is provided, the compilation of \var{x}".ml" produces a +compiled interface file \var{x}".cmi" in addition to the compiled +object code file \var{x}".cmo". 
The file \var{x}".cmi" produced +corresponds to an interface that exports everything that is defined in +the implementation \var{x}".ml". + +\item +Arguments ending in ".cmo" are taken to be compiled object bytecode. These +files are linked together, along with the object files obtained +by compiling ".ml" arguments (if any), and the OCaml standard +library, to produce a standalone executable program. The order in +which ".cmo" and ".ml" arguments are presented on the command line is +relevant: compilation units are initialized in that order at +run-time, and it is a link-time error to use a component of a unit +before having initialized it. Hence, a given \var{x}".cmo" file must come +before all ".cmo" files that refer to the unit \var{x}. + +\item +Arguments ending in ".cma" are taken to be libraries of object bytecode. +A library of object bytecode packs in a single file a set of object +bytecode files (".cmo" files). Libraries are built with "ocamlc -a" +(see the description of the "-a" option below). The object files +contained in the library are linked as regular ".cmo" files (see +above), in the order specified when the ".cma" file was built. The +only difference is that if an object file contained in a library is +not referenced anywhere in the program, then it is not linked in. + +\item +Arguments ending in ".c" are passed to the C compiler, which generates +a ".o" object file (".obj" under Windows). This object file is linked +with the program if the "-custom" flag is set (see the description of +"-custom" below). + +\item +Arguments ending in ".o" or ".a" (".obj" or ".lib" under Windows) +are assumed to be C object files and libraries. They are passed to the +C linker when linking in "-custom" mode (see the description of +"-custom" below). + +\item +Arguments ending in ".so" (".dll" under Windows) +are assumed to be C shared libraries (DLLs). 
During linking, they are +searched for external C functions referenced from the OCaml code, +and their names are written in the generated bytecode executable. +The run-time system "ocamlrun" then loads them dynamically at program +start-up time. + +\end{itemize} + +The output of the linking phase is a file containing compiled bytecode +that can be executed by the OCaml bytecode interpreter: +the command named "ocamlrun". If "a.out" is the name of the file +produced by the linking phase, the command +\begin{alltt} + ocamlrun a.out \nth{arg}{1} \nth{arg}{2} \ldots \nth{arg}{n} +\end{alltt} +executes the compiled code contained in "a.out", passing it as +arguments the character strings \nth{arg}{1} to \nth{arg}{n}. +(See chapter~\ref{c:runtime} for more details.) + +On most systems, the file produced by the linking +phase can be run directly, as in: +\begin{alltt} + ./a.out \nth{arg}{1} \nth{arg}{2} \ldots \nth{arg}{n} +\end{alltt} +The produced file has the executable bit set, and it manages to launch +the bytecode interpreter by itself. + +\section{Options}\label{s:comp-options} + +The following command-line options are recognized by "ocamlc". +The options "-pack", "-a", "-c" and "-output-obj" are mutually exclusive. +% Define boolean variables used by the macros in unified-options.etex +\newif\ifcomp \comptrue +\newif\ifnat \natfalse +\newif\iftop \topfalse +% unified-options gathers all options across the native/bytecode +% compilers and toplevel +\input{unified-options.tex} + +\paragraph{Contextual control of command-line options} + +The compiler command line can be modified ``from the outside'' +with the following mechanisms. These are experimental +and subject to change. They should be used only for experimental and +development work, not in released packages. + +\begin{options} +\item["OCAMLPARAM" \rm(environment variable)] +A set of arguments that will be inserted before or after the arguments from +the command line. 
Arguments are specified in a comma-separated list +of "name=value" pairs. A "_" is used to specify the position of +the command line arguments, i.e. "a=x,_,b=y" means that "a=x" should be +executed before parsing the arguments, and "b=y" after. Finally, +an alternative separator can be specified as the +first character of the string, within the set ":|; ,". +\item["ocaml_compiler_internal_params" \rm(file in the stdlib directory)] +A mapping of file names to lists of arguments that +will be added to the command line (and "OCAMLPARAM") arguments. +\item["OCAML_FLEXLINK" \rm(environment variable)] +Alternative executable to use on native +Windows for "flexlink" instead of the +configured value. Primarily used for bootstrapping. +\end{options} + +\section{Modules and the file system} + +This short section is intended to clarify the relationship between the +names of the modules corresponding to compilation units and the names +of the files that contain their compiled interface and compiled +implementation. + +The compiler always derives the module name by taking the capitalized +base name of the source file (".ml" or ".mli" file). That is, it +strips the leading directory name, if any, as well as the ".ml" or +".mli" suffix; then, it sets the first letter to uppercase, in order to +comply with the requirement that module names must be capitalized. +For instance, compiling the file "mylib/misc.ml" provides an +implementation for the module named "Misc". Other compilation units +may refer to components defined in "mylib/misc.ml" under the names +"Misc."\var{name}; they can also do "open Misc", then use unqualified +names \var{name}. + +The ".cmi" and ".cmo" files produced by the compiler have the same +base name as the source file. Hence, the compiled files always have +their base name equal (modulo capitalization of the first letter) to +the name of the module they describe (for ".cmi" files) or implement +(for ".cmo" files).
+ +When the compiler encounters a reference to a free module identifier +"Mod", it looks in the search path for a file named "Mod.cmi" or "mod.cmi" +and loads the compiled interface +contained in that file. As a consequence, renaming ".cmi" files is not +advised: the name of a ".cmi" file must always correspond to the name +of the compilation unit it implements. It is admissible to move them +to another directory, if their base name is preserved, and the correct +"-I" options are given to the compiler. The compiler will flag an +error if it loads a ".cmi" file that has been renamed. + +Compiled bytecode files (".cmo" files), on the other hand, can be +freely renamed once created. That's because the linker never attempts +to find by itself the ".cmo" file that implements a module with a +given name: it relies instead on the user providing the list of ".cmo" +files by hand. + +\section{Common errors} \label{s:comp-errors} + +This section describes and explains the most frequently encountered +error messages. + +\begin{options} + +\item[Cannot find file \var{filename}] +The named file could not be found in the current directory, nor in the +directories of the search path. The \var{filename} is either a +compiled interface file (".cmi" file), or a compiled bytecode file +(".cmo" file). If \var{filename} has the format \var{mod}".cmi", this +means you are trying to compile a file that references identifiers +from module \var{mod}, but you have not yet compiled an interface for +module \var{mod}. Fix: compile \var{mod}".mli" or \var{mod}".ml" +first, to create the compiled interface \var{mod}".cmi". + +If \var{filename} has the format \var{mod}".cmo", this +means you are trying to link a bytecode object file that does not +exist yet. Fix: compile \var{mod}".ml" first. + +If your program spans several directories, this error can also appear +because you haven't specified the directories to look into. Fix: add +the correct "-I" options to the command line. 
+ +\item[Corrupted compiled interface \var{filename}] +The compiler produces this error when it tries to read a compiled +interface file (".cmi" file) that has the wrong structure. This means +something went wrong when this ".cmi" file was written: the disk was +full, the compiler was interrupted in the middle of the file creation, +and so on. This error can also appear if a ".cmi" file is modified after +its creation by the compiler. Fix: remove the corrupted ".cmi" file, +and rebuild it. + +\item[This expression has type \nth{t}{1}, but is used with type \nth{t}{2}] +This is by far the most common type error in programs. Type \nth{t}{1} is +the type inferred for the expression (the part of the program that is +displayed in the error message), by looking at the expression itself. +Type \nth{t}{2} is the type expected by the context of the expression; it +is deduced by looking at how the value of this expression is used in +the rest of the program. If the two types \nth{t}{1} and \nth{t}{2} are not +compatible, then the error above is produced. + +In some cases, it is hard to understand why the two types \nth{t}{1} and +\nth{t}{2} are incompatible. For instance, the compiler can report that +``expression of type "foo" cannot be used with type "foo"'', and it +really seems that the two types "foo" are compatible. This is not +always true. Two type constructors can have the same name, but +actually represent different types. This can happen if a type +constructor is redefined. Example: +\begin{verbatim} + type foo = A | B + let f = function A -> 0 | B -> 1 + type foo = C | D + f C +\end{verbatim} +This results in the error message ``expression "C" of type "foo" cannot +be used with type "foo"''.
+ +\item[The type of this expression, \var{t}, contains type variables + that cannot be generalized] +Type variables ("'a", "'b", \ldots) in a type \var{t} can be in either +of two states: generalized (which means that the type \var{t} is valid +for all possible instantiations of the variables) and not generalized +(which means that the type \var{t} is valid only for one instantiation +of the variables). In a "let" binding "let "\var{name}" = "\var{expr}, +the type-checker normally generalizes as many type variables as +possible in the type of \var{expr}. However, this leads to unsoundness +(a well-typed program can crash) in conjunction with polymorphic +mutable data structures. To avoid this, generalization is performed at +"let" bindings only if the bound expression \var{expr} belongs to the +class of ``syntactic values'', which includes constants, identifiers, +functions, tuples of syntactic values, etc. In all other cases (for +instance, \var{expr} is a function application), a polymorphic mutable +could have been created and generalization is therefore turned off for +all variables occurring in contravariant or non-variant branches of the +type. For instance, if the type of a non-value is "'a list" the +variable is generalizable ("list" is a covariant type constructor), +but not in "'a list -> 'a list" (the left branch of "->" is +contravariant) or "'a ref" ("ref" is non-variant). + +Non-generalized type variables in a type cause no difficulties inside +a given structure or compilation unit (the contents of a ".ml" file, +or an interactive session), but they cannot be allowed inside +signatures nor in compiled interfaces (".cmi" file), because they +could be used inconsistently later. Therefore, the compiler +flags an error when a structure or compilation unit defines a value +\var{name} whose type contains non-generalized type variables. 
There +are two ways to fix this error: +\begin{itemize} +\item Add a type constraint or a ".mli" file to give a monomorphic +type (without type variables) to \var{name}. For instance, instead of +writing +\begin{verbatim} + let sort_int_list = Sort.list (<) + (* inferred type 'a list -> 'a list, with 'a not generalized *) +\end{verbatim} +write +\begin{verbatim} + let sort_int_list = (Sort.list (<) : int list -> int list);; +\end{verbatim} +\item If you really need \var{name} to have a polymorphic type, turn +its defining expression into a function by adding an extra parameter. +For instance, instead of writing +\begin{verbatim} + let map_length = List.map Array.length + (* inferred type 'a array list -> int list, with 'a not generalized *) +\end{verbatim} +write +\begin{verbatim} + let map_length lv = List.map Array.length lv +\end{verbatim} +\end{itemize} + +\item[Reference to undefined global \var{mod}] +This error appears when trying to link an incomplete or incorrectly +ordered set of files. Either you have forgotten to provide an +implementation for the compilation unit named \var{mod} on the command line +(typically, the file named \var{mod}".cmo", or a library containing +that file). Fix: add the missing ".ml" or ".cmo" file to the command +line. Or, you have provided an implementation for the module named +\var{mod}, but it comes too late on the command line: the +implementation of \var{mod} must come before all bytecode object files +that reference \var{mod}. Fix: change the order of ".ml" and ".cmo" +files on the command line. + +Of course, you will always encounter this error if you have mutually +recursive functions across modules. That is, function "Mod1.f" calls +function "Mod2.g", and function "Mod2.g" calls function "Mod1.f". +In this case, no matter what permutations you perform on the command +line, the program will be rejected at link-time. Fixes: +\begin{itemize} +\item Put "f" and "g" in the same module. 
+\item Parameterize one function by the other. +That is, instead of having +\begin{verbatim} +mod1.ml: let f x = ... Mod2.g ... +mod2.ml: let g y = ... Mod1.f ... +\end{verbatim} +define +\begin{verbatim} +mod1.ml: let f g x = ... g ... +mod2.ml: let rec g y = ... Mod1.f g ... +\end{verbatim} +and link "mod1.cmo" before "mod2.cmo". +\item Use a reference to hold one of the two functions, as in: +\begin{verbatim} +mod1.ml: let forward_g = + ref((fun x -> failwith "forward_g") : <type>) + let f x = ... !forward_g ... +mod2.ml: let g y = ... Mod1.f ... + let _ = Mod1.forward_g := g +\end{verbatim} +\end{itemize} + +\item[The external function \var{f} is not available] +This error appears when trying to link code that calls external +functions written in C. As explained in +chapter~\ref{c:intf-c}, such code must be linked with C libraries that +implement the required \var{f} C function. If the C libraries in +question are not shared libraries (DLLs), the code must be linked in +``custom runtime'' mode. Fix: add the required C libraries to the +command line, and possibly the "-custom" option. + +\end{options} + +\section{Warning reference} \label{s:comp-warnings} + +This section describes and explains in detail some warnings: + +\subsection{Warning 9: missing fields in a record pattern} + + When pattern matching on records, it can be useful to match only a few + fields of a record. Eliding fields can be done either implicitly + or explicitly by ending the record pattern with "; _". + However, implicit field elision is at odds with pattern matching + exhaustiveness checks. + Enabling warning 9 prioritizes exhaustiveness checks over the + convenience of implicit field elision and will warn on implicit + field elision in record patterns. In particular, this warning can + help to spot exhaustive record patterns that may need to be updated + after the addition of new fields to a record type.
+ +\begin{verbatim} +type 'a point = {x : 'a; y : 'a} +let dx { x } = x (* implicit field elision: trigger warning 9 *) +let dy { y; _ } = y (* explicit field elision: do not trigger warning 9 *) +\end{verbatim} + +\subsection{Warning 52: fragile constant pattern} +\label{ss:warn52} + + Some constructors, such as the exception constructors "Failure" and + "Invalid_argument", take as parameter a "string" value holding + a text message intended for the user. + + These text messages are usually not stable over time: call sites + building these constructors may refine the message in a future + version to make it more explicit, etc. Therefore, it is dangerous to + match over the precise value of the message. For example, until + OCaml 4.02, "Array.iter2" would raise the exception +\begin{verbatim} + Invalid_argument "arrays must have the same length" +\end{verbatim} + Since 4.03 it raises the more helpful message +\begin{verbatim} + Invalid_argument "Array.iter2: arrays must have the same length" +\end{verbatim} + but this means that any code of the form +\begin{verbatim} + try ... + with Invalid_argument "arrays must have the same length" -> ... +\end{verbatim} + is now broken and may suffer from uncaught exceptions. + + Warning 52 is there to prevent users from writing such fragile code + in the first place.
It does not occur on every matching on a literal + string, but only in the case in which library authors expressed + their intent to possibly change the constructor parameter value in + the future, by using the attribute "ocaml.warn_on_literal_pattern" + (see the manual section on builtin attributes in + \ref{ss:builtin-attributes}): +\begin{verbatim} + type t = + | Foo of string [@ocaml.warn_on_literal_pattern] + | Bar of string + + let no_warning = function + | Bar "specific value" -> 0 + | _ -> 1 + + let warning = function + | Foo "specific value" -> 0 + | _ -> 1 + +> | Foo "specific value" -> 0 +> ^^^^^^^^^^^^^^^^ +> Warning 52: Code should not depend on the actual values of +> this constructor's arguments. They are only for information +> and may change in future versions. (See manual section 8.5) +\end{verbatim} + + In particular, all built-in exceptions with a string argument have + this attribute set: "Invalid_argument", "Failure", "Sys_error" will + all raise this warning if you match for a specific string argument. + + If your code raises this warning, you should {\em not} change the + way you test for the specific string to avoid the warning (for + example using a string equality inside the right-hand-side instead + of a literal pattern), as your code would remain fragile. You should + instead enlarge the scope of the pattern by matching on all possible + values. + +\begin{verbatim} + +let warning = function + | Foo _ -> 0 + | _ -> 1 +\end{verbatim} + + This may require some care: if the scrutinee may return several + different cases of the same pattern, or raise distinct instances of + the same exception, you may need to modify your code to separate + those several cases. + + For example, +\begin{verbatim} +try (int_of_string count_str, bool_of_string choice_str) with + | Failure "int_of_string" -> (0, true) + | Failure "bool_of_string" -> (-1, false) +\end{verbatim} + should be rewritten into more atomic tests. 
For example, + using the "exception" patterns documented in Section~\ref{s:exception-match}, + one can write: +\begin{verbatim} +match int_of_string count_str with + | exception (Failure _) -> (0, true) + | count -> + begin match bool_of_string choice_str with + | exception (Failure _) -> (-1, false) + | choice -> (count, choice) + end +\end{verbatim} + +The only case where that transformation is not possible is if a given +function call may raise distinct exceptions with the same constructor +but different string values. In this case, you will have to check for +specific string values. This is dangerous API design and it should be +discouraged: it's better to define more precise exception constructors +than store useful information in strings. + +\subsection{Warning 57: Ambiguous or-pattern variables under guard} +\label{ss:warn57} + + The semantics of or-patterns in OCaml is specified with + a left-to-right bias: a value \var{v} matches the pattern \var{p} "|" \var{q} + if it matches \var{p} or \var{q}, but if it matches both, + the environment captured by the match is the environment captured by + \var{p}, never the one captured by \var{q}. + + While this property is generally intuitive, there is at least one specific + case where a different semantics might be expected. + Consider a pattern followed by a when-guard: + "|"~\var{p}~"when"~\var{g}~"->"~\var{e}, for example: +\begin{verbatim} + | ((Const x, _) | (_, Const x)) when is_neutral x -> branch +\end{verbatim} + The semantics is clear: + match the scrutinee against the pattern, if it matches, test the guard, + and if the guard passes, take the branch. + In particular, consider the input "(Const"~\var{a}", Const"~\var{b}")", where + \var{a} fails the test "is_neutral"~\var{a}, while \var{b} passes the test + "is_neutral"~\var{b}. 
With the left-to-right semantics, the clause above is + {\em not} taken by this input: matching "(Const"~\var{a}", Const"~\var{b}")" + against the or-pattern succeeds in the left branch, it returns the + environment \var{x}~"->"~\var{a}, and then the guard + "is_neutral"~\var{a} is tested and fails, the branch is not taken. + + However, another semantics may be considered more natural here: + any pair that has one side passing the test will take the branch. With this + semantics the previous code fragment would be equivalent to +\begin{verbatim} + | (Const x, _) when is_neutral x -> branch + | (_, Const x) when is_neutral x -> branch +\end{verbatim} + This is {\em not} the semantics adopted by OCaml. + + Warning 57 is dedicated to these confusing cases where the + specified left-to-right semantics is not equivalent to a non-deterministic + semantics (any branch can be taken) relative to a specific guard. + More precisely, it warns when a guard uses ``ambiguous'' variables that are bound + to different parts of the scrutinees by different sides of an or-pattern. diff --git a/manual/manual/cmds/debugger.etex b/manual/manual/cmds/debugger.etex new file mode 100644 index 00000000..31a2ad65 --- /dev/null +++ b/manual/manual/cmds/debugger.etex @@ -0,0 +1,674 @@ +\chapter{The debugger (ocamldebug)} \label{c:debugger} +\pdfchapter{The debugger (ocamldebug)} +%HEVEA\cutname{debugger.html} + +This chapter describes the OCaml source-level replay debugger +"ocamldebug". + +\begin{unix} The debugger is available on Unix systems that provide +BSD sockets. +\end{unix} + +\begin{windows} The debugger is available under the Cygwin port of +OCaml, but not under the native Win32 ports. +\end{windows} + +\section{Compiling for debugging} + +Before the debugger can be used, the program must be compiled and +linked with the "-g" option: all ".cmo" and ".cma" files that are part +of the program should have been created with "ocamlc -g", and they +must be linked together with "ocamlc -g".
+ +Compiling with "-g" entails no penalty on the running time of +programs: object files and bytecode executable files are bigger and +take longer to produce, but the executable files run at +exactly the same speed as if they had been compiled without "-g". + +\section{Invocation} + +\subsection{Starting the debugger} + +The OCaml debugger is invoked by running the program +"ocamldebug" with the name of the bytecode executable file as first +argument: +\begin{alltt} + ocamldebug \optvar{options} \var{program} \optvar{arguments} +\end{alltt} +The arguments following \var{program} are optional, and are passed as +command-line arguments to the program being debugged. (See also the +"set arguments" command.) + +The following command-line options are recognized: +\begin{options} +\item["-c " \var{count}] +Set the maximum number of simultaneously live checkpoints to \var{count}. + +\item["-cd " \var{dir}] +Run the debugger program from the working directory \var{dir}, +instead of the current directory. (See also the "cd" command.) + +\item["-emacs"] +Tell the debugger it is executed under Emacs. (See +section~\ref{s:inf-debugger} for information on how to run the +debugger under Emacs.) + +\item["-I "\var{directory}] +Add \var{directory} to the list of directories searched for source +files and compiled files. (See also the "directory" command.) + +\item["-s "\var{socket}] +Use \var{socket} for communicating with the debugged program. See the +description of the command "set socket" (section~\ref{s:communication}) +for the format of \var{socket}. + +\item["-version"] +Print version string and exit. + +\item["-vnum"] +Print short version number and exit. + +\item["-help" or "--help"] +Display a short usage summary and exit. +% +\end{options} + +\subsection{Initialization file} + +On start-up, the debugger will read commands from an initialization +file before giving control to the user. 
The default file is +".ocamldebug" in the current directory if it exists, otherwise +".ocamldebug" in the user's home directory. + +\subsection{Exiting the debugger} + +The command "quit" exits the debugger. You can also exit the debugger +by typing an end-of-file character (usually "ctrl-D"). + +Typing an interrupt character (usually "ctrl-C") will not exit the +debugger, but will terminate the action of any debugger command that is in +progress and return to the debugger command level. + +\section{Commands} \label{s:debugger-commands} + +A debugger command is a single line of input. It starts with a command +name, which is followed by arguments depending on this name. Examples: +\begin{verbatim} + run + goto 1000 + set arguments arg1 arg2 +\end{verbatim} + +A command name can be truncated as long as there is no ambiguity. For +instance, "go 1000" is understood as "goto 1000", since there are no +other commands whose name starts with "go". For the most frequently +used commands, ambiguous abbreviations are allowed. For instance, "r" +stands for "run" even though there are other commands starting with +"r". You can test the validity of an abbreviation using the "help" command. + +If the previous command has been successful, a blank line (typing just +"RET") will repeat it. + +\subsection{Getting help} + +The OCaml debugger has a simple on-line help system, which gives +a brief description of each command and variable. + +\begin{options} +\item["help"] +Print the list of commands. + +\item["help "\var{command}] +Give help about the command \var{command}. + +\item["help set "\var{variable}, "help show "\var{variable}] +Give help about the variable \var{variable}. The list of all debugger +variables can be obtained with "help set". + +\item["help info "\var{topic}] +Give help about \var{topic}. Use "help info" to get a list of known topics.
+\end{options} + +\subsection{Accessing the debugger state} + +\begin{options} +\item["set "\var{variable} \var{value}] +Set the debugger variable \var{variable} to the value \var{value}. + +\item["show "\var{variable}] +Print the value of the debugger variable \var{variable}. + +\item["info "\var{subject}] +Give information about the given subject. +For instance, "info breakpoints" will print the list of all breakpoints. +\end{options} + +\section{Executing a program} + +\subsection{Events} + +Events are ``interesting'' locations in the source code, corresponding +to the beginning or end of evaluation of ``interesting'' +sub-expressions. Events are the unit of single-stepping (stepping goes +to the next or previous event encountered in the program execution). +Also, breakpoints can only be set at events. Thus, events play the +role of line numbers in debuggers for conventional languages. + +During program execution, a counter is incremented at each event +encountered. The value of this counter is referred to as the {\em current +time}. Thanks to reverse execution, it is possible to jump back and +forth to any time of the execution. + +Here is where the debugger events (written \event) are located in +the source code: +\begin{itemize} +\item Following a function application: +\begin{alltt} +(f arg)\event +\end{alltt} +\item On entrance to a function: +\begin{alltt} +fun x y z -> \event ... +\end{alltt} +\item On each case of a pattern-matching definition (function, +"match"\ldots"with" construct, "try"\ldots"with" construct): +\begin{alltt} +function pat1 -> \event expr1 + | ...
+ | patN -> \event exprN +\end{alltt} +\item Between subexpressions of a sequence: +\begin{alltt} +expr1; \event expr2; \event ...; \event exprN +\end{alltt} +\item In the two branches of a conditional expression: +\begin{alltt} +if cond then \event expr1 else \event expr2 +\end{alltt} +\item At the beginning of each iteration of a loop: +\begin{alltt} +while cond do \event body done +for i = a to b do \event body done +\end{alltt} +\end{itemize} +Exceptions: A function application followed by a function return is replaced +by the compiler by a jump (tail-call optimization). In this case, no +event is put after the function application. +% Also, no event is put after a function application when the function +% is external (written in C). + +\subsection{Starting the debugged program} + +The debugger starts executing the debugged program only when needed. +This allows setting breakpoints or assigning debugger variables before +execution starts. There are several ways to start execution: +\begin{options} +\item["run"] Run the program until a breakpoint is hit, or the program +terminates. +\item["goto 0"] Load the program and stop on the first event. +\item["goto "\var{time}] Load the program and execute it until the +given time. Useful when you already know approximately at what time +the problem appears. Also useful to set breakpoints on function values +that have not been computed at time 0 (see section~\ref{s:breakpoints}). +\end{options} + +The execution of a program is affected by certain information it +receives when the debugger starts it, such as the command-line +arguments to the program and its working directory. The debugger +provides commands to specify this information ("set arguments" and "cd"). +These commands must be used before program execution starts. If you try +to change the arguments or the working directory after starting your +program, the debugger will kill the program (after asking for confirmation). 
+ +\subsection{Running the program} + +The following commands execute the program forward or backward, +starting at the current time. The execution will stop either when +specified by the command or when a breakpoint is encountered. + +\begin{options} +\item["run"] Execute the program forward from current time. Stops at +next breakpoint or when the program terminates. +\item["reverse"] Execute the program backward from current time. +Mostly useful to go to the last breakpoint encountered before the +current time. +\item["step "\optvar{count}] Run the program and stop at the next +event. With an argument, do it \var{count} times. If \var{count} is 0, +run until the program terminates or a breakpoint is hit. +\item["backstep "\optvar{count}] Run the program backward and stop at +the previous event. With an argument, do it \var{count} times. +\item["next "\optvar{count}] Run the program and stop at the next +event, skipping over function calls. With an argument, do it +\var{count} times. +\item["previous "\optvar{count}] Run the program backward and stop at +the previous event, skipping over function calls. With an argument, do +it \var{count} times. +\item["finish"] Run the program until the current function returns. +\item["start"] Run the program backward and stop at the first event +before the current function invocation. +\end{options} + +\subsection{Time travel} + +You can jump directly to a given time, without stopping on +breakpoints, using the "goto" command. + +As you move through the program, the debugger maintains a history of +the successive times you stop at. The "last" command can be used to +revisit these times: each "last" command moves one step back through +the history. That is useful mainly to undo commands such as "step" +and "next". + +\begin{options} +\item["goto "\var{time}] +Jump to the given time. +\item["last "\optvar{count}] +Go back to the latest time recorded in the execution history. With an +argument, do it \var{count} times.
+\item["set history "\var{size}] +Set the size of the execution history. +\end{options} + +\subsection{Killing the program} + +\begin{options} +\item["kill"] Kill the program being executed. This command is mainly +useful if you wish to recompile the program without leaving the debugger. +\end{options} + +\section{Breakpoints} \label{s:breakpoints} + +A breakpoint causes the program to stop whenever a certain point in +the program is reached. It can be set in several ways using the +"break" command. Breakpoints are assigned numbers when set, for +further reference. The most comfortable way to set breakpoints is +through the Emacs interface (see section~\ref{s:inf-debugger}). + +\begin{options} +\item["break"] +Set a breakpoint at the current position in the program execution. The +current position must be on an event (i.e., neither at the beginning, +nor at the end of the program). + +\item["break "\var{function}] +Set a breakpoint at the beginning of \var{function}. This works only +when the functional value of the identifier \var{function} has been +computed and assigned to the identifier. Hence this command cannot be +used at the very beginning of the program execution, when all +identifiers are still undefined; use "goto" \var{time} to advance +execution until the functional value is available. + +\item["break \@" \optvar{module} \var{line}] +Set a breakpoint in module \var{module} (or in the current module if +\var{module} is not given), at the first event of line \var{line}. + +\item["break \@" \optvar{module} \var{line} \var{column}] +Set a breakpoint in module \var{module} (or in the current module if +\var{module} is not given), at the event closest to line \var{line}, +column \var{column}. + +\item["break \@" \optvar{module} "#" \var{character}] +Set a breakpoint in module \var{module} at the event closest to +character number \var{character}. + +\item["break "\var{address}] +Set a breakpoint at the code address \var{address}. 
+ +\item["delete "\optvar{breakpoint-numbers}] +Delete the specified breakpoints. Without argument, all breakpoints +are deleted (after asking for confirmation). + +\item["info breakpoints"] Print the list of all breakpoints. +\end{options} + +\section{The call stack} + +Each time the program performs a function application, it saves the +location of the application (the return address) in a block of data +called a stack frame. The frame also contains the local variables of +the caller function. All the frames are allocated in a region of +memory called the call stack. The command "backtrace" (or "bt") +displays parts of the call stack. + +At any time, one of the stack frames is ``selected'' by the debugger; several +debugger commands refer implicitly to the selected frame. In particular, +whenever you ask the debugger for the value of a local variable, the +value is found in the selected frame. The commands "frame", "up" and "down" +select whichever frame you are interested in. + +When the program stops, the debugger automatically selects the +currently executing frame and describes it briefly as the "frame" +command does. + +\begin{options} +\item["frame"] +Describe the currently selected stack frame. + +\item["frame" \var{frame-number}] +Select a stack frame by number and describe it. The frame currently +executing when the program stopped has number 0; its caller has number +1; and so on up the call stack. + +\item["backtrace "\optvar{count}, "bt "\optvar{count}] +Print the call stack. This is useful to see which sequence of function +calls led to the currently executing frame. With a positive argument, +print only the innermost \var{count} frames. +With a negative argument, print only the outermost -\var{count} frames. + +\item["up" \optvar{count}] +Select and display the stack frame just ``above'' the selected frame, +that is, the frame that called the selected frame. An argument says how +many frames to go up. 
+ +\item["down "\optvar{count}] +Select and display the stack frame just ``below'' the selected frame, +that is, the frame that was called by the selected frame. An argument +says how many frames to go down. +\end{options} + +\section{Examining variable values} + +The debugger can print the current value of simple expressions. The +expressions can involve program variables: all the identifiers that +are in scope at the selected program point can be accessed. + +Expressions that can be printed are a subset of OCaml +expressions, as described by the following grammar: +\begin{syntax} +simple-expr: + lowercase-ident + | { capitalized-ident '.' } lowercase-ident + | '*' + | '$' integer + | simple-expr '.' lowercase-ident + | simple-expr '.(' integer ')' + | simple-expr '.[' integer ']' + | '!' simple-expr + | '(' simple-expr ')' +\end{syntax} +The first two cases refer to a value identifier, either unqualified or +qualified by the path to the structure that defines it. +"*" refers to the result just computed (typically, the value of a +function application), and is valid only if the selected event is an +``after'' event (typically, a function application). +@'$' integer@ refers to a previously printed value. The remaining four +forms select part of an expression: respectively, a record field, an +array element, a string element, and the current contents of a +reference. + +\begin{options} +\item["print "\var{variables}] +Print the values of the given variables. "print" can be abbreviated as +"p". +\item["display "\var{variables}] +Same as "print", but limit the depth of printing to 1. Useful to +browse large data structures without printing them in full. +"display" can be abbreviated as "d". +\end{options} + +When printing a complex expression, a name of the form "$"\var{integer} +is automatically assigned to its value. Such names are also assigned +to parts of the value that cannot be printed because the maximal +printing depth is exceeded. 
Named values can be printed later on +with the commands "p $"\var{integer} or "d $"\var{integer}. +Named values are valid only as long as the program is stopped. They +are forgotten as soon as the program resumes execution. + +\begin{options} +\item["set print_depth" \var{d}] +Limit the printing of values to a maximal depth of \var{d}. +\item["set print_length" \var{l}] +Limit the printing of values to at most \var{l} nodes printed. +\end{options} + +\section{Controlling the debugger} + +\subsection{Setting the program name and arguments} + +\begin{options} +\item["set program" \var{file}] +Set the program name to \var{file}. +\item["set arguments" \var{arguments}] +Give \var{arguments} as command-line arguments for the program. +\end{options} + +A shell is used to pass the arguments to the debugged program. You can +therefore use wildcards, shell variables, and file redirections inside +the arguments. To debug programs that read from standard input, it is +recommended to redirect their input from a file (using +"set arguments < input-file"), otherwise input to the program and +input to the debugger are not properly separated, and inputs are not +properly replayed when running the program backwards. + +\subsection{How programs are loaded} + +The "loadingmode" variable controls how the program is executed. + +\begin{options} +\item["set loadingmode direct"] +The program is run directly by the debugger. This is the default mode. +\item["set loadingmode runtime"] +The debugger executes the OCaml runtime "ocamlrun" on the program. +Rarely useful; moreover it prevents the debugging of programs compiled +in ``custom runtime'' mode. +\item["set loadingmode manual"] +The user manually starts the program, when asked by the debugger. +Allows remote debugging (see section~\ref{s:communication}). +\end{options} + +\subsection{Search path for files} + +The debugger searches for source files and compiled interface files in +a list of directories, the search path. 
The search path initially +contains the current directory "." and the standard library directory. +The "directory" command adds directories to the path. + +Whenever the search path is modified, the debugger will clear any +information it may have cached about the files. + +\begin{options} +\item["directory" \var{directorynames}] +Add the given directories to the search path. These directories are +added at the front, and will therefore be searched first. + +\item["directory" \var{directorynames} "for" \var{modulename}] +Same as "directory" \var{directorynames}, but the given directories will be +searched only when looking for the source file of a module that has +been packed into \var{modulename}. + +\item["directory"] +Reset the search path. This requires confirmation. +\end{options} + +\subsection{Working directory} + +Each time a program is started in the debugger, it inherits its working +directory from the current working directory of the debugger. This +working directory is initially whatever it inherited from its parent +process (typically the shell), but you can specify a new working +directory in the debugger with the "cd" command or the "-cd" +command-line option. + +\begin{options} +\item["cd" \var{directory}] +Set the working directory for "ocamldebug" to \var{directory}. + +\item["pwd"] +Print the working directory for "ocamldebug". +\end{options} + +\subsection{Turning reverse execution on and off} + +In some cases, you may want to turn reverse execution off. This speeds +up the program execution, and is also sometimes useful for interactive +programs. + +Normally, the debugger takes checkpoints of the program state from +time to time. That is, it makes a copy of the current state of the +program (using the Unix system call "fork"). If the variable +\var{checkpoints} is set to "off", the debugger will not take any +checkpoints. + +\begin{options} +\item["set checkpoints" \var{on/off}] +Select whether the debugger makes checkpoints or not. 
+\end{options} + +\subsection{Communication between the debugger and the program} +\label{s:communication} + +The debugger communicates with the program being debugged through a +Unix socket. You may need to change the socket name, for example if +you need to run the debugger on one machine and your program on another. + +\begin{options} +\item["set socket" \var{socket}] +Use \var{socket} for communication with the program. \var{socket} can be +either a file name, or an Internet port specification +\var{host}:\var{port}, where \var{host} is a host name or an Internet +address in dot notation, and \var{port} is a port number on the host. +\end{options} + +On the debugged program side, the socket name is passed through the +"CAML_DEBUG_SOCKET" environment variable. + +\subsection{Fine-tuning the debugger} \label{s:fine-tuning} + +Several variables allow fine-tuning of the debugger. Reasonable +defaults are provided, and you should normally not have to change them. + +\begin{options} +\item["set processcount" \var{count}] +Set the maximum number of checkpoints to \var{count}. More checkpoints +facilitate going far back in time, but use more memory and create more +Unix processes. +\end{options} + +As checkpointing is quite expensive, it must not be done too often. On +the other hand, backward execution is faster when checkpoints are +taken more often. In particular, backward single-stepping is more +responsive when many checkpoints have been taken just before the +current time. To fine-tune the checkpointing strategy, the debugger +does not take checkpoints at the same frequency for long displacements +(e.g. "run") and small ones (e.g. "step"). The two variables "bigstep" +and "smallstep" contain the number of events between two checkpoints +in each case. + +\begin{options} +\item["set bigstep" \var{count}] +Set the number of events between two checkpoints for long displacements. 
+\item["set smallstep" \var{count}] +Set the number of events between two checkpoints for small +displacements. +\end{options} + +The following commands display information on checkpoints and events: + +\begin{options} +\item["info checkpoints"] +Print a list of checkpoints. +\item["info events" \optvar{module}] +Print the list of events in the given module (the current module, by default). +\end{options} + +\subsection{User-defined printers} + +Just as in the toplevel system (section~\ref{s:toplevel-directives}), +the user can register functions for printing values of certain types. +For technical reasons, the debugger cannot call printing functions +that reside in the program being debugged. The code for the printing +functions must therefore be loaded explicitly in the debugger. + +\begin{options} +\item["load_printer \""\var{file-name}"\""] +Load in the debugger the indicated ".cmo" or ".cma" object file. The +file is loaded in an environment consisting only of the OCaml +standard library plus the definitions provided by object files +previously loaded using "load_printer". If this file depends on other +object files not yet loaded, the debugger automatically loads them if +it is able to find them in the search path. The loaded file does not +have direct access to the modules of the program being debugged. + +\item["install_printer "\var{printer-name}] +Register the function named \var{printer-name} (a +value path) as a printer for objects whose types match the argument +type of the function. That is, the debugger will call +\var{printer-name} when it has such an object to print. +The printing function \var{printer-name} must use the "Format" library +module to produce its output, otherwise its output will not be +correctly located in the values printed by the toplevel loop. + +The value path \var{printer-name} must refer to one of the functions +defined by the object files loaded using "load_printer". 
It cannot +reference the functions of the program being debugged. + +\item["remove_printer "\var{printer-name}] +Remove the named function from the table of value printers. +\end{options} + +\section{Miscellaneous commands} + +\begin{options} +\item["list" \optvar{module} \optvar{beginning} \optvar{end}] +List the source of module \var{module}, from line number +\var{beginning} to line number \var{end}. By default, 20 lines of the +current module are displayed, starting 10 lines before the current +position. +\item["source" \var{filename}] +Read debugger commands from the script \var{filename}. +\end{options} + +\section{Running the debugger under Emacs} \label{s:inf-debugger} + +The most user-friendly way to use the debugger is to run it under Emacs. +See the file "emacs/README" in the distribution for information on how +to load the Emacs Lisp files for OCaml support. + +The OCaml debugger is started under Emacs by the command "M-x +camldebug", with argument the name of the executable file +\var{progname} to debug. Communication with the debugger takes place +in an Emacs buffer named "*camldebug-"\var{progname}"*". The editing +and history facilities of Shell mode are available for interacting +with the debugger. + +In addition, Emacs displays the source files containing the current +event (the current position in the program execution) and highlights +the location of the event. This display is updated synchronously with +the debugger action. + +The following bindings for the most common debugger commands are +available in the "*camldebug-"\var{progname}"*" buffer: + +\begin{options} +\item["C-c C-s"] (command "step"): execute the program one step forward. +\item["C-c C-k"] (command "backstep"): execute the program one step backward. +\item["C-c C-n"] (command "next"): execute the program one step +forward, skipping over function calls. +\item[Middle mouse button] (command "display"): display named value. 
+"$"\var{n} under mouse cursor (support incremental browsing of large +data structures). +\item["C-c C-p"] (command "print"): print value of identifier at point. +\item["C-c C-d"] (command "display"): display value of identifier at point. +\item["C-c C-r"] (command "run"): execute the program forward to next +breakpoint. +\item["C-c C-v"] (command "reverse"): execute the program backward to +latest breakpoint. +\item["C-c C-l"] (command "last"): go back one step in the execution history. +\item["C-c C-t"] (command "backtrace"): display backtrace of function calls. +\item["C-c C-f"] (command "finish"): run forward till the current +function returns. +\item["C-c <"] (command "up"): select the stack frame above the +current frame. +\item["C-c >"] (command "down"): select the stack frame below the +current frame. +\end{options} + +In all buffers in OCaml editing mode, the following debugger commands +are also available: + +\begin{options} +\item["C-x C-a C-b"] (command "break"): set a breakpoint at event closest +to point +\item["C-x C-a C-p"] (command "print"): print value of identifier at point +\item["C-x C-a C-d"] (command "display"): display value of identifier at point +\end{options} diff --git a/manual/manual/cmds/depend.etex b/manual/manual/cmds/depend.etex new file mode 100644 index 00000000..b5dadd58 --- /dev/null +++ b/manual/manual/cmds/depend.etex @@ -0,0 +1,222 @@ +\chapter{Dependency generator (ocamldep)} \label{c:camldep} +\pdfchapter{Dependency generator (ocamldep)} +%HEVEA\cutname{depend.html} + +The "ocamldep" command scans a set of OCaml source files +(".ml" and ".mli" files) for references to external compilation units, +and outputs dependency lines in a format suitable for the "make" +utility. This ensures that "make" will compile the source files in the +correct order, and recompile those files that need it when a source +file is modified. 
+ +The typical usage is: +\begin{alltt} + ocamldep \var{options} *.mli *.ml > .depend +\end{alltt} +where "*.mli *.ml" expands to all source files in the current +directory and ".depend" is the file that should contain the +dependencies. (See below for a typical "Makefile".) + +Dependencies are generated both for compiling with the bytecode +compiler "ocamlc" and with the native-code compiler "ocamlopt". + +\section{Options} + +The following command-line options are recognized by "ocamldep". + +\begin{options} + +\item["-absname"] +Show absolute filenames in error messages. + +\item["-all"] +Generate dependencies on all required files, rather than assuming +implicit dependencies. + +\item["-allow-approx"] +Allow falling back on a lexer-based approximation when parsing fails. + +\item["-args" \var{filename}] + Read additional newline-terminated command line arguments from \var{filename}. + +\item["-args0" \var{filename}] + Read additional null character terminated command line arguments from \var{filename}. + +\item["-as-map"] +For the following files, do not include delayed dependencies for +module aliases. +This option assumes that they are compiled using options +"-no-alias-deps -w -49", and that those files or their interface are +passed with the "-map" option when computing dependencies for other +files. Note also that for dependencies to be correct in the +implementation of a map file, its interface should not coerce any of +the aliases it contains. + +\item["-debug-map"] +Dump the delayed dependency map for each map file. + +\item["-I" \var{directory}] +Add the given directory to the list of directories searched for +source files. If a source file "foo.ml" mentions an external +compilation unit "Bar", a dependency on that unit's interface +"bar.cmi" is generated only if the source for "bar" is found in the +current directory or in one of the directories specified with "-I". 
+Otherwise, "Bar" is assumed to be a module from the standard library, +and no dependencies are generated. For programs that span multiple +directories, it is recommended to pass "ocamldep" the same "-I" options +that are passed to the compiler. + +\item["-impl" \var{file}] +Process \var{file} as a ".ml" file. + +\item["-intf" \var{file}] +Process \var{file} as a ".mli" file. + +\item["-map" \var{file}] +Read and propagate the delayed dependencies for module aliases in +\var{file}, so that the following files will depend on the +exported aliased modules if they use them. See the example below. + +\item["-ml-synonym" \var{.ext}] +Consider the given extension (with leading dot) to be a synonym for .ml. + +\item["-mli-synonym" \var{.ext}] +Consider the given extension (with leading dot) to be a synonym for .mli. + +\item["-modules"] +Output raw dependencies of the form +\begin{verbatim} + filename: Module1 Module2 ... ModuleN +\end{verbatim} +where "Module1", \ldots, "ModuleN" are the names of the compilation +units referenced within the file "filename", but these names are not +resolved to source file names. Such raw dependencies cannot be used +by "make", but can be post-processed by other tools such as "Omake". + +\item["-native"] +Generate dependencies for a pure native-code program (no bytecode +version). When an implementation file (".ml" file) has no explicit +interface file (".mli" file), "ocamldep" generates dependencies on the +bytecode compiled file (".cmo" file) to reflect interface changes. +This can cause unnecessary bytecode recompilations for programs that +are compiled to native-code only. The flag "-native" causes +dependencies on native compiled files (".cmx") to be generated instead +of on ".cmo" files. (This flag makes no difference if all source files +have explicit ".mli" interface files.) + +\item["-one-line"] +Output one line per file, regardless of the length. 
+ +\item["-open" \var{module}] +Assume that module \var{module} is opened before parsing each of the +following files. + +\item["-plugin" \var{plugin}] +Dynamically load the code of the given \var{plugin} +(a ".cmo", ".cma" or ".cmxs" file) in "ocamldep". \var{plugin} must exist in +the same kind of code as "ocamldep" ("ocamldep.byte" must load bytecode +plugins, while "ocamldep.opt" must load native code plugins), and +extension adaptation is done automatically for ".cma" files (to ".cmxs" files +if "ocamldep" is compiled in native code). + +\item["-pp" \var{command}] +Cause "ocamldep" to call the given \var{command} as a preprocessor +for each source file. + +\item["-ppx" \var{command}] +Pipe abstract syntax trees through preprocessor \var{command}. + +\item["-shared"] +Generate dependencies for native plugin files (.cmxs) in addition to +native compiled files (.cmx). + +\item["-slash"] +Under Windows, use a forward slash (/) as the path separator instead +of the usual backward slash ($\backslash$). Under Unix, this option does +nothing. + +\item["-sort"] +Sort files according to their dependencies. + +\item["-version"] +Print version string and exit. + +\item["-vnum"] +Print short version number and exit. + +\item["-help" or "--help"] +Display a short usage summary and exit. +% +\end{options} + +\section{A typical Makefile} + +Here is a template "Makefile" for an OCaml program. + +\begin{verbatim} +OCAMLC=ocamlc +OCAMLOPT=ocamlopt +OCAMLDEP=ocamldep +INCLUDES= # all relevant -I options here +OCAMLFLAGS=$(INCLUDES) # add other options for ocamlc here +OCAMLOPTFLAGS=$(INCLUDES) # add other options for ocamlopt here + +# prog1 should be compiled to bytecode, and is composed of three +# units: mod1, mod2 and mod3. + +# The list of object files for prog1 +PROG1_OBJS=mod1.cmo mod2.cmo mod3.cmo + +prog1: $(PROG1_OBJS) + $(OCAMLC) -o prog1 $(OCAMLFLAGS) $(PROG1_OBJS) + +# prog2 should be compiled to native-code, and is composed of two +# units: mod4 and mod5. 
+ +# The list of object files for prog2 +PROG2_OBJS=mod4.cmx mod5.cmx + +prog2: $(PROG2_OBJS) + $(OCAMLOPT) -o prog2 $(OCAMLFLAGS) $(PROG2_OBJS) + +# Common rules +.SUFFIXES: .ml .mli .cmo .cmi .cmx + +.ml.cmo: + $(OCAMLC) $(OCAMLFLAGS) -c $< + +.mli.cmi: + $(OCAMLC) $(OCAMLFLAGS) -c $< + +.ml.cmx: + $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $< + +# Clean up +clean: + rm -f prog1 prog2 + rm -f *.cm[iox] + +# Dependencies +depend: + $(OCAMLDEP) $(INCLUDES) *.mli *.ml > .depend + +include .depend +\end{verbatim} + +If you use module aliases to give shorter names to modules, you need +to change the above definitions. Assuming that your map file is called +"mylib.mli", here are minimal modifications. +\begin{verbatim} +OCAMLFLAGS=$(INCLUDES) -open Mylib + +mylib.cmi: mylib.mli + $(OCAMLC) $(INCLUDES) -no-alias-deps -w -49 -c $< + +depend: + $(OCAMLDEP) $(INCLUDES) -map mylib.mli $(PROG1_OBJS:.cmo=.ml) > .depend +\end{verbatim} +Note that in this case you should not compute dependencies for +"mylib.mli" together with the other files, hence the need to pass +explicitly the list of files to process. +If "mylib.mli" itself has dependencies, you should compute them using +"-as-map". diff --git a/manual/manual/cmds/flambda.etex b/manual/manual/cmds/flambda.etex new file mode 100644 index 00000000..063029ec --- /dev/null +++ b/manual/manual/cmds/flambda.etex @@ -0,0 +1,1343 @@ +\chapter{Optimisation with Flambda} +\pdfchapterfold{-9}{Optimisation with Flambda} +%HEVEA\cutname{flambda.html} + +\section{Overview} + +{\em Flambda} is the term used to describe a series of optimisation passes +provided by the native code compilers as of OCaml 4.03. + +Flambda aims to make it easier to write idiomatic OCaml code without +incurring performance penalties. + +To use the Flambda optimisers it is necessary to pass the {\tt -flambda} +option to the OCaml {\tt configure} script. (There is no support for a +single compiler that can operate in both Flambda and non-Flambda modes.) 
+Code compiled with Flambda +cannot be linked into the same program as code compiled without Flambda. +Attempting to do this will result in a compiler error. + +Whether or not a particular {\tt ocamlopt} uses Flambda may be +determined by invoking it with the {\tt -config} option and looking +for any line starting with ``{\tt flambda:}''. If such a line is present +and says ``{\tt true}'', then Flambda is supported, otherwise it is not. + +Flambda provides full optimisation across different compilation units, +so long as the {\tt .cmx} files for the dependencies of the unit currently +being compiled are available. (A compilation unit corresponds to a +single {\tt .ml} source file.) However it does not yet act entirely as +a whole-program compiler: for example, elimination of dead code across +a complete set of compilation units is not supported. + +Optimisation with Flambda is not currently supported when generating +bytecode. + +Flambda should not in general affect the semantics of existing programs. +Two exceptions to this rule are: possible elimination of pure code +that is being benchmarked (see section\ \ref{inhibition}) and changes in +behaviour of code using unsafe operations (see section\ \ref{unsafe}). + +Flambda does not yet optimise array or string bounds checks. Neither +does it take hints for optimisation from any assertions written by the +user in the code. + +Consult the {\em Glossary} at the end of this chapter for definitions of +technical terms used below. + +\section{Command-line flags} + +The Flambda optimisers provide a variety of command-line flags that may +be used to control their behaviour. Detailed descriptions of each flag +are given in the referenced sections. Those sections also describe any +arguments which the particular flags take. + +Commonly-used options: +\begin{options} +\item[\machine{-O2}] Perform more optimisation than usual. Compilation +times may be lengthened. 
(This flag is an abbreviation for a certain +set of parameters described in section\ \ref{defaults}.) +\item[\machine{-O3}] Perform even more optimisation than usual, possibly +including unrolling of recursive functions. Compilation times may be +significantly lengthened. +\item[\machine{-Oclassic}] Make inlining decisions at the point of +definition of a function rather than at the call site(s). This mirrors +the behaviour of OCaml compilers not using Flambda. Compared to compilation +using the new Flambda inlining heuristics (for example at {\tt -O2}) it +produces +smaller {\tt .cmx} files, shorter compilation times and code that probably +runs rather slower. When using {\tt -Oclassic}, only the following options +described in this section are relevant: {\tt -inlining-report} and +{\tt -inline}. If any other of the options described in this section are +used, the behaviour is undefined and may cause an error in future versions +of the compiler. +\item[\machine{-inlining-report}] Emit {\tt .inlining} files (one per +round of optimisation) showing all of the inliner's decisions. +\end{options} + +Less commonly-used options: +\begin{options} +\item[\machine{-remove-unused-arguments}] Remove unused function arguments +even when the argument is not specialised. This may have a small +performance penalty. +See section\ \ref{remove-unused-args}. +\item[\machine{-unbox-closures}] Pass free variables via specialised arguments +rather than closures (an optimisation for reducing allocation). See +section\ \ref{unbox-closures}. This may have a small performance penalty. +\end{options} + +Advanced options, only needed for detailed tuning: +\begin{options} +\item[\machine{-inline}] The behaviour depends on whether {\tt -Oclassic} +is used. +\begin{itemize} +\item When not in {\tt -Oclassic} mode, {\tt -inline} limits the total +size of functions considered for inlining during any speculative inlining +search. (See section\ \ref{speculation}.) 
Note that +this parameter does +{\bf not} control the assessment as to whether any particular function may +be inlined. Raising it to excessive amounts will not necessarily cause +more functions to be inlined. +\item When in {\tt -Oclassic} mode, {\tt -inline} behaves as in +previous versions of the compiler: it is the maximum size of function to +be considered for inlining. See section\ \ref{classic}. +\end{itemize} +\item[\machine{-inline-toplevel}] The equivalent of {\tt -inline} but used +when speculative inlining starts at toplevel. See +section\ \ref{speculation}. +Not used in {\tt -Oclassic} mode. +\item[\machine{-inline-branch-factor}] Controls how the inliner assesses +whether a code path is likely to be hot or cold. See +section\ \ref{assessment-inlining}. +\item[\machine{-inline-alloc-cost}, + \machine{-inline-branch-cost}, + \machine{-inline-call-cost}] Controls how the inliner assesses the runtime + performance penalties associated with various operations. See + section\ \ref{assessment-inlining}. +\item[\machine{-inline-indirect-cost}, + \machine{-inline-prim-cost}] Likewise. +\item[\machine{-inline-lifting-benefit}] Controls inlining of functors +at toplevel. See section\ \ref{assessment-inlining}. +\item[\machine{-inline-max-depth}] The maximum depth of any +speculative inlining search. See section\ \ref{speculation}. +\item[\machine{-inline-max-unroll}] The maximum depth of any unrolling of +recursive functions during any speculative inlining search. +See section\ \ref{speculation}. +\item[\machine{-no-unbox-free-vars-of-closures}] % +Do not unbox closure variables. See section\ \ref{unbox-fvs}. +\item[\machine{-no-unbox-specialised-args}] % +Do not unbox arguments to which functions have been specialised. See +section\ \ref{unbox-spec-args}. +\item[\machine{-rounds}] How many rounds of optimisation to perform. +See section\ \ref{rounds}. 
+\item[\machine{-unbox-closures-factor}] Scaling factor for benefit +calculation when using {\tt -unbox-closures}. See +section\ \ref{unbox-closures}. +\end{options} + +\paragraph{Notes} +\begin{itemize} +\item The set of command line flags relating to optimisation should typically +be specified to be the same across an entire project. Flambda does not +currently record the requested flags in the {\tt .cmx} files. As such, +inlining of functions from previously-compiled units will subject their code +to the optimisation parameters of the unit currently being compiled, rather +than those specified when they were previously compiled. It is hoped to +rectify this deficiency in the future. + +\item Flambda-specific flags do not affect linking with the exception of +affecting the optimisation of code in the startup file (containing +generated functions such as currying helpers). Typically such optimisation +will not be significant, so eliding such flags at link time might be +reasonable. + +\item Flambda-specific flags are silently accepted even when the +{\tt -flambda} option was not provided to the {\tt configure} script. +(There is no means provided to change this behaviour.) +This is intended to make it more +straightforward to run benchmarks with and without the Flambda optimisers +in effect. +\item Some of the Flambda flags may be subject to change in future +releases. +\end{itemize} + +\subsection{Specification of optimisation parameters by round}\label{rounds} + +Flambda operates in {\em rounds}: one round consists of a certain sequence +of transformations that may then be repeated in order to achieve more +satisfactory results. The number of rounds can be set manually using the +{\tt -rounds} parameter (although this is not necessary when using +predefined optimisation levels such as with {\tt -O2} and {\tt -O3}). +For high optimisation the number of rounds might be set at 3 or 4. 
+ +Command-line flags that may apply per round, for example those with +{\tt "-cost"} in the name, accept arguments of the form: +\begin{center} +{\em n}{\tt\ |\ }{\em round}{\tt =}{\em n}[{\tt,}...] +\end{center} +\begin{itemize} +\item If the first form is used, with a single integer specified, +the value will apply to all rounds. +\item If the second form is used, zero-based {\em round} integers specify +values which are to be used only for those rounds. +\end{itemize} + +The flags {\tt -Oclassic}, {\tt -O2} and {\tt -O3} are applied before all +other flags, meaning that certain parameters may be overridden without +having to specify every parameter usually invoked by the given optimisation +level. + +\section{Inlining} + +{\em Inlining} refers to the copying of the code of a function to a +place where the function is called. +The code of the function will be surrounded by bindings of its parameters +to the corresponding arguments. + +The aims of inlining are: +\begin{itemize} +\item to reduce the runtime overhead caused by function calls (including +setting up for such calls and returning afterwards); +\item to reduce instruction cache misses by expressing frequently-taken +paths through the program using fewer machine instructions; and +\item to reduce the amount of allocation (especially of closures). +\end{itemize} +These goals are often reached not just by inlining itself but also by +other optimisations that the compiler is able to perform as a result of +inlining. + +When a recursive call to a function (within the definition of that function +or another in the same mutually-recursive group) is inlined, the procedure is +also known as {\em unrolling}. This is somewhat akin to loop peeling. 
+For example, given the following code: +\begin{verbatim} +let rec fact x = + if x = 0 then + 1 + else + x * fact (x - 1) + +let n = fact 4 +\end{verbatim} +unrolling once at the call site {\tt fact 4} produces (with the body of +{\tt fact} unchanged): +\begin{verbatim} +let n = + if 4 = 0 then + 1 + else + 4 * fact (4 - 1) +\end{verbatim} +This simplifies to: +\begin{verbatim} +let n = 4 * fact 3 +\end{verbatim} + +%% CR pchambart: A specific section for unrolling might be worth (telling +%% when this is beneficial) + +Flambda provides significantly enhanced inlining capabilities relative to +previous versions of the compiler. + +\subsubsection{Aside: when inlining is performed} + +Inlining is performed together with all of the other Flambda optimisation +passes, that is to say, after closure conversion. This has three particular +advantages over a potentially more straightforward implementation prior to +closure conversion: +\begin{itemize} +\item It permits higher-order inlining, for example when a non-inlinable +function always returns the same function yet with different environments +of definition. Not all such cases are supported yet, but it is intended +that such support will be improved in future. +\item It is easier to integrate with cross-module optimisation, since +imported information about other modules is already in the correct +intermediate language. +\item It becomes more straightforward to optimise closure allocations since +the layout of closures does not have to be estimated in any way: it is +known. Similarly, +it becomes more straightforward to control which variables end up +in which closures, helping to avoid closure bloat. +\end{itemize} + +\subsection{Classic inlining heuristic}\label{classic} + +In {\tt -Oclassic} mode the behaviour of the Flambda inliner +mimics previous versions +of the compiler. 
(Code may still be subject to further optimisations not +performed by previous versions of the compiler: functors may be inlined, +constants are lifted and unused code is eliminated all as described elsewhere +in this chapter. See sections \ref{functors},\ \ref{lift-const} % +and\ \ref{remove-unused}.) +At the definition site of a function, the body of the +function is measured. It will then be marked as eligible for inlining +(and hence inlined at every direct call site) if: +\begin{itemize} +\item the measured size (in unspecified units) is smaller than that of a +function call plus the argument of the {\tt -inline} command-line flag; and +\item the function is not recursive. +\end{itemize} + +Non-Flambda versions of the compiler cannot inline functions that +contain a definition of another function. However {\tt -Oclassic} does +permit this. Further, non-Flambda versions also cannot inline functions +that are only themselves exposed as a result of a previous pass of inlining, +but again this is permitted by {\tt -Oclassic}. +For example: +\begin{verbatim} +module M : sig + val i : int +end = struct + let f x = + let g y = x + y in + g + let h = f 3 + let i = h 4 (* h is correctly discovered to be g and inlined *) +end +\end{verbatim} + +All of this contrasts with the normal Flambda mode, that is to say +without {\tt -Oclassic}, where: +\begin{itemize} +\item the inlining decision is made at the {\bf call site}; and +\item recursive functions can be handled, by {\em specialisation} (see +below). +\end{itemize} +The Flambda mode is described in the next section. + +\subsection{Overview of ``Flambda'' inlining heuristics} + +The Flambda inlining heuristics, used whenever the compiler is configured +for Flambda and {\tt -Oclassic} was not specified, make inlining decisions +at call sites. This helps in situations where the context is important. +For example: +\begin{verbatim} +let f b x = + if b then + x + else + ... big expression ... 
+ +let g x = f true x +\end{verbatim} +In this case, we would like to inline {\tt f} into {\tt g}, because a +conditional jump can be eliminated and the code size should reduce. If the +inlining decision has been made after the declaration of {\tt f} without +seeing the use, its size would have probably made it ineligible for +inlining; but at the call site, its final size can be known. Further, +this function should probably not be inlined systematically: if {\tt b} +is unknown, or indeed {\tt false}, there is little benefit to trade off +against a large increase in code size. In the existing non-Flambda inliner +this isn't a great problem because chains of inlining were cut off fairly +quickly. However it has led to excessive use of overly-large inlining +parameters such as {\tt -inline 10000}. + +In more detail, at each call site the following procedure is followed: +\begin{itemize} +\item Determine whether it is clear that inlining would be beneficial +without, for the moment, doing any inlining within the function itself. +(The exact assessment of {\em benefit} is described below.) If so, the +function is inlined. +\item If inlining the function is not clearly beneficial, then inlining +will be performed {\em speculatively} inside the function itself. The +search for speculative inlining possibilities is controlled by two +parameters: the {\em inlining threshold} and the {\em inlining depth}. +(These are described in more detail below.) +\begin{itemize} +\item If such speculation shows that performing some inlining inside the +function would be beneficial, then such inlining is performed and the +resulting function inlined at the original call site. +\item Otherwise, nothing happens. +\end{itemize} +\end{itemize} +Inlining within recursive functions of calls to other +functions in the same mutually-recursive group is kept in check by +an {\em unrolling depth}, described below. This ensures that functions are +not unrolled to excess. 
(Unrolling is only enabled +if {\tt -O3} optimisation level is selected and/or the +{\tt -inline-max-unroll} +flag is passed with an argument greater than zero.) + +\subsection{Handling of specific language constructs} + +\subsubsection{Functors}\label{functors} + +There is nothing particular about functors that inhibits inlining compared +to normal functions. To the inliner, these both look the same, except +that functors are marked as such. + +Applications of functors at toplevel are biased in favour of inlining. +(This bias may be adjusted: +see the documentation for {\tt -inline-lifting-benefit} below.) + +Applications of functors not at toplevel, for example in a local module +inside some other expression, are treated by the inliner identically to +normal function calls. + +\subsubsection{First-class modules} + +The inliner will be able to consider inlining a call to a function in a first +class module if it knows which particular function is going to be called. +The presence of the first-class module record that wraps the set of functions +in the module does not per se inhibit inlining. + +\subsubsection{Objects} + +Method calls to objects are not at present inlined by Flambda. + +\subsection{Inlining reports} + +If the {\tt -inlining-report} option is provided to the compiler then a file +will be emitted corresponding to each round of optimisation. For the +OCaml source file {\em basename}{\tt .ml} the files +are named {\em basename}{\tt .}{\em round}{\tt.inlining.org}, +with {\em round} a +zero-based integer. Inside the files, which are formatted as ``org mode'', +will be found English prose describing the decisions that the inliner took. + +\subsection{Assessment of inlining benefit}\label{assessment-inlining} + +Inlining typically +results in an increase in code size, which if left unchecked, may not only +lead to grossly large executables and excessive compilation times but also +a decrease in performance due to worse locality. 
As such, the +Flambda inliner trades off the change in code size against +the expected runtime performance benefit, with the benefit being computed +based on the number of operations that the compiler observes may be removed +as a result of inlining. + +For example given the following code: +\begin{verbatim} +let f b x = + if b then + x + else + ... big expression ... + +let g x = f true x +\end{verbatim} +it would be observed that inlining of {\tt f} would remove: +\begin{itemize} +\item one direct call; +\item one conditional branch. +\end{itemize} + +Formally, an estimate of runtime performance benefit is computed by +first summing +the cost of the operations that are known to be removed as a result of the +inlining and subsequent simplification of the inlined body. +The individual costs for the various kinds of operations may be adjusted +using the various {\tt -inline-...-cost} flags as follows. Costs are +specified as integers. All of these flags accept a single argument +describing such integers using the conventions +detailed in section\ \ref{rounds}. +\begin{options} +\item[\machine{-inline-alloc-cost}] The cost of an allocation. +\item[\machine{-inline-branch-cost}] The cost of a branch. +\item[\machine{-inline-call-cost}] The cost of a direct function call. +\item[\machine{-inline-indirect-cost}] The cost of an indirect function call. +\item[\machine{-inline-prim-cost}] The cost of a {\em primitive}. Primitives +encompass operations including arithmetic and memory access. +\end{options} +(Default values are described in section\ \ref{defaults} below.) + +The initial benefit value is then scaled by a factor that attempts to +compensate for the fact that the current point in the code, if under some +number of conditional branches, may be cold. (Flambda does not currently +compute hot and cold paths.) 
The factor---the estimated probability that +the inliner really is on a {\em hot} path---is calculated as +$\frac{1}{(1 + f)^{d}}$, where $f$ is set by +{\tt -inline-branch-factor} and $d$ is the nesting depth of branches +at the current point. As the inliner descends into more deeply-nested +branches, the benefit of inlining thus lessens. + +The resulting benefit value is known as the {\em estimated benefit}. + +The change in code size is also estimated: morally speaking it should be the +change in machine code size, but since that is not available to the inliner, +an approximation is used. + +If the estimated benefit exceeds the increase in code size then the inlined +version of the function will be kept. Otherwise the function will not be +inlined. + +Applications of functors at toplevel will be given +an additional benefit (which may be controlled by the +{\tt -inline-lifting-benefit} flag) to bias inlining in such situations +towards keeping the inlined version. + +\subsection{Control of speculation}\label{speculation} + +As described above, there are three parameters that restrict the search +for inlining opportunities during speculation: +\begin{itemize} +\item the {\em inlining threshold}; +\item the {\em inlining depth}; +\item the {\em unrolling depth}. +\end{itemize} +These parameters are ultimately bounded by the arguments provided to +the corresponding command-line flags (or their default values): +\begin{itemize} +\item {\tt -inline} (or, if the call site that triggered speculation is +at toplevel, {\tt -inline-toplevel}); +\item {\tt -inline-max-depth}; +\item {\tt -inline-max-unroll}. +\end{itemize} +{\bf Note in particular} that {\tt -inline} does not have the meaning that +it has in the previous compiler or in {\tt -Oclassic} mode. In both of those +situations {\tt -inline} was effectively some kind of basic assessment of +inlining benefit. 
However in Flambda inlining mode it corresponds to a +constraint on the search; the assessment of benefit is independent, as +described above. + +When speculation starts the inlining threshold starts at the value set +by {\tt -inline} (or {\tt -inline-toplevel} if appropriate, see above). +Upon making a speculative inlining decision the +threshold is reduced by the code size of the function being inlined. +If the threshold becomes exhausted, at or below zero, no further speculation +will be performed. + +The inlining depth starts at zero +and is increased by one every time the inliner +descends into another function. It is then decreased by one every time the +inliner leaves such function. If the depth exceeds the value set by +{\tt -inline-max-depth} then speculation stops. This parameter is intended +as a general backstop for situations where the inlining +threshold does not control the search sufficiently. + +The unrolling depth applies to calls within the same mutually-recursive +group of functions. Each time an inlining of such a call is performed +the depth is incremented by one when examining the resulting body. If the +depth reaches the limit set by {\tt -inline-max-unroll} then speculation +stops. + +\section{Specialisation}\label{specialisation} + +The inliner may discover a call site to a recursive function where +something is known about the arguments: for example, they may be equal to +some other variables currently in scope. In this situation it may be +beneficial to {\em specialise} the function to those arguments. This is +done by copying the declaration of the function (and any others involved +in any same mutually-recursive declaration) and noting the extra information +about the arguments. The arguments augmented by this information are known +as {\em specialised arguments}. 
In order to try to ensure that specialisation +is not performed uselessly, arguments are only specialised if it can be shown +that they are {\em invariant}: in other words, during the execution of the +recursive function(s) themselves, the arguments never change. + +Unless overridden by an attribute (see below), specialisation of a function +will not be attempted if: +\begin{itemize} +\item the compiler is in {\tt -Oclassic} mode; +\item the function is not obviously recursive; +\item the function is not closed. +\end{itemize} + +The compiler can prove invariance of function arguments across multiple +functions within a recursive group (although this has some limitations, +as shown by the example below). + +It should be noted that the {\em unboxing of closures} pass (see below) +can introduce specialised arguments on non-recursive functions. (No other +place in the compiler currently does this.) + +\paragraph{Example: the well-known {\tt List.iter} function} +This function might be written like so: +\begin{verbatim} +let rec iter f l = + match l with + | [] -> () + | h :: t -> + f h; + iter f t +\end{verbatim} +and used like this: +\begin{verbatim} +let print_int x = + print_endline (string_of_int x) + +let run xs = + iter print_int (List.rev xs) +\end{verbatim} +The argument {\tt f} to {\tt iter} is invariant so the function may be +specialised: +\begin{verbatim} +let run xs = + let rec iter' f l = + (* The compiler knows: f holds the same value as print_int throughout iter'. *) + match l with + | [] -> () + | h :: t -> + f h; + iter' f t + in + iter' print_int (List.rev xs) +\end{verbatim} +The compiler notes down that for the function {\tt iter'}, the argument +{\tt f} is specialised to the constant closure {\tt print\_int}. This +means that the body of {\tt iter'} may be simplified: +\begin{verbatim} +let run xs = + let rec iter' f l = + (* The compiler knows: f holds the same value as print_int throughout iter'. 
*) + match l with + | [] -> () + | h :: t -> + print_int h; (* this is now a direct call *) + iter' f t + in + iter' print_int (List.rev xs) +\end{verbatim} +The call to {\tt print\_int} can indeed be inlined: +\begin{verbatim} +let run xs = + let rec iter' f l = + (* The compiler knows: f holds the same value as print_int throughout iter'. *) + match l with + | [] -> () + | h :: t -> + print_endline (string_of_int h); + iter' f t + in + iter' print_int (List.rev xs) +\end{verbatim} +The unused specialised argument {\tt f} may now be removed, leaving: +\begin{verbatim} +let run xs = + let rec iter' l = + match l with + | [] -> () + | h :: t -> + print_endline (string_of_int h); + iter' t + in + iter' (List.rev xs) +\end{verbatim} + +\paragraph{Aside on invariant parameters.} The compiler cannot currently +detect invariance in cases such as the following. +\begin{verbatim} +let rec iter_swap f g l = + match l with + | [] -> () + | 0 :: t -> + iter_swap g f l + | h :: t -> + f h; + iter_swap f g t +\end{verbatim} + +\subsection{Assessment of specialisation benefit} + +The benefit of specialisation is assessed in a similar way as for inlining. +Specialised argument information may mean that the body of the function +being specialised can be simplified: the removed operations are accumulated +into a benefit. This, together with the size of the duplicated (specialised) +function declaration, is then assessed against the size of the call to the +original function. + +\section{Default settings of parameters}\label{defaults} + +The default settings (when not using {\tt -Oclassic}) are for one +round of optimisation using the following parameters. +% CR-soon mshinwell: for 4.04, let's autogenerate these. 
+ +\begin{tableau}{|l|l|}{Parameter}{Setting} +\entree{{\tt -inline}}{10} +\entree{{\tt -inline-branch-factor}}{0.1} +\entree{{\tt -inline-alloc-cost}}{7} +\entree{{\tt -inline-branch-cost}}{5} +\entree{{\tt -inline-call-cost}}{5} +\entree{{\tt -inline-indirect-cost}}{4} +\entree{{\tt -inline-prim-cost}}{3} +\entree{{\tt -inline-lifting-benefit}}{1300} +\entree{{\tt -inline-toplevel}}{160} +\entree{{\tt -inline-max-depth}}{1} +\entree{{\tt -inline-max-unroll}}{0} +\entree{{\tt -unbox-closures-factor}}{10} +\end{tableau} + +\subsection{Settings at -O2 optimisation level} + +When {\tt -O2} is specified two rounds of optimisation are performed. +The first round uses the default parameters (see above). The second uses +the following parameters. + +\begin{tableau}{|l|l|}{Parameter}{Setting} +\entree{{\tt -inline}}{25} +\entree{{\tt -inline-branch-factor}}{Same as default} +\entree{{\tt -inline-alloc-cost}}{Double the default} +\entree{{\tt -inline-branch-cost}}{Double the default} +\entree{{\tt -inline-call-cost}}{Double the default} +\entree{{\tt -inline-indirect-cost}}{Double the default} +\entree{{\tt -inline-prim-cost}}{Double the default} +\entree{{\tt -inline-lifting-benefit}}{Same as default} +\entree{{\tt -inline-toplevel}}{400} +\entree{{\tt -inline-max-depth}}{2} +\entree{{\tt -inline-max-unroll}}{Same as default} +\entree{{\tt -unbox-closures-factor}}{Same as default} +\end{tableau} + +\subsection{Settings at -O3 optimisation level} + +When {\tt -O3} is specified three rounds of optimisation are performed. +The first two rounds are as for {\tt -O2}. The third round uses +the following parameters. 
+ +\begin{tableau}{|l|l|}{Parameter}{Setting} +\entree{{\tt -inline}}{50} +\entree{{\tt -inline-branch-factor}}{Same as default} +\entree{{\tt -inline-alloc-cost}}{Triple the default} +\entree{{\tt -inline-branch-cost}}{Triple the default} +\entree{{\tt -inline-call-cost}}{Triple the default} +\entree{{\tt -inline-indirect-cost}}{Triple the default} +\entree{{\tt -inline-prim-cost}}{Triple the default} +\entree{{\tt -inline-lifting-benefit}}{Same as default} +\entree{{\tt -inline-toplevel}}{800} +\entree{{\tt -inline-max-depth}}{3} +\entree{{\tt -inline-max-unroll}}{1} +\entree{{\tt -unbox-closures-factor}}{Same as default} +\end{tableau} + +\section{Manual control of inlining and specialisation} + +Should the inliner prove recalcitrant and refuse to inline a particular +function, or if the observed inlining decisions are not to the programmer's +satisfaction for some other reason, inlining behaviour can be dictated by the +programmer directly in the source code. +One example where this might be appropriate is when the programmer, +but not the compiler, knows that a particular function call is on a cold +code path. It might be desirable to prevent inlining of the function so +that the code size along the hot path is kept smaller, so as to increase +locality. + +The inliner is directed using attributes. +For non-recursive functions (and one-step unrolling of recursive functions, +although {\tt \@unrolled} is clearer for this purpose) +the following are supported: +\begin{options} +\item[{\machine{\@\@inline always}} or {\machine{\@\@inline never}}] Attached +to a {\em declaration} of a function or functor, these direct the inliner to +either +always or never inline, irrespective of the size/benefit calculation. (If +the function is recursive then the body is substituted and no special +action is taken for the recursive call site(s).) +{\machine{\@\@inline}} with no argument is equivalent to +{\machine{\@\@inline always}}. 
+\item[{\machine{\@inlined always}} or {\machine{\@inlined never}}] Attached +to a function {\em application}, these direct the inliner likewise. These +attributes at call sites override any other attribute that may be present +on the corresponding declaration. +{\machine{\@inlined}} with no argument is equivalent to +{\machine{\@inlined always}}. +\end{options} + +For recursive functions the relevant attributes are: +\begin{options} +\item[{\machine{\@\@specialise always}} or {\machine{\@\@specialise never}}]% +Attached to a declaration of a function +or functor, this directs the inliner to either always or never +specialise the function so +long as it has appropriate contextual knowledge, irrespective of the +size/benefit calculation. +{\machine{\@\@specialise}} with no argument is equivalent to +{\machine{\@\@specialise always}}. +\item[{\machine{\@specialised always}} or {\machine{\@specialised never}}]% +Attached to a function application, this +directs the inliner likewise. This attribute at a call site overrides any +other attribute that may be present on the corresponding declaration. +(Note that the function will still only be specialised if there exist +one or more invariant parameters whose values are known.) +{\machine{\@specialised}} with no argument is equivalent to +{\machine{\@specialised always}}. +\item[{\machine{\@unrolled }}$n$] This attribute is attached to a function +application and always takes an integer argument. Each time the inliner sees +the attribute it behaves as follows: +\begin{itemize} +\item If $n$ is zero or less, nothing happens. +\item Otherwise the function being called is substituted at the call site +with its body having been rewritten such that +any recursive calls to that function {\em or +any others in the same mutually-recursive group} are annotated with the +attribute {\tt unrolled(}$n - 1${\tt )}. Inlining may continue on that body. +\end{itemize} +As such, $n$ behaves as the ``maximum depth of unrolling''. 
+\end{options} + +A compiler warning will be emitted if it was found impossible to obey an +annotation from an {\tt \@inlined} or {\tt \@specialised} attribute. + +\paragraph{Example showing correct placement of attributes} +\begin{verbatim} +module F (M : sig type t end) = struct + let[@inline never] bar x = + x * 3 + + let foo x = + (bar [@inlined]) (42 + x) +end [@@inline never] + +module X = F [@inlined] (struct type t = int end) +\end{verbatim} + +\section{Simplification} + +Simplification, which is run in conjunction with inlining, +propagates information (known as {\em approximations}) about which +variables hold what values at runtime. Certain relationships between +variables and symbols are also tracked: for example, some variable may be +known to always hold the same value as some other variable; or perhaps +some variable may be known to always hold the value pointed to by some +symbol. + +The propagation can help to eliminate allocations in cases such as: +\begin{verbatim} +let f x y = + ... + let p = x, y in + ... + ... (fst p) ... (snd p) ... +\end{verbatim} +The projections from {\tt p} may be replaced by uses of the variables +{\tt x} and {\tt y}, potentially meaning that {\tt p} becomes unused. + +The propagation performed by the simplification pass is also important for +discovering which functions flow to indirect call sites. This can enable +the transformation of such call sites into direct call sites, which makes +them eligible for an inlining transformation. + +Note that no information is propagated about the contents of strings, +even in {\tt safe-string} mode, because it cannot yet be guaranteed +that they are immutable throughout a given program. + +\section{Other code motion transformations} + +\subsection{Lifting of constants}\label{lift-const} + +Expressions found to be constant will be lifted to symbol +bindings---that is to say, they will be statically allocated in the +object file---when +they evaluate to boxed values. 
Such constants may be straightforward numeric +constants, such as the floating-point number {\tt 42.0}, or more complicated +values such as constant closures. + +Lifting of constants to toplevel reduces allocation at runtime. + +The compiler aims to share constants lifted to toplevel such that there +are no duplicate definitions. However if {\tt .cmx} files are hidden +from the compiler then maximal sharing may not be possible. + +\paragraph{Notes about float arrays} % +The following language semantics apply specifically to constant float arrays. +(By ``constant float array'' is meant an array consisting entirely of floating +point numbers that are known at compile time. A common case is a literal +such as {\tt [| 42.0; 43.0; |]}.) +\begin{itemize} +\item Constant float arrays at the toplevel are mutable and never shared. +(That is to say, for each +such definition there is a distinct symbol in the data section of the object +file pointing at the array.) +\item Constant float arrays not at toplevel are mutable and are created each +time the expression is evaluated. This can be thought of as an operation that +takes an immutable array (which in the source code has no associated name; let +us call it the {\em initialising array}) and +duplicates it into a fresh mutable array. +\begin{itemize} +\item If the array is of size four or less, the expression will create a +fresh block and write the values into it one by one. There is no reference +to the initialising array as a whole. + +\item Otherwise, the initialising array is lifted out and subject to the +normal constant sharing procedure; +creation of the array consists of bulk copying the initialising array +into a fresh value on the OCaml heap. +\end{itemize} +\end{itemize} + +\subsection{Lifting of toplevel let bindings} + +Toplevel {\tt let}-expressions may be lifted to symbol bindings to ensure +that the corresponding bound variables are not captured by closures. 
If the +defining expression of a given binding is found to be constant, it is bound +as such (the technical term is a {\em let-symbol} binding). + +Otherwise, the symbol is bound to a (statically-allocated) +{\em preallocated block} containing one field. At runtime, the defining +expression will be evaluated and the first field of the block filled with +the resulting value. This {\em initialise-symbol} binding +causes one extra indirection but ensures, by +virtue of the symbol's address being known at compile time, that uses of the +value are not captured by closures. + +It should be noted that the blocks corresponding to initialise-symbol +bindings are kept alive forever, by virtue of them occurring in a static +table of GC roots within the object file. This extended lifetime of +expressions may on occasion be surprising. If it is desired to create +some non-constant value (for example when writing GC tests) that does not +have this +extended lifetime, then it may be created and used inside a function, +with the application point of that function (perhaps at toplevel)---or +indeed the function declaration itself---marked +as to never be inlined. This technique prevents lifting of the definition +of the value in question (assuming of course that it is not constant). + +\section{Unboxing transformations} + +The transformations in this section relate to the splitting apart of +{\em boxed} (that is to say, non-immediate) values. They are largely +intended to reduce allocation, which tends to result in a runtime +performance profile with lower variance and smaller tails. + +\subsection{Unboxing of closure variables}\label{unbox-fvs} + +This transformation is enabled unless +{\tt -no-unbox-free-vars-of-closures} is provided. + +Variables that appear in closure environments may themselves be boxed +values. As such, they may be split into further closure variables, each +of which corresponds to some projection from the original closure variable(s). 
+This transformation is called {\em unboxing of closure variables} or +{\em unboxing of free variables of closures}. It is only applied when +there is +reasonable certainty that there are no uses of the boxed free variable itself +within the corresponding function bodies. +% CR-someday mshinwell: Actually, we probably don't check this carefully +% enough. It needs a global analysis in case there is an out-of-scope +% projection. + +\paragraph{Example:} In the following code, the compiler observes that +the closure returned from the function {\tt f} contains a variable {\tt pair} +(free in the body of {\tt f}) that may be split into two separate variables. +\begin{verbatim} +let f x0 x1 = + let pair = x0, x1 in + Printf.printf "foo\n"; + fun y -> + fst pair + snd pair + y +\end{verbatim} +After some simplification one obtains: +\begin{verbatim} +let f x0 x1 = + let pair_0 = x0 in + let pair_1 = x1 in + Printf.printf "foo\n"; + fun y -> + pair_0 + pair_1 + y +\end{verbatim} +and then: +\begin{verbatim} +let f x0 x1 = + Printf.printf "foo\n"; + fun y -> + x0 + x1 + y +\end{verbatim} +The allocation of the pair has been eliminated. + +This transformation does not operate if it would cause the closure to +contain more than twice as many closure variables as it did beforehand. + +\subsection{Unboxing of specialised arguments}\label{unbox-spec-args} + +This transformation is enabled unless +{\tt -no-unbox-specialised-args} is provided. + +It may become the case during compilation that one or more invariant arguments +to a function become specialised to a particular value. When such values are +themselves boxed the corresponding specialised arguments may be split into +more specialised arguments corresponding to the projections out of the boxed +value that occur within the function body. This transformation is called +{\em unboxing of specialised arguments}. 
It is only applied when there is +reasonable certainty that the boxed argument itself is unused within the +function. + +If the function in question is involved in a recursive group then unboxing +of specialised arguments may be immediately replicated across the group +based on the dataflow between invariant arguments. + +\paragraph{Example:} Having been given the following code, the compiler +will inline {\tt loop} into {\tt f}, and then observe {\tt inv} +being invariant and always the pair formed by adding {\tt 42} and {\tt 43} +to the argument {\tt x} of the function {\tt f}. +\begin{verbatim} +let rec loop inv xs = + match xs with + | [] -> fst inv + snd inv + | x::xs -> x + loop2 xs inv +and loop2 ys inv = + match ys with + | [] -> 4 + | y::ys -> y - loop inv ys + +let f x = + Printf.printf "%d\n" (loop (x + 42, x + 43) [1; 2; 3]) +\end{verbatim} +Since the functions have sufficiently few arguments, more specialised +arguments will be added. After some simplification one obtains: +\begin{verbatim} +let f x = + let rec loop' xs inv_0 inv_1 = + match xs with + | [] -> inv_0 + inv_1 + | x::xs -> x + loop2' xs inv_0 inv_1 + and loop2' ys inv_0 inv_1 = + match ys with + | [] -> 4 + | y::ys -> y - loop' ys inv_0 inv_1 + in + Printf.printf "%d\n" (loop' [1; 2; 3] (x + 42) (x + 43)) +\end{verbatim} +The allocation of the pair within {\tt f} has been removed. (Since the +two closures for {\tt loop'} and {\tt loop2'} are constant they will also be +lifted to toplevel with no runtime allocation penalty. This +would also happen without having run the transformation to unbox +specialised arguments.) + +The transformation to unbox specialised arguments never introduces extra +allocation. + +The transformation will not unbox arguments if it would result in the +original function having sufficiently many arguments so as to inhibit +tail-call optimisation. + +The transformation is implemented by creating a wrapper function that +accepts the original arguments.
Meanwhile, the original function is renamed +and extra arguments are added corresponding to the unboxed specialised +arguments; this new function +is called from the wrapper. The wrapper will then be inlined +at direct call sites. Indeed, all call sites will be direct unless +{\tt -unbox-closures} is being used, since they will have been generated +by the compiler when originally specialising the function. (In the case +of {\tt -unbox-closures} other functions may appear with specialised +arguments; in this case there may be indirect calls and these will incur +a small penalty owing to having to bounce through the wrapper. The technique +of {\em direct call surrogates} used for {\tt -unbox-closures} is not +used by the transformation to unbox specialised arguments.) + +\subsection{Unboxing of closures}\label{unbox-closures} + +This transformation is {\em not} enabled by default. It may be enabled +using the {\tt -unbox-closures} flag. + +The transformation replaces closure variables by specialised arguments. +The aim is to cause more closures to become closed. It is particularly +applicable, as a means of reducing allocation, where the function concerned +cannot be inlined or specialised. For example, some non-recursive function +might be too large to inline; or some recursive function might offer +no opportunities for specialisation perhaps because its only argument is +one of type {\tt unit}. + +At present there may be a small penalty in terms of actual runtime +performance when this transformation is enabled, although more stable +performance may be obtained due to reduced allocation. It is recommended +that developers experiment to determine whether the option is beneficial +for their code. (It is expected that in the future it will be possible +for the performance degradation to be removed.) 
+ +\paragraph{Simple example:} In the following code (which might typically +occur when {\tt g} is too large to inline) the value of {\tt x} would usually +be communicated to the application of the {\tt +} function via the closure +of {\tt g}. +\begin{verbatim} +let f x = + let g y = + x + y + in + (g [@inlined never]) 42 +\end{verbatim} +Unboxing of the closure causes the value for {\tt x} inside {\tt g} to +be passed as an argument to {\tt g} rather than through its closure. This +means that the closure of {\tt g} becomes constant and may be lifted to +toplevel, eliminating the runtime allocation. + +The transformation is implemented by adding a new wrapper function in the +manner of that used when unboxing specialised arguments. The closure +variables are still free in the wrapper, but the intention is that when +the wrapper is inlined at direct call sites, the relevant values are +passed directly to the main function via the new specialised arguments. + +Adding such a wrapper will penalise indirect calls to the function +(which might exist in arbitrary places; remember that this transformation +is not for example applied only on functions the compiler has produced +as a result of specialisation) since such calls will bounce through +the wrapper. To +mitigate this, if a function is small enough when weighed up against +the number of free variables being removed, it will be duplicated by the +transformation to obtain two versions: the original (used for indirect calls, +since we can do no better) and the wrapper/rewritten function pair as +described in the previous paragraph. The wrapper/rewritten function pair +will only be used at direct call sites of the function. (The wrapper in +this case is known as a {\em direct call surrogate}, since +it takes the place of another function---the unchanged version used for +indirect calls---at direct call sites.) 
+ +The {\tt -unbox-closures-factor} command line flag, which takes an +integer, may be used to adjust the point at which a function is deemed +large enough to be ineligible for duplication. The benefit of +duplication is scaled by the integer before being evaluated against the +size. + +\paragraph{Harder example:} In the following code, there are two closure +variables that would typically cause closure allocations. One is called +{\tt fv} and occurs inside the function {\tt baz}; the other is called +{\tt z} and occurs inside the function {\tt bar}. +In this toy (yet sophisticated) example we again use an attribute to +simulate the typical situation where the first argument of {\tt baz} is +too large to inline. +\begin{verbatim} +let foo c = + let rec bar zs fv = + match zs with + | [] -> [] + | z::zs -> + let rec baz f = function + | [] -> [] + | a::l -> let r = fv + ((f [@inlined never]) a) in r :: baz f l + in + (map2 (fun y -> z + y) [z; 2; 3; 4]) @ bar zs fv + in + Printf.printf "%d" (List.length (bar [1; 2; 3; 4] c)) +\end{verbatim} +The code resulting from applying {\tt -O3 -unbox-closures} to this code +passes the free variables via function arguments in +order to eliminate all closure allocation in this example (aside from any +that might be performed inside {\tt printf}). + +\section{Removal of unused code and values}\label{remove-unused} + +\subsection{Removal of redundant let expressions} + +The simplification pass removes unused {\tt let} bindings so long as +their corresponding defining expressions have ``no effects''. See +the section ``Treatment of effects'' below for the precise definition of +this term. + +\subsection{Removal of redundant program constructs} + +This transformation is analogous to the removal of {\tt let}-expressions +whose defining expressions have no effects. It operates instead on symbol +bindings, removing those that have no effects. 
+ +\subsection{Removal of unused arguments}\label{remove-unused-args} + +This transformation is only enabled by default for specialised arguments. +It may be enabled for all arguments using the {\tt -remove-unused-arguments} +flag. + +The pass analyses functions to determine which arguments are unused. +Removal is effected by creating a wrapper function, which will be inlined +at every direct call site, that accepts the original arguments and then +discards the unused ones before calling the original function. As a +consequence, this transformation may be detrimental if the original +function is usually indirectly called, since such calls will now bounce +through the wrapper. (The technique of {\em direct call surrogates} used +to reduce this penalty during unboxing of closure variables (see above) +does not yet apply to the pass that removes unused arguments.) + +\subsection{Removal of unused closure variables} + +This transformation performs an analysis across +the whole compilation unit to determine whether there exist closure variables +that are never used. Such closure variables are then eliminated. (Note that +this has to be a whole-unit analysis because a projection of a closure +variable from some particular closure may have propagated to an arbitrary +location within the code due to inlining.) + +\section{Other code transformations} + +\subsection{Transformation of non-escaping references into mutable variables} + +Flambda performs a simple analysis analogous to that performed elsewhere +in the compiler that can transform {\tt ref}s into mutable variables +that may then be held in registers (or on the stack as appropriate) rather +than being allocated on the OCaml heap. This only happens so long as the +reference concerned can be shown to not escape from its defining scope. + +\subsection{Substitution of closure variables for specialised arguments} + +This transformation discovers closure variables that are known to be +equal to specialised arguments. 
Such closure variables are replaced by +the specialised arguments; the closure variables may then be removed by +the ``removal of unused closure variables'' pass (see above). + +\section{Treatment of effects} + +The Flambda optimisers classify expressions in order to determine whether +an expression: +\begin{itemize} +\item does not need to be evaluated at all; and/or +\item may be duplicated. +\end{itemize} + +This is done by forming judgements on the {\em effects} and the {\em coeffects} +that might be performed were the expression to be executed. Effects talk +about how the expression might affect the world; coeffects talk about how +the world might affect the expression. + +Effects are classified as follows: +\begin{options} +\item[{\bf No effects:}] The expression does not change the observable state +of the world. For example, it must not write to any mutable storage, +call arbitrary external functions or change control flow (e.g. by raising +an exception). Note that allocation is {\em not} classed as having +``no effects'' (see below). +\begin{itemize} +\item It is assumed in the compiler that expressions with no +effects, whose results are not used, may be eliminated. (This typically +happens where the expression in question is the defining expression of a +{\tt let}; in such cases the {\tt let}-expression will be +eliminated.) It is further +assumed that such expressions with no effects may be +duplicated (and thus possibly executed more than once). +\item Exceptions arising from allocation points, for example +``out of memory'' or +exceptions propagated from finalizers or signal handlers, are treated as +``effects out of the ether'' and thus ignored for our determination here +of effectfulness. The same goes for floating point operations that may +cause hardware traps on some platforms.
+\end{itemize} +\item[{\bf Only generative effects:}] The expression does not change the +observable state of the world save for possibly affecting the state of +the garbage collector by performing an allocation. Expressions +that only have generative effects and whose results are unused +may be eliminated by the compiler. However, unlike expressions with +``no effects'', such expressions will never be eligible for duplication. +\item[{\bf Arbitrary effects:}] All other expressions. +\end{options} + +There is a single classification for coeffects: +\begin{options} +\item[{\bf No coeffects:}] The expression does not observe the effects (in +the sense described above) of other expressions. For example, it must not +read from any mutable storage or call arbitrary external functions. +\end{options} + +It is assumed in the compiler that, subject to data dependencies, +expressions with neither effects nor coeffects may be reordered with +respect to other expressions. + +\section{Compilation of statically-allocated modules} + +Compilation of modules that are able to be statically allocated (for example, +the module corresponding to an entire compilation unit, as opposed to a first +class module dependent on values computed at runtime) initially follows the +strategy used for bytecode. A sequence of {\tt let}-bindings, which may be +interspersed with arbitrary effects, surrounds a record creation that becomes +the module block. The Flambda-specific transformation follows: these bindings +are lifted to toplevel symbols, as described above. + +\section{Inhibition of optimisation}\label{inhibition} + +Especially when writing benchmarking suites that run non-side-effecting +algorithms in loops, it may be found that the optimiser entirely +elides the code being benchmarked. This behaviour can be prevented by +using the {\tt Sys.opaque\_identity} function (which indeed behaves as a +normal OCaml function and does not possess any ``magic'' semantics). 
The +documentation of the {\tt Sys} module should be consulted for further details. + +\section{Use of unsafe operations}\label{unsafe} + +The behaviour of the Flambda simplification pass means that certain unsafe +operations, which may without Flambda or when using previous versions of +the compiler be safe, must not be used. This specifically refers to +functions found in the {\tt Obj} module. + +In particular, it is forbidden to change any value (for example using +{\tt Obj.set\_field} or {\tt Obj.set\_tag}) that is not mutable. +(Values returned from C stubs +are always treated as mutable.) The compiler will emit warning 59 if it +detects such a write---but it cannot warn in all cases. Here is an example +of code that will trigger the warning: +\begin{verbatim} +let f x = + let a = 42, x in + (Obj.magic a : int ref) := 1; + fst a +\end{verbatim} +The reason this is unsafe is because the simplification pass believes that +{\tt fst a} holds the value {\tt 42}; and indeed it must, unless type +soundness has been broken via unsafe operations. + +If it must be the case that code has to be written that triggers warning 59, +but the code is known to actually be correct (for some definition of +correct), then {\tt Sys.opaque\_identity} may be used to wrap the value +before unsafe operations are performed upon it. Great care must be taken +when doing this to ensure that the opacity is added at the correct place. +It must be emphasised that this use of {\tt Sys.opaque\_identity} is only +for {\bf exceptional} cases. It should not be used in normal code or to +try to guide the optimiser. 
+ +As an example, this code will return the integer {\tt 1}: +\begin{verbatim} +let f x = + let a = Sys.opaque_identity (42, x) in + (Obj.magic a : int ref) := 1; + fst a +\end{verbatim} +However the following code will still return {\tt 42}: +\begin{verbatim} +let f x = + let a = 42, x in + Sys.opaque_identity (Obj.magic a : int ref) := 1; + fst a +\end{verbatim} + +High levels of inlining performed by Flambda may expose bugs in code +thought previously to be correct. Take care, for example, not +to add type annotations that claim some mutable value is always immediate +if it might be possible for an unsafe operation to update it to a boxed +value. + +\section{Glossary} + +The following terminology is used in this chapter of the manual. + +\begin{options} +\item[{\bf Call site}] See {\em direct call site} and % +{\em indirect call site} below. +\item[{\bf Closed function}] A function whose body has no free variables +except its parameters and any to which are bound other functions within +the same (possibly mutually-recursive) declaration. +\item[{\bf Closure}] The runtime representation of a function. This +includes pointers to the code of the function +together with the values of any variables that are used in the body of +the function but actually defined outside of the function, in the +enclosing scope. +The values of such variables, collectively known as the +{\em environment}, are required because the function may be +invoked from a place where the original bindings of such variables are +no longer in scope. A group of possibly +mutually-recursive functions defined using {\em let rec} all share a +single closure. (Note to developers: in the Flambda source code a +{\em closure} always corresponds to a single function; a +{\em set of closures} refers to a group of such.) +\item[{\bf Closure variable}] A member of the environment held within the +closure of a given function. 
+\item[{\bf Constant}] Some entity (typically an expression) the value of which +is known by the compiler at compile time. Constantness may be explicit from +the source code or inferred by the Flambda optimisers. +\item[{\bf Constant closure}] A closure that is statically allocated in an +object file. It is almost always the case that the environment portion of +such a closure is empty. +\item[{\bf Defining expression}] The expression {\tt e} in % +{\tt let x = e in e'}. +\item[{\bf Direct call site}] A place in a program's code where a function is +called and it is known at compile time which function it will always be. +\item[{\bf Indirect call site}] A place in a program's code where a function +is called but is not known to be a {\em direct call site}. +\item[{\bf Program}] A collection of {\em symbol bindings} forming the +definition of a single compilation unit (i.e. {\tt .cmx} file). +\item[{\bf Specialised argument}] An argument to a function that is known +to always hold a particular value at runtime. These are introduced by the +inliner when specialising recursive functions; and the {\tt unbox-closures} +pass. (See section\ \ref{specialisation}.) +\item[{\bf Symbol}] A name referencing a particular place in an object file +or executable image. At that particular place will be some constant value. +Symbols may be examined using operating system-specific tools (for +example {\tt objdump} on Linux). +\item[{\bf Symbol binding}] Analogous to a {\tt let}-expression but working +at the level of symbols defined in the object file. The address of a symbol is +fixed, but it may be bound to both constant and non-constant expressions. +\item[{\bf Toplevel}] An expression in the current program which is not +enclosed within any function declaration. +\item[{\bf Variable}] A named entity to which some OCaml value is bound by a +{\tt let} expression, pattern-matching construction, or similar. 
+\end{options} diff --git a/manual/manual/cmds/intf-c.etex b/manual/manual/cmds/intf-c.etex new file mode 100644 index 00000000..b8b5fcf7 --- /dev/null +++ b/manual/manual/cmds/intf-c.etex @@ -0,0 +1,2643 @@ +\chapter{Interfacing\label{c:intf-c} C with OCaml} +\pdfchapterfold{-9}{Interfacing C with OCaml} +%HEVEA\cutname{intfc.html} + +This chapter describes how user-defined primitives, written in C, can +be linked with OCaml code and called from OCaml functions, and how +these C functions can call back to OCaml code. + +\section{Overview and compilation information} +\pdfsection{Overview and compilation information} + +\subsection{Declaring primitives} + +\begin{syntax} +definition: ... + | 'external' value-name ':' typexpr '=' external-declaration +; +external-declaration: string-literal [ string-literal [ string-literal ] ] +\end{syntax} + +User primitives are declared in an implementation file or +@"struct"\ldots"end"@ module expression using the @"external"@ keyword: +\begin{alltt} + external \var{name} : \var{type} = \var{C-function-name} +\end{alltt} +This defines the value name \var{name} as a function with type +\var{type} that executes by calling the given C function. +For instance, here is how the "input" primitive is declared in the +standard library module "Pervasives": +\begin{verbatim} + external input : in_channel -> bytes -> int -> int -> int + = "input" +\end{verbatim} +Primitives with several arguments are always curried. The C function +does not necessarily have the same name as the ML function. 
+ +External functions thus defined can be specified in interface files or +@"sig"\ldots"end"@ signatures either as regular values +\begin{alltt} + val \var{name} : \var{type} +\end{alltt} +thus hiding their implementation as C functions, or explicitly as +``manifest'' external functions +\begin{alltt} + external \var{name} : \var{type} = \var{C-function-name} +\end{alltt} +The latter is slightly more efficient, as it allows clients of the +module to call directly the C function instead of going through the +corresponding OCaml function. On the other hand, it should not be used +in library modules if they have side-effects at toplevel, as this +direct call interferes with the linker's algorithm for removing unused +modules from libraries at link-time. + +The arity (number of arguments) of a primitive is automatically +determined from its OCaml type in the "external" declaration, by +counting the number of function arrows in the type. For instance, +"input" above has arity 4, and the "input" C function is called with +four arguments. Similarly, +\begin{verbatim} + external input2 : in_channel * bytes * int * int -> int = "input2" +\end{verbatim} +has arity 1, and the "input2" C function receives one argument (which +is a quadruple of OCaml values). + +Type abbreviations are not expanded when determining the arity of a +primitive. For instance, +\begin{verbatim} + type int_endo = int -> int + external f : int_endo -> int_endo = "f" + external g : (int -> int) -> (int -> int) = "f" +\end{verbatim} +"f" has arity 1, but "g" has arity 2. This allows a primitive to +return a functional value (as in the "f" example above): just remember +to name the functional return type in a type abbreviation. + +The language accepts external declarations with one or two +flag strings in addition to the C function's name. These flags are +reserved for the implementation of the standard library. 
+ +\subsection{Implementing primitives} + +User primitives with arity $n \leq 5$ are implemented by C functions +that take $n$ arguments of type "value", and return a result of type +"value". The type "value" is the type of the representations for OCaml +values. It encodes objects of several base types (integers, +floating-point numbers, strings,~\ldots) as well as OCaml data +structures. The type "value" and the associated conversion +functions and macros are described in detail below. For instance, +here is the declaration for the C function implementing the "input" +primitive: +\begin{verbatim} +CAMLprim value input(value channel, value buffer, value offset, value length) +{ + ... +} +\end{verbatim} +When the primitive function is applied in an OCaml program, the C +function is called with the values of the expressions to which the +primitive is applied as arguments. The value returned by the function is +passed back to the OCaml program as the result of the function +application. + +User primitives with arity greater than 5 should be implemented by two +C functions. The first function, to be used in conjunction with the +bytecode compiler "ocamlc", receives two arguments: a pointer to an +array of OCaml values (the values for the arguments), and an +integer which is the number of arguments provided. The other function, +to be used in conjunction with the native-code compiler "ocamlopt", +takes its arguments directly. For instance, here are the two C +functions for the 7-argument primitive "Nat.add_nat": +\begin{verbatim} +CAMLprim value add_nat_native(value nat1, value ofs1, value len1, + value nat2, value ofs2, value len2, + value carry_in) +{ + ... 
+} +CAMLprim value add_nat_bytecode(value * argv, int argn) +{ + return add_nat_native(argv[0], argv[1], argv[2], argv[3], + argv[4], argv[5], argv[6]); +} +\end{verbatim} +The names of the two C functions must be given in the primitive +declaration, as follows: +\begin{alltt} + external \var{name} : \var{type} = + \var{bytecode-C-function-name} \var{native-code-C-function-name} +\end{alltt} +For instance, in the case of "add_nat", the declaration is: +\begin{verbatim} + external add_nat: nat -> int -> int -> nat -> int -> int -> int -> int + = "add_nat_bytecode" "add_nat_native" +\end{verbatim} + +Implementing a user primitive is actually two separate tasks: on the +one hand, decoding the arguments to extract C values from the given +OCaml values, and encoding the return value as an OCaml +value; on the other hand, actually computing the result from the arguments. +Except for very simple primitives, it is often preferable to have two +distinct C functions to implement these two tasks. The first function +actually implements the primitive, taking native C values as +arguments and returning a native C value. The second function, +often called the ``stub code'', is a simple wrapper around the first +function that converts its arguments from OCaml values to C values, +calls the first function, and converts the returned C value to an OCaml +value. For instance, here is the stub code for the "input" +primitive: +\begin{verbatim} +CAMLprim value input(value channel, value buffer, value offset, value length) +{ + return Val_long(getblock((struct channel *) channel, + &Byte(buffer, Long_val(offset)), + Long_val(length))); +} +\end{verbatim} +(Here, "Val_long", "Long_val" and so on are conversion macros for the +type "value", that will be described later. The "CAMLprim" macro +expands to the required compiler directives to ensure that the +function is exported and accessible from OCaml.)
+The hard work is performed by the function "getblock", which is +declared as: +\begin{verbatim} +long getblock(struct channel * channel, char * p, long n) +{ + ... +} +\end{verbatim} + +To write C code that operates on OCaml values, the following +include files are provided: +\begin{tableau}{|l|p{12cm}|}{Include file}{Provides} +\entree{"caml/mlvalues.h"}{definition of the "value" type, and conversion +macros} +\entree{"caml/alloc.h"}{allocation functions (to create structured OCaml +objects)} +\entree{"caml/memory.h"}{miscellaneous memory-related functions +and macros (for GC interface, in-place modification of structures, etc).} +\entree{"caml/fail.h"}{functions for raising exceptions +(see section~\ref{s:c-exceptions})} +\entree{"caml/callback.h"}{callback from C to OCaml (see +section~\ref{s:callback}).} +\entree{"caml/custom.h"}{operations on custom blocks (see +section~\ref{s:custom}).} +\entree{"caml/intext.h"}{operations for writing user-defined +serialization and deserialization functions for custom blocks +(see section~\ref{s:custom}).} +\entree{"caml/threads.h"}{operations for interfacing in the presence + of multiple threads (see section~\ref{s:C-multithreading}).} +\end{tableau} +These files reside in the "caml/" subdirectory of the OCaml +standard library directory, which is returned by the command +"ocamlc -where" (usually "/usr/local/lib/ocaml" or "/usr/lib/ocaml"). + +By default, header files in the "caml/" subdirectory give only access +to the public interface of the OCaml runtime. It is possible to define +the macro "CAML_INTERNALS" to get access to a lower-level interface, +but this lower-level interface is more likely to change and break +programs that use it. + +{\bf Note:} It is recommended to define the macro "CAML_NAME_SPACE" +before including these header files. 
If you do not define it, the +header files will also define short names (without the "caml_" prefix) +for most functions, which usually produce clashes with names defined +by other C libraries that you might use. Including the header files +without "CAML_NAME_SPACE" is only supported for backward +compatibility. + +\subsection{Statically linking C code with OCaml code} +\label{staticlink-c-code} + +The OCaml runtime system comprises three main parts: the bytecode +interpreter, the memory manager, and a set of C functions that +implement the primitive operations. Some bytecode instructions are +provided to call these C functions, designated by their offset in a +table of functions (the table of primitives). + +In the default mode, the OCaml linker produces bytecode for the +standard runtime system, with a standard set of primitives. References +to primitives that are not in this standard set result in the +``unavailable C primitive'' error. (Unless dynamic loading of C +libraries is supported -- see section~\ref{dynlink-c-code} below.) + +In the ``custom runtime'' mode, the OCaml linker scans the +object files and determines the set of required primitives. Then, it +builds a suitable runtime system, by calling the native code linker with: +\begin{itemize} +\item the table of the required primitives; +\item a library that provides the bytecode interpreter, the +memory manager, and the standard primitives; +\item libraries and object code files (".o" files) mentioned on the +command line for the OCaml linker, that provide implementations +for the user's primitives. +\end{itemize} +This builds a runtime system with the required primitives. The OCaml +linker generates bytecode for this custom runtime system. The +bytecode is appended to the end of the custom runtime system, so that +it will be automatically executed when the output file (custom +runtime + bytecode) is launched. 
+ +To link in ``custom runtime'' mode, execute the "ocamlc" command with: +\begin{itemize} +\item the "-custom" option; +\item the names of the desired OCaml object files (".cmo" and ".cma" files) ; +\item the names of the C object files and libraries (".o" and ".a" +files) that implement the required primitives. Under Unix and Windows, +a library named "lib"\var{name}".a" (respectively, ".lib") residing in one of +the standard library directories can also be specified as "-cclib -l"\var{name}. +\end{itemize} + +If you are using the native-code compiler "ocamlopt", the "-custom" +flag is not needed, as the final linking phase of "ocamlopt" always +builds a standalone executable. To build a mixed OCaml/C executable, +execute the "ocamlopt" command with: +\begin{itemize} +\item the names of the desired OCaml native object files (".cmx" and +".cmxa" files); +\item the names of the C object files and libraries (".o", ".a", +".so" or ".dll" files) that implement the required primitives. +\end{itemize} + +Starting with Objective Caml 3.00, it is possible to record the +"-custom" option as well as the names of C libraries in an OCaml +library file ".cma" or ".cmxa". For instance, consider an OCaml library +"mylib.cma", built from the OCaml object files "a.cmo" and "b.cmo", +which reference C code in "libmylib.a". If the library is +built as follows: +\begin{alltt} + ocamlc -a -o mylib.cma -custom a.cmo b.cmo -cclib -lmylib +\end{alltt} +users of the library can simply link with "mylib.cma": +\begin{alltt} + ocamlc -o myprog mylib.cma ... +\end{alltt} +and the system will automatically add the "-custom" and "-cclib +-lmylib" options, achieving the same effect as +\begin{alltt} + ocamlc -o myprog -custom a.cmo b.cmo ... 
-cclib -lmylib +\end{alltt} +The alternative is of course to build the library without extra +options: +\begin{alltt} + ocamlc -a -o mylib.cma a.cmo b.cmo +\end{alltt} +and then ask users to provide the "-custom" and "-cclib -lmylib" +options themselves at link-time: +\begin{alltt} + ocamlc -o myprog -custom mylib.cma ... -cclib -lmylib +\end{alltt} +The former alternative is more convenient for the final users of the +library, however. + +\subsection{Dynamically linking C code with OCaml code} +\label{dynlink-c-code} + +Starting with Objective Caml 3.03, an alternative to static linking of C code +using the "-custom" option is provided. In this mode, the OCaml linker +generates a pure bytecode executable (no embedded custom runtime +system) that simply records the names of dynamically-loaded libraries +containing the C code. The standard OCaml runtime system "ocamlrun" +then dynamically loads these libraries, and resolves references to the +required primitives, before executing the bytecode. + +This facility is currently supported and known to work well under +Linux, MacOS~X, and Windows. It is supported, but not +fully tested yet, under FreeBSD, Tru64, Solaris and Irix. It is not +supported yet under other Unixes. + +To dynamically link C code with OCaml code, the C code must first be +compiled into a shared library (under Unix) or DLL (under Windows). +This involves 1- compiling the C files with appropriate C compiler +flags for producing position-independent code (when required by the +operating system), and 2- building a +shared library from the resulting object files. The resulting shared +library or DLL file must be installed in a place where "ocamlrun" can +find it later at program start-up time (see +section~\ref{s-ocamlrun-dllpath}).
+Finally (step 3), execute the "ocamlc" command with +\begin{itemize} +\item the names of the desired OCaml object files (".cmo" and ".cma" files) ; +\item the names of the C shared libraries (".so" or ".dll" files) that +implement the required primitives. Under Unix and Windows, +a library named "dll"\var{name}".so" (respectively, ".dll") residing +in one of the standard library directories can also be specified as +"-dllib -l"\var{name}. +\end{itemize} +Do {\em not} set the "-custom" flag, otherwise you're back to static linking +as described in section~\ref{staticlink-c-code}. +The "ocamlmklib" tool (see section~\ref{s-ocamlmklib}) +automates steps 2 and 3. + +As in the case of static linking, it is possible (and recommended) to +record the names of C libraries in an OCaml ".cma" library archive. +Consider again an OCaml library +"mylib.cma", built from the OCaml object files "a.cmo" and "b.cmo", +which reference C code in "dllmylib.so". If the library is +built as follows: +\begin{alltt} + ocamlc -a -o mylib.cma a.cmo b.cmo -dllib -lmylib +\end{alltt} +users of the library can simply link with "mylib.cma": +\begin{alltt} + ocamlc -o myprog mylib.cma ... +\end{alltt} +and the system will automatically add the "-dllib -lmylib" option, +achieving the same effect as +\begin{alltt} + ocamlc -o myprog a.cmo b.cmo ... -dllib -lmylib +\end{alltt} +Using this mechanism, users of the library "mylib.cma" do not need to +know that it references C code, nor whether this C code must be +statically linked (using "-custom") or dynamically linked. + +\subsection{Choosing between static linking and dynamic linking} + +After having described two different ways of linking C code with OCaml +code, we now review the pros and cons of each, to help developers of +mixed OCaml/C libraries decide. + +The main advantage of dynamic linking is that it preserves the +platform-independence of bytecode executables.
That is, the bytecode +executable contains no machine code, and can therefore be compiled on +platform $A$ and executed on other platforms $B$, $C$, \ldots, as long +as the required shared libraries are available on all these +platforms. In contrast, executables generated by "ocamlc -custom" run +only on the platform on which they were created, because they embark a +custom-tailored runtime system specific to that platform. In +addition, dynamic linking results in smaller executables. + +Another advantage of dynamic linking is that the final users of the +library do not need to have a C compiler, C linker, and C runtime +libraries installed on their machines. This is no big deal under +Unix and Cygwin, but many Windows users are reluctant to install +Microsoft Visual C just to be able to do "ocamlc -custom". + +There are two drawbacks to dynamic linking. The first is that the +resulting executable is not stand-alone: it requires the shared +libraries, as well as "ocamlrun", to be installed on the machine +executing the code. If you wish to distribute a stand-alone +executable, it is better to link it statically, using "ocamlc -custom +-ccopt -static" or "ocamlopt -ccopt -static". Dynamic linking also +raises the ``DLL hell'' problem: some care must be taken to ensure +that the right versions of the shared libraries are found at start-up +time. + +The second drawback of dynamic linking is that it complicates the +construction of the library. The C compiler and linker flags to +compile to position-independent code and build a shared library vary +wildly between different Unix systems. Also, dynamic linking is not +supported on all Unix systems, requiring a fall-back case to static +linking in the Makefile for the library. The "ocamlmklib" command +(see section~\ref{s-ocamlmklib}) tries to hide some of these system +dependencies. 
+ +In conclusion: dynamic linking is highly recommended under the native +Windows port, because there are no portability problems and it is much +more convenient for the end users. Under Unix, dynamic linking should +be considered for mature, frequently used libraries because it +enhances platform-independence of bytecode executables. For new or +rarely-used libraries, static linking is much simpler to set up in a +portable way. + +\subsection{Building standalone custom runtime systems} +\label{s:custom-runtime} + +It is sometimes inconvenient to build a custom runtime system each +time OCaml code is linked with C libraries, like "ocamlc -custom" does. +For one thing, the building of the runtime system is slow on some +systems (that have bad linkers or slow remote file systems); for +another thing, the platform-independence of bytecode files is lost, +forcing to perform one "ocamlc -custom" link per platform of interest. + +An alternative to "ocamlc -custom" is to build separately a custom +runtime system integrating the desired C libraries, then generate +``pure'' bytecode executables (not containing their own runtime +system) that can run on this custom runtime. This is achieved by the +"-make-runtime" and "-use-runtime" flags to "ocamlc". For example, +to build a custom runtime system integrating the C parts of the +``Unix'' and ``Threads'' libraries, do: +\begin{verbatim} + ocamlc -make-runtime -o /home/me/ocamlunixrun unix.cma threads.cma +\end{verbatim} +To generate a bytecode executable that runs on this runtime system, +do: +\begin{alltt} + ocamlc -use-runtime /home/me/ocamlunixrun -o myprog \char92 + unix.cma threads.cma {\it{your .cmo and .cma files}} +\end{alltt} +The bytecode executable "myprog" can then be launched as usual: +"myprog" \var{args} or "/home/me/ocamlunixrun myprog" \var{args}. 
+ +Notice that the bytecode libraries "unix.cma" and "threads.cma" must +be given twice: when building the runtime system (so that "ocamlc" +knows which C primitives are required) and also when building the +bytecode executable (so that the bytecode from "unix.cma" and +"threads.cma" is actually linked in). + +\section{The \texttt{value} type} +\pdfsection{The value type} + +All OCaml objects are represented by the C type "value", +defined in the include file "caml/mlvalues.h", along with macros to +manipulate values of that type. An object of type "value" is either: +\begin{itemize} +\item an unboxed integer; +\item a pointer to a block inside the heap (such as the blocks +allocated through one of the \verb"caml_alloc_*" functions below); +\item a pointer to an object outside the heap (e.g., a pointer to a block +allocated by "malloc", or to a C variable). + %%% FIXME will change in 4.02.0 (?) +\end{itemize} + +\subsection{Integer values} + +Integer values encode 63-bit signed integers (31-bit on 32-bit +architectures). They are unboxed (unallocated). + +\subsection{Blocks} + +Blocks in the heap are garbage-collected, and therefore have strict +structure constraints. Each block includes a header containing the +size of the block (in words), and the tag of the block. +The tag governs how the contents of the blocks are structured. A tag +lower than "No_scan_tag" indicates a structured block, containing +well-formed values, which is recursively traversed by the garbage +collector. A tag greater than or equal to "No_scan_tag" indicates a +raw block, whose contents are not scanned by the garbage collector. +For the benefit of ad-hoc polymorphic primitives such as equality and +structured input-output, structured and raw blocks are further +classified according to their tags as follows: +\begin{tableau}{|l|p{10cm}|}{Tag}{Contents of the block} +\entree{0 to $\hbox{"No_scan_tag"}-1$}{A structured block (an array of +OCaml objects). 
Each field is a "value".} +\entree{"Closure_tag"}{A closure representing a functional value. The first +word is a pointer to a piece of code, the remaining words are +"value" containing the environment.} +\entree{"String_tag"}{A character string or a byte sequence.} +\entree{"Double_tag"}{A double-precision floating-point number.} +\entree{"Double_array_tag"}{An array or record of double-precision +floating-point numbers.} +\entree{"Abstract_tag"}{A block representing an abstract datatype.} +\entree{"Custom_tag"}{A block representing an abstract datatype + with user-defined finalization, comparison, hashing, + serialization and deserialization functions attached.} +\end{tableau} + +\subsection{Pointers outside the heap} + +Any word-aligned pointer to an address outside the heap can be safely +cast to and from the type "value". This includes pointers returned by +"malloc", and pointers to C variables (of size at least one word) +obtained with the \verb'&' operator. + %%% FIXME will change in 4.02.0 (?) + +Caution: if a pointer returned by "malloc" is cast to the type "value" +and returned to OCaml, explicit deallocation of the pointer using +"free" is potentially dangerous, because the pointer may still be +accessible from the OCaml world. Worse, the memory space deallocated +by "free" can later be reallocated as part of the OCaml heap; the +pointer, formerly pointing outside the OCaml heap, now points inside +the OCaml heap, and this can crash the garbage collector. To avoid +these problems, it is preferable to wrap the pointer in an OCaml block +with tag "Abstract_tag" or "Custom_tag". + +\section{Representation of OCaml data types} +\pdfsection{Representation of OCaml data types} + +This section describes how OCaml data types are encoded in the +"value" type.
+ +\subsection{Atomic types} + +\begin{tableau}{|l|l|}{OCaml type}{Encoding} +\entree{"int"}{Unboxed integer values.} +\entree{"char"}{Unboxed integer values (ASCII code).} +\entree{"float"}{Blocks with tag "Double_tag".} +\entree{"bytes"}{Blocks with tag "String_tag".} +\entree{"string"}{Blocks with tag "String_tag".} +\entree{"int32"}{Blocks with tag "Custom_tag".} +\entree{"int64"}{Blocks with tag "Custom_tag".} +\entree{"nativeint"}{Blocks with tag "Custom_tag".} +\end{tableau} + +\subsection{Tuples and records} +\label{ss:tuples-and-records} + +Tuples are represented by pointers to blocks, with tag~0. + +Records are also represented by zero-tagged blocks. The ordering of +labels in the record type declaration determines the layout of +the record fields: the value associated to the label +declared first is stored in field~0 of the block, the value associated +to the second label goes in field~1, and so on. + +As an optimization, records whose fields all have static type "float" +are represented as arrays of floating-point numbers, with tag +"Double_array_tag". (See the section below on arrays.) + +As another optimization, unboxable record types are represented +specially; unboxable record types are the immutable record types that +have only one field. An unboxable type will be represented in one of +two ways: boxed or unboxed. Boxed record types are represented as +described above (by a block with tag 0 or "Double_array_tag"). An +unboxed record type is represented directly by the value of its field +(i.e. there is no block to represent the record itself). + +The representation is chosen according to the following, in decreasing +order of priority: +\begin{itemize} +\item An attribute ("[\@\@boxed]" or "[\@\@unboxed]") on the type declaration. +\item A compiler option ("-unboxed-types" or "-no-unboxed-types"). +\item The default representation. In the present version of OCaml, the +default is the boxed representation. 
+\end{itemize} + +\subsection{Arrays} + +Arrays of integers and pointers are represented like tuples, +that is, as pointers to blocks tagged~0. They are accessed with the +"Field" macro for reading and the "caml_modify" function for writing. + +Arrays of floating-point numbers (type "float array") +have a special, unboxed, more efficient representation. +These arrays are represented by pointers to blocks with tag +"Double_array_tag". They should be accessed with the "Double_field" +and "Store_double_field" macros. + +\subsection{Concrete data types} + +Constructed terms are represented either by unboxed integers (for +constant constructors) or by blocks whose tag encodes the constructor +(for non-constant constructors). The constant constructors and the +non-constant constructors for a given concrete type are numbered +separately, starting from 0, in the order in which they appear in the +concrete type declaration. A constant constructor is represented by +the unboxed integer equal to its constructor number. A non-constant +constructor declared with $n$ arguments is represented by +a block of size $n$, tagged with the constructor number; the $n$ +fields contain its arguments. Example: + +\begin{tableau}{|l|p{8cm}|}{Constructed term}{Representation} +\entree{"()"}{"Val_int(0)"} +\entree{"false"}{"Val_int(0)"} +\entree{"true"}{"Val_int(1)"} +\entree{"[]"}{"Val_int(0)"} +\entree{"h::t"}{Block with size = 2 and tag = 0; first field +contains "h", second field "t".} +\end{tableau} + +As a convenience, "caml/mlvalues.h" defines the macros "Val_unit", +"Val_false" and "Val_true" to refer to "()", "false" and "true".
+ +The following example illustrates the assignment of +integers and block tags to constructors: +\begin{verbatim} +type t = + | A (* First constant constructor -> integer "Val_int(0)" *) + | B of string (* First non-constant constructor -> block with tag 0 *) + | C (* Second constant constructor -> integer "Val_int(1)" *) + | D of bool (* Second non-constant constructor -> block with tag 1 *) + | E of t * t (* Third non-constant constructor -> block with tag 2 *) +\end{verbatim} + + +As an optimization, unboxable concrete data types are represented +specially; a concrete data type is unboxable if it has exactly one +constructor and this constructor has exactly one argument. Unboxable +concrete data types are represented in the same ways as unboxable +record types: see the description in +section~\ref{ss:tuples-and-records}. + +\subsection{Objects} + +Objects are represented as blocks with tag "Object_tag". The first +field of the block refers to the object's class and associated method +suite, in a format that cannot easily be exploited from C. The second +field contains a unique object ID, used for comparisons. The remaining +fields of the object contain the values of the instance variables of +the object. It is unsafe to access directly instance variables, as the +type system provides no guarantee about the instance variables +contained by an object. +% Instance variables are stored in the order in which they +% appear in the class definition (taking inherited classes into +% account). + +One may extract a public method from an object using the C function +"caml_get_public_method" (declared in "<caml/mlvalues.h>".)
+Since public method tags are hashed in the same way as variant tags, +and methods are functions taking self as first argument, if you want +to do the method call "foo#bar" from the C side, you should call: +\begin{verbatim} + callback(caml_get_public_method(foo, hash_variant("bar")), foo); +\end{verbatim} + +\subsection{Polymorphic variants} + +Like constructed terms, polymorphic variant values are represented either +as integers (for polymorphic variants without argument), or as blocks +(for polymorphic variants with an argument). Unlike constructed +terms, variant constructors are not numbered starting from 0, but +identified by a hash value (an OCaml integer), as computed by the C function +"hash_variant" (declared in "<caml/mlvalues.h>"): +the hash value for a variant constructor named, say, "VConstr" +is "hash_variant(\"VConstr\")". + +The variant value "`VConstr" is represented by +"hash_variant(\"VConstr\")". The variant value "`VConstr("\var{v}")" is +represented by a block of size 2 and tag 0, with field number 0 +containing "hash_variant(\"VConstr\")" and field number 1 containing +\var{v}. + +Unlike constructed values, polymorphic variant values taking several +arguments are not flattened. +That is, "`VConstr("\var{v}", "\var{w}")" is represented by a block +of size 2, whose field number 1 contains the representation of the +pair "("\var{v}", "\var{w}")", rather than a block of size 3 +containing \var{v} and \var{w} in fields 1 and 2. + +\section{Operations on values} +\pdfsection{Operations on values} + +\subsection{Kind tests} + +\begin{itemize} +\item "Is_long("\var{v}")" is true if value \var{v} is an immediate integer, +false otherwise. +\item "Is_block("\var{v}")" is true if value \var{v} is a pointer to a block, +and false if it is an immediate integer. +\end{itemize} + +\subsection{Operations on integers} + +\begin{itemize} +\item "Val_long("\var{l}")" returns the value encoding the "long int" \var{l}.
+\item "Long_val("\var{v}")" returns the "long int" encoded in value \var{v}. +\item "Val_int("\var{i}")" returns the value encoding the "int" \var{i}. +\item "Int_val("\var{v}")" returns the "int" encoded in value \var{v}. +\item "Val_bool("\var{x}")" returns the OCaml boolean representing the +truth value of the C integer \var{x}. +\item "Bool_val("\var{v}")" returns 0 if \var{v} is the OCaml boolean +"false", 1 if \var{v} is "true". +\item "Val_true", "Val_false" represent the OCaml booleans "true" and "false". +\end{itemize} + +\subsection{Accessing blocks} + +\begin{itemize} +\item "Wosize_val("\var{v}")" returns the size of the block \var{v}, in words, +excluding the header. +\item "Tag_val("\var{v}")" returns the tag of the block \var{v}. +\item "Field("\var{v}", "\var{n}")" returns the value contained in the +$n\th$ field of the structured block \var{v}. Fields are numbered from 0 to +$\hbox{"Wosize_val"}(v)-1$. +\item "Store_field("\var{b}", "\var{n}", "\var{v}")" stores the value +\var{v} in the field number \var{n} of value \var{b}, which must be a +structured block. +\item "Code_val("\var{v}")" returns the code part of the closure \var{v}. +\item "caml_string_length("\var{v}")" returns the length (number of bytes) +of the string or byte sequence \var{v}. +\item "Byte("\var{v}", "\var{n}")" returns the $n\th$ byte of the string +or byte sequence \var{v}, with type "char". Bytes are numbered from 0 to +$\hbox{"caml_string_length"}(v)-1$. +\item "Byte_u("\var{v}", "\var{n}")" returns the $n\th$ byte of the string +or byte sequence \var{v}, with type "unsigned char". Bytes are +numbered from 0 to $\hbox{"caml_string_length"}(v)-1$. +\item "String_val("\var{v}")" returns a pointer to the first byte of the string +\var{v}, with type "char *" or, when OCaml is configured with +"-force-safe-string", with type "const char *". +This pointer is a valid C string: there is a null byte after the last +byte in the string.
However, OCaml strings can contain embedded null bytes, +which will confuse the usual C functions over strings. +\item "Bytes_val("\var{v}")" returns a pointer to the first byte of the +byte sequence \var{v}, with type "unsigned char *". +\item "Double_val("\var{v}")" returns the floating-point number contained in +value \var{v}, with type "double". +\item "Double_field("\var{v}", "\var{n}")" returns +the $n\th$ element of the array of floating-point numbers \var{v} (a +block tagged "Double_array_tag"). +\item "Store_double_field("\var{v}", "\var{n}", +"\var{d}")" stores the double precision floating-point number \var{d} +in the $n\th$ element of the array of floating-point numbers \var{v}. +\item "Data_custom_val("\var{v}")" returns a pointer to the data part +of the custom block \var{v}. This pointer has type "void *" and must +be cast to the type of the data contained in the custom block. +\item "Int32_val("\var{v}")" returns the 32-bit integer contained +in the "int32" \var{v}. +\item "Int64_val("\var{v}")" returns the 64-bit integer contained +in the "int64" \var{v}. +\item "Nativeint_val("\var{v}")" returns the long integer contained +in the "nativeint" \var{v}. +\item "caml_field_unboxed("\var{v}")" returns the value of the field +of a value \var{v} of any unboxed type (record or concrete data type). +\item "caml_field_boxed("\var{v}")" returns the value of the field +of a value \var{v} of any boxed type (record or concrete data type). +\item "caml_field_unboxable("\var{v}")" calls either +"caml_field_unboxed" or "caml_field_boxed" according to the default +representation of unboxable types in the current version of OCaml. +\end{itemize} +The expressions "Field("\var{v}", "\var{n}")", +"Byte("\var{v}", "\var{n}")" and +"Byte_u("\var{v}", "\var{n}")" +are valid l-values. Hence, they can be assigned to, resulting in an +in-place modification of value \var{v}. 
+Assigning directly to "Field("\var{v}", "\var{n}")" must +be done with care to avoid confusing the garbage collector (see +below). + +\subsection{Allocating blocks} + +\subsubsection{Simple interface} + +\begin{itemize} +\item +"Atom("\var{t}")" returns an ``atom'' (zero-sized block) with tag \var{t}. +Zero-sized blocks are preallocated outside of the heap. It is +incorrect to try and allocate a zero-sized block using the functions below. +For instance, "Atom(0)" represents the empty array. +\item +"caml_alloc("\var{n}", "\var{t}")" returns a fresh block of size \var{n} +with tag \var{t}. If \var{t} is less than "No_scan_tag", then the +fields of the block are initialized with a valid value in order to +satisfy the GC constraints. +\item +"caml_alloc_tuple("\var{n}")" returns a fresh block of size +\var{n} words, with tag 0. +\item +"caml_alloc_string("\var{n}")" returns a byte sequence (or string) value of +length \var{n} bytes. The sequence initially contains uninitialized bytes. +\item +"caml_alloc_initialized_string("\var{n}", "\var{p}")" returns a byte sequence +(or string) value of length \var{n} bytes. The value is initialized from the +\var{n} bytes starting at address \var{p}. +\item +"caml_copy_string("\var{s}")" returns a string or byte sequence value +containing a copy of the null-terminated C string \var{s} (a "char *"). +\item +"caml_copy_double("\var{d}")" returns a floating-point value initialized +with the "double" \var{d}. +\item +"caml_copy_int32("\var{i}")", "caml_copy_int64("\var{i}")" and +"caml_copy_nativeint("\var{i}")" return a value of OCaml type "int32", +"int64" and "nativeint", respectively, initialized with the integer +\var{i}. +\item +"caml_alloc_array("\var{f}", "\var{a}")" allocates an array of values, calling +function \var{f} over each element of the input array \var{a} to transform it +into a value. The array \var{a} is an array of pointers terminated by the +null pointer. 
The function \var{f} receives each pointer as argument, and +returns a value. The zero-tagged block returned by +"caml_alloc_array("\var{f}", "\var{a}")" is filled with the values returned by the +successive calls to \var{f}. (This function must not be used to build +an array of floating-point numbers.) +\item +"caml_copy_string_array("\var{p}")" allocates an array of strings or byte +sequences, copied from the pointer to a string array \var{p} +(a "char **"). \var{p} must be NULL-terminated. +\item "caml_alloc_float_array("\var{n}")" allocates an array of floating point + numbers of size \var{n}. The array initially contains uninitialized values. +\item "caml_alloc_unboxed("\var{v}")" returns the value (of any unboxed +type) whose field is the value \var{v}. +\item "caml_alloc_boxed("\var{v}")" allocates and returns a value (of +any boxed type) whose field is the value \var{v}. +\item "caml_alloc_unboxable("\var{v}")" calls either +"caml_alloc_unboxed" or "caml_alloc_boxed" according to the default +representation of unboxable types in the current version of OCaml. +\end{itemize} + +\subsubsection{Low-level interface} + +The following functions are slightly more efficient than "caml_alloc", but +also much more difficult to use. + +From the standpoint of the allocation functions, blocks are divided +according to their size as zero-sized blocks, small blocks (with size +less than or equal to \verb"Max_young_wosize"), and large blocks (with +size greater than \verb"Max_young_wosize"). The constant +\verb"Max_young_wosize" is declared in the include file "mlvalues.h". It +is guaranteed to be at least 64 (words), so that any block with +constant size less than or equal to 64 can be assumed to be small. For +blocks whose size is computed at run-time, the size must be compared +against \verb"Max_young_wosize" to determine the correct allocation procedure.
+ +\begin{itemize} +\item +"caml_alloc_small("\var{n}", "\var{t}")" returns a fresh small block of size +$n \leq \hbox{"Max_young_wosize"}$ words, with tag \var{t}. +If this block is a structured block (i.e. if $t < \hbox{"No_scan_tag"}$), then +the fields of the block (initially containing garbage) must be initialized +with legal values (using direct assignment to the fields of the block) +before the next allocation. +\item +"caml_alloc_shr("\var{n}", "\var{t}")" returns a fresh block of size +\var{n}, with tag \var{t}. +The size of the block can be greater than \verb"Max_young_wosize". (It +can also be smaller, but in this case it is more efficient to call +"caml_alloc_small" instead of "caml_alloc_shr".) +If this block is a structured block (i.e. if $t < \hbox{"No_scan_tag"}$), then +the fields of the block (initially containing garbage) must be initialized +with legal values (using the "caml_initialize" function described below) +before the next allocation. +\end{itemize} + +\subsection{Raising exceptions} \label{s:c-exceptions} + +Two functions are provided to raise two standard exceptions: +\begin{itemize} +\item "caml_failwith("\var{s}")", where \var{s} is a null-terminated C string (with +type \verb"char *"), raises exception "Failure" with argument \var{s}. +\item "caml_invalid_argument("\var{s}")", where \var{s} is a null-terminated C +string (with type \verb"char *"), raises exception "Invalid_argument" +with argument \var{s}. +\end{itemize} + +Raising arbitrary exceptions from C is more delicate: the +exception identifier is dynamically allocated by the OCaml program, and +therefore must be communicated to the C function using the +registration facility described below in section~\ref{s:register-exn}. 
+Once the exception identifier is recovered in C, the following +functions actually raise the exception: +\begin{itemize} +\item "caml_raise_constant("\var{id}")" raises the exception \var{id} with +no argument; +\item "caml_raise_with_arg("\var{id}", "\var{v}")" raises the exception +\var{id} with the OCaml value \var{v} as argument; +\item "caml_raise_with_args("\var{id}", "\var{n}", "\var{v}")" +raises the exception \var{id} with the OCaml values +\var{v}"[0]", \ldots, \var{v}"["\var{n}"-1]" as arguments; +\item "caml_raise_with_string("\var{id}", "\var{s}")", where \var{s} is a +null-terminated C string, raises the exception \var{id} with a copy of +the C string \var{s} as argument. +\end{itemize} + +\section{Living in harmony with the garbage collector} +\pdfsection{Living in harmony with the garbage collector} + +Unused blocks in the heap are automatically reclaimed by the garbage +collector. This requires some cooperation from C code that +manipulates heap-allocated blocks. + +\subsection{Simple interface} + +All the macros described in this section are declared in the +"memory.h" header file. + +\begin{gcrule} +A function that has parameters or local variables of type "value" must +begin with a call to one of the "CAMLparam" macros and return with +"CAMLreturn", "CAMLreturn0", or "CAMLreturnT". In particular, "CAMLlocal" +and "CAMLxparam" can only be called \emph{after} "CAMLparam". +\end{gcrule} + +There are six "CAMLparam" macros: "CAMLparam0" to "CAMLparam5", which +take zero to five arguments respectively. If your function has no more +than 5 parameters of type "value", use the corresponding macros +with these parameters as arguments. If your function has more than 5 +parameters of type "value", use "CAMLparam5" with five of these +parameters, and use one or more calls to the "CAMLxparam" macros for +the remaining parameters ("CAMLxparam1" to "CAMLxparam5"). 
+ +The macros "CAMLreturn", "CAMLreturn0", and "CAMLreturnT" are used to +replace the C +keyword "return". Every occurrence of "return x" must be replaced by +"CAMLreturn (x)" if "x" has type "value", or "CAMLreturnT (t, x)" +(where "t" is the type of "x"); every occurrence of "return" without +argument must be +replaced by "CAMLreturn0". If your C function is a procedure (i.e. if +it returns void), you must insert "CAMLreturn0" at the end (to replace +C's implicit "return"). + +\paragraph{Note:} some C compilers give bogus warnings about unused +variables "caml__dummy_xxx" at each use of "CAMLparam" and +"CAMLlocal". You should ignore them. + +\goodbreak + +Example: +\begin{verbatim} +void foo (value v1, value v2, value v3) +{ + CAMLparam3 (v1, v2, v3); + ... + CAMLreturn0; +} +\end{verbatim} + +\paragraph{Note:} if your function is a primitive with more than 5 arguments +for use with the byte-code runtime, its arguments are not "value"s and +must not be declared (they have types "value *" and "int"). + +\begin{gcrule} +Local variables of type "value" must be declared with one of the +"CAMLlocal" macros. Arrays of "value"s are declared with +"CAMLlocalN". These macros must be used at the beginning of the +function, not in a nested block. +\end{gcrule} + +The macros "CAMLlocal1" to "CAMLlocal5" declare and initialize one to +five local variables of type "value". The variable names are given as +arguments to the macros. "CAMLlocalN("\var{x}", "\var{n}")" declares +and initializes a local variable of type "value ["\var{n}"]". You can +use several calls to these macros if you have more than 5 local +variables. + +Example: +\begin{verbatim} +value bar (value v1, value v2, value v3) +{ + CAMLparam3 (v1, v2, v3); + CAMLlocal1 (result); + result = caml_alloc (3, 0); + ... 
+ CAMLreturn (result); +} +\end{verbatim} + +\begin{gcrule} +Assignments to the fields of structured blocks must be done with the +"Store_field" macro (for normal blocks) or "Store_double_field" macro +(for arrays and records of floating-point numbers). Other assignments +must not use "Store_field" nor "Store_double_field". +\end{gcrule} + +"Store_field ("\var{b}", "\var{n}", "\var{v}")" stores the value +\var{v} in the field number \var{n} of value \var{b}, which must be a +block (i.e. "Is_block("\var{b}")" must be true). + +Example: +\begin{verbatim} +value bar (value v1, value v2, value v3) +{ + CAMLparam3 (v1, v2, v3); + CAMLlocal1 (result); + result = caml_alloc (3, 0); + Store_field (result, 0, v1); + Store_field (result, 1, v2); + Store_field (result, 2, v3); + CAMLreturn (result); +} +\end{verbatim} + +\paragraph{Warning:} The first argument of "Store_field" and +"Store_double_field" must be a variable declared by "CAMLlocal*" or +a parameter declared by "CAMLparam*" to ensure that a garbage +collection triggered by the evaluation of the other arguments will not +invalidate the first argument after it is computed. + +\paragraph{Use with CAMLlocalN:} Arrays of values declared using +"CAMLlocalN" must not be written to using "Store_field". +Use the normal C array syntax instead. + +\begin{gcrule} Global variables containing values must be registered +with the garbage collector using the "caml_register_global_root" function. +\end{gcrule} + +Registration of a global variable "v" is achieved by calling +"caml_register_global_root(&v)" just before or just after a valid +value is stored in "v" for the first time. You must not call any +of the OCaml runtime functions or macros between registering and +storing the value. + +A registered global variable "v" can be un-registered by calling +"caml_remove_global_root(&v)".
+ +If the contents of the global variable "v" are seldom modified after +registration, better performance can be achieved by calling +"caml_register_generational_global_root(&v)" to register "v" (after +its initialization with a valid "value", but before any allocation or +call to the GC functions), +and "caml_remove_generational_global_root(&v)" to un-register it. In +this case, you must not modify the value of "v" directly, but you must +use "caml_modify_generational_global_root(&v,x)" to set it to "x". +The garbage collector takes advantage of the guarantee that "v" is not +modified between calls to "caml_modify_generational_global_root" to scan it +less often. This improves performance if the +modifications of "v" happen less often than minor collections. + +\paragraph{Note:} The "CAML" macros use identifiers (local variables, type +identifiers, structure tags) that start with "caml__". Do not use any +identifier starting with "caml__" in your programs. + +\subsection{Low-level interface} + +% Il faudrait simplifier violemment ce qui suit. +% En gros, dire quand on n'a pas besoin de declarer les variables +% et dans quels cas on peut se passer de "Store_field". + +We now give the GC rules corresponding to the low-level allocation +functions "caml_alloc_small" and "caml_alloc_shr". You can ignore those rules +if you stick to the simplified allocation function "caml_alloc". + +\begin{gcrule} After a structured block (a block with tag less than +"No_scan_tag") is allocated with the low-level functions, all fields +of this block must be filled with well-formed values before the next +allocation operation. 
If the block has been allocated with +"caml_alloc_small", filling is performed by direct assignment to the fields +of the block: +\begin{alltt} + Field(\var{v}, \var{n}) = \nth{v}{n}; +\end{alltt} +If the block has been allocated with "caml_alloc_shr", filling is performed +through the "caml_initialize" function: +\begin{alltt} + caml_initialize(&Field(\var{v}, \var{n}), \nth{v}{n}); +\end{alltt} +\end{gcrule} + +The next allocation can trigger a garbage collection. The garbage +collector assumes that all structured blocks contain well-formed +values. Newly created blocks contain random data, which generally do +not represent well-formed values. + +If you really need to allocate before the fields can receive their +final value, first initialize with a constant value (e.g. +"Val_unit"), then allocate, then modify the fields with the correct +value (see rule~6). + +%% \begin{gcrule} Local variables and function parameters containing +%% values must be registered with the garbage collector (using the +%% "Begin_roots" and "End_roots" macros), if they are to survive a call +%% to an allocation function. +%% \end{gcrule} +%% +%% Registration is performed with the "Begin_roots" set of macros. +%% "Begin_roots1("\var{v}")" registers variable \var{v} with the garbage +%% collector. Generally, \var{v} will be a local variable or a +%% parameter of your function. It must be initialized to a valid value +%% (e.g. "Val_unit") before the first allocation. Likewise, +%% "Begin_roots2", \ldots, "Begin_roots5" +%% let you register up to 5 variables at the same time. "Begin_root" is +%% the same as "Begin_roots1". "Begin_roots_block("\var{ptr}","\var{size}")" +%% allows you to register an array of roots. \var{ptr} is a pointer to +%% the first element, and \var{size} is the number of elements in the +%% array. 
+%% +%% Once registered, each of your variables (or array element) has the +%% following properties: if it points to a heap-allocated block, this +%% block (and its contents) will not be reclaimed; moreover, if this +%% block is relocated by the garbage collector, the variable is updated +%% to point to the new location for the block. +%% +%% Each of the "Begin_roots" macros open a C block that must be closed +%% with a matching "End_roots" at the same nesting level. The block must +%% be exited normally (i.e. not with "return" or "goto"). However, the +%% roots are automatically un-registered if an OCaml exception is raised, +%% so you can exit the block with "failwith", "invalid_argument", or one +%% of the "raise" functions. +%% +%% {\bf Note:} The "Begin_roots" macros use a local variable and a +%% structure tag named "caml__roots_block". Do not use this identifier +%% in your programs. + +\begin{gcrule} Direct assignment to a field of a block, as in +\begin{alltt} + Field(\var{v}, \var{n}) = \var{w}; +\end{alltt} +is safe only if \var{v} is a block newly allocated by "caml_alloc_small"; +that is, if no allocation took place between the +allocation of \var{v} and the assignment to the field. In all other cases, +never assign directly. If the block has just been allocated by "caml_alloc_shr", +use "caml_initialize" to assign a value to a field for the first time: +\begin{alltt} + caml_initialize(&Field(\var{v}, \var{n}), \var{w}); +\end{alltt} +Otherwise, you are updating a field that previously contained a +well-formed value; then, call the "caml_modify" function: +\begin{alltt} + caml_modify(&Field(\var{v}, \var{n}), \var{w}); +\end{alltt} +\end{gcrule} + +To illustrate the rules above, here is a C function that builds and +returns a list containing the two integers given as parameters. 
+First, we write it using the simplified allocation functions: +\begin{verbatim} +value alloc_list_int(int i1, int i2) +{ + CAMLparam0 (); + CAMLlocal2 (result, r); + + r = caml_alloc(2, 0); /* Allocate a cons cell */ + Store_field(r, 0, Val_int(i2)); /* car = the integer i2 */ + Store_field(r, 1, Val_int(0)); /* cdr = the empty list [] */ + result = caml_alloc(2, 0); /* Allocate the other cons cell */ + Store_field(result, 0, Val_int(i1)); /* car = the integer i1 */ + Store_field(result, 1, r); /* cdr = the first cons cell */ + CAMLreturn (result); +} +\end{verbatim} +Here, the registering of "result" is not strictly needed, because no +allocation takes place after it gets its value, but it's easier and +safer to simply register all the local variables that have type "value". + +Here is the same function written using the low-level allocation +functions. We notice that the cons cells are small blocks and can be +allocated with "caml_alloc_small", and filled by direct assignments on +their fields. +\begin{verbatim} +value alloc_list_int(int i1, int i2) +{ + CAMLparam0 (); + CAMLlocal2 (result, r); + + r = caml_alloc_small(2, 0); /* Allocate a cons cell */ + Field(r, 0) = Val_int(i2); /* car = the integer i2 */ + Field(r, 1) = Val_int(0); /* cdr = the empty list [] */ + result = caml_alloc_small(2, 0); /* Allocate the other cons cell */ + Field(result, 0) = Val_int(i1); /* car = the integer i1 */ + Field(result, 1) = r; /* cdr = the first cons cell */ + CAMLreturn (result); +} +\end{verbatim} +In the two examples above, the list is built bottom-up. Here is an +alternate way, that proceeds top-down. It is less efficient, but +illustrates the use of "caml_modify". 
+\begin{verbatim} +value alloc_list_int(int i1, int i2) +{ + CAMLparam0 (); + CAMLlocal2 (tail, r); + + r = caml_alloc_small(2, 0); /* Allocate a cons cell */ + Field(r, 0) = Val_int(i1); /* car = the integer i1 */ + Field(r, 1) = Val_int(0); /* A dummy value */ + tail = caml_alloc_small(2, 0); /* Allocate the other cons cell */ + Field(tail, 0) = Val_int(i2); /* car = the integer i2 */ + Field(tail, 1) = Val_int(0); /* cdr = the empty list [] */ + caml_modify(&Field(r, 1), tail); /* cdr of the result = tail */ + CAMLreturn (r); +} +\end{verbatim} +It would be incorrect to perform +"Field(r, 1) = tail" directly, because the allocation of "tail" +has taken place since "r" was allocated. + + +\section{A complete example} +\pdfsection{A complete example} + +This section outlines how the functions from the Unix "curses" library +can be made available to OCaml programs. First of all, here is +the interface "curses.ml" that declares the "curses" primitives and +data types: +\begin{verbatim} +(* File curses.ml -- declaration of primitives and data types *) +type window (* The type "window" remains abstract *) +external initscr: unit -> window = "caml_curses_initscr" +external endwin: unit -> unit = "caml_curses_endwin" +external refresh: unit -> unit = "caml_curses_refresh" +external wrefresh : window -> unit = "caml_curses_wrefresh" +external newwin: int -> int -> int -> int -> window = "caml_curses_newwin" +external addch: char -> unit = "caml_curses_addch" +external mvwaddch: window -> int -> int -> char -> unit = "caml_curses_mvwaddch" +external addstr: string -> unit = "caml_curses_addstr" +external mvwaddstr: window -> int -> int -> string -> unit + = "caml_curses_mvwaddstr" +(* lots more omitted *) +\end{verbatim} +To compile this interface: +\begin{verbatim} + ocamlc -c curses.ml +\end{verbatim} + +To implement these functions, we just have to provide the stub code; +the core functions are already implemented in the "curses" library.
+The stub code file, "curses_stubs.c", looks like this: +\begin{verbatim} +/* File curses_stubs.c -- stub code for curses */ +#include <curses.h> +#include <caml/mlvalues.h> +#include <caml/memory.h> +#include <caml/alloc.h> +#include <caml/custom.h> + +/* Encapsulation of opaque window handles (of type WINDOW *) + as OCaml custom blocks. */ + +static struct custom_operations curses_window_ops = { + "fr.inria.caml.curses_windows", + custom_finalize_default, + custom_compare_default, + custom_hash_default, + custom_serialize_default, + custom_deserialize_default, + custom_compare_ext_default +}; + +/* Accessing the WINDOW * part of an OCaml custom block */ +#define Window_val(v) (*((WINDOW **) Data_custom_val(v))) + +/* Allocating an OCaml custom block to hold the given WINDOW * */ +static value alloc_window(WINDOW * w) +{ + value v = caml_alloc_custom(&curses_window_ops, sizeof(WINDOW *), 0, 1); + Window_val(v) = w; + return v; +} + +value caml_curses_initscr(value unit) +{ + CAMLparam1 (unit); + CAMLreturn (alloc_window(initscr())); +} + +value caml_curses_endwin(value unit) +{ + CAMLparam1 (unit); + endwin(); + CAMLreturn (Val_unit); +} + +value caml_curses_refresh(value unit) +{ + CAMLparam1 (unit); + refresh(); + CAMLreturn (Val_unit); +} + +value caml_curses_wrefresh(value win) +{ + CAMLparam1 (win); + wrefresh(Window_val(win)); + CAMLreturn (Val_unit); +} + +value caml_curses_newwin(value nlines, value ncols, value x0, value y0) +{ + CAMLparam4 (nlines, ncols, x0, y0); + CAMLreturn (alloc_window(newwin(Int_val(nlines), Int_val(ncols), + Int_val(x0), Int_val(y0)))); +} + +value caml_curses_addch(value c) +{ + CAMLparam1 (c); + addch(Int_val(c)); /* Characters are encoded like integers */ + CAMLreturn (Val_unit); +} + +value caml_curses_mvwaddch(value win, value x, value y, value c) +{ + CAMLparam4 (win, x, y, c); + mvwaddch(Window_val(win), Int_val(x), Int_val(y), Int_val(c)); + CAMLreturn (Val_unit); +} + +value caml_curses_addstr(value s) +{ + CAMLparam1 (s); + addstr(String_val(s)); + CAMLreturn (Val_unit); +} + +value
caml_curses_mvwaddstr(value win, value x, value y, value s) +{ + CAMLparam4 (win, x, y, s); + mvwaddstr(Window_val(win), Int_val(x), Int_val(y), String_val(s)); + CAMLreturn (Val_unit); +} + +/* This goes on for pages. */ +\end{verbatim} + +The file "curses_stubs.c" can be compiled with: +\begin{verbatim} + cc -c -I`ocamlc -where` curses_stubs.c +\end{verbatim} +or, even simpler, +\begin{verbatim} + ocamlc -c curses_stubs.c +\end{verbatim} +(When passed a ".c" file, the "ocamlc" command simply calls the C +compiler on that file, with the right "-I" option.) + +Now, here is a sample OCaml program "prog.ml" that uses the "curses" +module: +\begin{verbatim} +(* File prog.ml -- main program using curses *) +open Curses;; +let main_window = initscr () in +let small_window = newwin 10 5 20 10 in + mvwaddstr main_window 10 2 "Hello"; + mvwaddstr small_window 4 3 "world"; + refresh(); + Unix.sleep 5; + endwin() +\end{verbatim} +To compile and link this program, run: +\begin{verbatim} + ocamlc -custom -o prog unix.cma curses.cmo prog.ml curses_stubs.o -cclib -lcurses +\end{verbatim} +(On some machines, you may need to put +"-cclib -lcurses -cclib -ltermcap" or "-cclib -ltermcap" +instead of "-cclib -lcurses".) + +%% Note by Damien: when I launch the program, it only displays "Hello" +%% and not "world". Why? + +\section{Advanced topic: callbacks from C to OCaml} \label{s:callback} +\pdfsection{Advanced topic: callbacks from C to OCaml} + +So far, we have described how to call C functions from OCaml. In this +section, we show how C functions can call OCaml functions, either as +callbacks (OCaml calls C which calls OCaml), or with the main program +written in C. + +\subsection{Applying OCaml closures from C} \label{s:callbacks} + +C functions can apply OCaml function values (closures) to OCaml values. 
+The following functions are provided to perform the applications: +\begin{itemize} +\item "caml_callback("\var{f, a}")" applies the functional value \var{f} to +the value \var{a} and returns the value returned by~\var{f}. +\item "caml_callback2("\var{f, a, b}")" applies the functional value \var{f} +(which is assumed to be a curried OCaml function with two arguments) to +\var{a} and \var{b}. +\item "caml_callback3("\var{f, a, b, c}")" applies the functional value \var{f} +(a curried OCaml function with three arguments) to \var{a}, \var{b} and \var{c}. +\item "caml_callbackN("\var{f, n, args}")" applies the functional value \var{f} +to the \var{n} arguments contained in the array of values \var{args}. +\end{itemize} +If the function \var{f} does not return, but raises an exception that +escapes the scope of the application, then this exception is +propagated to the next enclosing OCaml code, skipping over the C +code. That is, if an OCaml function \var{f} calls a C function \var{g} that +calls back an OCaml function \var{h} that raises a stray exception, then the +execution of \var{g} is interrupted and the exception is propagated back +into \var{f}. + +If the C code wishes to catch exceptions escaping the OCaml function, +it can use the functions "caml_callback_exn", "caml_callback2_exn", +"caml_callback3_exn", "caml_callbackN_exn". These functions take the same +arguments as their non-"_exn" counterparts, but catch escaping +exceptions and return them to the C code. The return value \var{v} of the +"caml_callback*_exn" functions must be tested with the macro +"Is_exception_result("\var{v}")". If the macro returns ``false'', no +exception occurred, and \var{v} is the value returned by the OCaml +function. If "Is_exception_result("\var{v}")" returns ``true'', +an exception escaped, and its value (the exception descriptor) can be +recovered using "Extract_exception("\var{v}")".
+ +\paragraph{Warning:} If the OCaml function returned with an exception, +"Extract_exception" should be applied to the exception result prior +to calling a function that may trigger garbage collection. +Otherwise, if \var{v} is reachable during garbage collection, the runtime +can crash since \var{v} does not contain a valid value. + +Example: +\begin{verbatim} + value call_caml_f_ex(value closure, value arg) + { + CAMLparam2(closure, arg); + CAMLlocal2(res, tmp); + res = caml_callback_exn(closure, arg); + if(Is_exception_result(res)) { + res = Extract_exception(res); + tmp = caml_alloc(3, 0); /* Safe to allocate: res contains valid value. */ + ... + } + CAMLreturn (res); + } +\end{verbatim} + +\subsection{Obtaining or registering OCaml closures for use in C functions} + +There are two ways to obtain OCaml function values (closures) to +be passed to the "callback" functions described above. One way is to +pass the OCaml function as an argument to a primitive function. For +example, if the OCaml code contains the declaration +\begin{verbatim} + external apply : ('a -> 'b) -> 'a -> 'b = "caml_apply" +\end{verbatim} +the corresponding C stub can be written as follows: +\begin{verbatim} + CAMLprim value caml_apply(value vf, value vx) + { + CAMLparam2(vf, vx); + CAMLlocal1(vy); + vy = caml_callback(vf, vx); + CAMLreturn(vy); + } +\end{verbatim} + +Another possibility is to use the registration mechanism provided by +OCaml. This registration mechanism enables OCaml code to register +OCaml functions under some global name, and C code to retrieve the +corresponding closure by this global name. + +On the OCaml side, registration is performed by evaluating +"Callback.register" \var{n} \var{v}. Here, \var{n} is the global name +(an arbitrary string) and \var{v} the OCaml value. 
For instance: +\begin{verbatim} + let f x = print_string "f is applied to "; print_int x; print_newline() + let _ = Callback.register "test function" f +\end{verbatim} + +On the C side, a pointer to the value registered under name \var{n} is +obtained by calling "caml_named_value("\var{n}")". The returned +pointer must then be dereferenced to recover the actual OCaml value. +If no value is registered under the name \var{n}, the null pointer is +returned. For example, here is a C wrapper that calls the OCaml function "f" +above: +\begin{verbatim} + void call_caml_f(int arg) + { + caml_callback(*caml_named_value("test function"), Val_int(arg)); + } +\end{verbatim} + +The pointer returned by "caml_named_value" is constant and can safely +be cached in a C variable to avoid repeated name lookups. On the other +hand, the value pointed to can change during garbage collection and +must always be recomputed at the point of use. Here is a more +efficient variant of "call_caml_f" above that calls "caml_named_value" +only once: +\begin{verbatim} + void call_caml_f(int arg) + { + static value * closure_f = NULL; + if (closure_f == NULL) { + /* First time around, look up by name */ + closure_f = caml_named_value("test function"); + } + caml_callback(*closure_f, Val_int(arg)); + } +\end{verbatim} + +\subsection{Registering OCaml exceptions for use in C functions} \label{s:register-exn} + +The registration mechanism described above can also be used to +communicate exception identifiers from OCaml to C. The OCaml code +registers the exception by evaluating +"Callback.register_exception" \var{n} \var{exn}, where \var{n} is an +arbitrary name and \var{exn} is an exception value of the +exception to register. 
For example: +\begin{verbatim} + exception Error of string + let _ = Callback.register_exception "test exception" (Error "any string") +\end{verbatim} +The C code can then recover the exception identifier using +"caml_named_value" and pass it as first argument to the functions +"caml_raise_constant", "caml_raise_with_arg", and "caml_raise_with_string" (described +in section~\ref{s:c-exceptions}) to actually raise the exception. For +example, here is a C function that raises the "Error" exception with +the given argument: +\begin{verbatim} + void raise_error(char * msg) + { + caml_raise_with_string(*caml_named_value("test exception"), msg); + } +\end{verbatim} + +\subsection{Main program in C} \label{s:main-c} + +In normal operation, a mixed OCaml/C program starts by executing the +OCaml initialization code, which then may proceed to call C +functions. We say that the main program is the OCaml code. In some +applications, it is desirable that the C code plays the role of the +main program, calling OCaml functions when needed. This can be achieved as +follows: +\begin{itemize} +\item The C part of the program must provide a "main" function, +which will override the default "main" function provided by the OCaml +runtime system. Execution will start in the user-defined "main" function +just like for a regular C program. + +\item At some point, the C code must call "caml_main(argv)" to +initialize the OCaml code. The "argv" argument is a C array of strings +(type "char **"), terminated with a "NULL" pointer, +which represents the command-line arguments, as +passed as second argument to "main". The OCaml array "Sys.argv" will +be initialized from this parameter. For the bytecode compiler, +"argv[0]" and "argv[1]" are also consulted to find the file containing +the bytecode. + +\item The call to "caml_main" initializes the OCaml runtime system, +loads the bytecode (in the case of the bytecode compiler), and +executes the initialization code of the OCaml program.
Typically, this +initialization code registers callback functions using "Callback.register". +Once the OCaml initialization code is complete, control returns to the +C code that called "caml_main". + +\item The C code can then invoke OCaml functions using the callback +mechanism (see section~\ref{s:callbacks}). +\end{itemize} + +\subsection{Embedding the OCaml code in the C code} \label{s:embedded-code} + +The bytecode compiler in custom runtime mode ("ocamlc -custom") +normally appends the bytecode to the executable file containing the +custom runtime. This has two consequences. First, the final linking +step must be performed by "ocamlc". Second, the OCaml runtime library +must be able to find the name of the executable file from the +command-line arguments. When using "caml_main(argv)" as in +section~\ref{s:main-c}, this means that "argv[0]" or "argv[1]" must +contain the executable file name. + +An alternative is to embed the bytecode in the C code. The +"-output-obj" option to "ocamlc" is provided for this purpose. It +causes the "ocamlc" compiler to output a C object file (".o" file, +".obj" under Windows) containing the bytecode for the OCaml part of the +program, as well as a "caml_startup" function. The C object file +produced by "ocamlc -output-obj" can then be linked with C code using +the standard C compiler, or stored in a C library. + +The "caml_startup" function must be called from the main C program in +order to initialize the OCaml runtime and execute the OCaml +initialization code. Just like "caml_main", it takes one "argv" +parameter containing the command-line parameters. Unlike "caml_main", +this "argv" parameter is used only to initialize "Sys.argv", but not +for finding the name of the executable file. + +The "caml_startup" function calls the uncaught exception handler (or +enters the debugger, if running under ocamldebug) if an exception escapes +from a top-level module initialiser. 
Such exceptions may be caught in the +C code by instead using the "caml_startup_exn" function and testing the result +using {\tt Is_exception_result} (followed by {\tt Extract_exception} if +appropriate). + +The "-output-obj" option can also be used to obtain the C source file. +More interestingly, the same option can also produce directly a shared +library (".so" file, ".dll" under Windows) that contains the OCaml +code, the OCaml runtime system and any other static C code given to +"ocamlc" (".o", ".a", respectively, ".obj", ".lib"). This use of +"-output-obj" is very similar to a normal linking step, but instead of +producing a main program that automatically runs the OCaml code, it +produces a shared library that can run the OCaml code on demand. The +three possible behaviors of "-output-obj" are selected according +to the extension of the resulting file (given with "-o"). + +The native-code compiler "ocamlopt" also supports the "-output-obj" +option, causing it to output a C object file or a shared library +containing the native code for all OCaml modules on the command-line, +as well as the OCaml startup code. Initialization is performed by +calling "caml_startup" (or "caml_startup_exn") as in the case of the +bytecode compiler. + +For the final linking phase, in addition to the object file produced +by "-output-obj", you will have to provide the OCaml runtime +library ("libcamlrun.a" for bytecode, "libasmrun.a" for native-code), +as well as all C libraries that are required by the OCaml libraries +used. For instance, assume the OCaml part of your program uses the +Unix library. 
With "ocamlc", you should do: +\begin{alltt} + ocamlc -output-obj -o camlcode.o unix.cma {\it{other}} .cmo {\it{and}} .cma {\it{files}} + cc -o myprog {\it{C objects and libraries}} \char92 + camlcode.o -L`ocamlc -where` -lunix -lcamlrun +\end{alltt} +With "ocamlopt", you should do: +\begin{alltt} + ocamlopt -output-obj -o camlcode.o unix.cmxa {\it{other}} .cmx {\it{and}} .cmxa {\it{files}} + cc -o myprog {\it{C objects and libraries}} \char92 + camlcode.o -L`ocamlc -where` -lunix -lasmrun +\end{alltt} + +% -- This seems completely wrong -- Damien +% The shared libraries produced by "ocamlc -output-obj" or by "ocamlopt +% -output-obj" already contains the OCaml runtime library as +% well as all the needed C libraries. + +\paragraph{Warning:} On some ports, special options are required on the final +linking phase that links together the object file produced by the +"-output-obj" option and the remainder of the program. Those options +are shown in the configuration file "config/Makefile" generated during +compilation of OCaml, as the variable "LDFLAGS". +\begin{itemize} +\item Windows with the MSVC compiler: the object file produced by +OCaml have been compiled with the "/MD" flag, and therefore +all other object files linked with it should also be compiled with +"/MD". +\item other systems: you may have to add one or more of "-lcurses", +"-lm", "-ldl", depending on your OS and C compiler. +\end{itemize} + +\paragraph{Stack backtraces.} When OCaml bytecode produced by +"ocamlc -g" is embedded in a C program, no debugging information is +included, and therefore it is impossible to print stack backtraces on +uncaught exceptions. This is not the case when native code produced +by "ocamlopt -g" is embedded in a C program: stack backtrace +information is available, but the backtrace mechanism needs to be +turned on programmatically. This can be achieved from the OCaml side +by calling "Printexc.record_backtrace true" in the initialization of +one of the OCaml modules. 
This can also be achieved from the C side +by calling "caml_record_backtrace(Val_int(1));" in the OCaml-C glue code. + +\paragraph{Unloading the runtime.} + +In case the shared library produced with "-output-obj" is to be loaded and +unloaded repeatedly by a single process, care must be taken to unload the +OCaml runtime explicitly, in order to avoid various system resource leaks. + +Since 4.05, the "caml_shutdown" function can be used to shut the runtime down +gracefully, which amounts to the following: +\begin{itemize} +\item Running the functions that were registered with "Pervasives.at_exit". +\item Triggering finalization of allocated custom blocks (see +section~\ref{s:custom}). For example, "Pervasives.in_channel" and +"Pervasives.out_channel" are represented by custom blocks that enclose file +descriptors, which are to be released. +\item Unloading the dependent shared libraries that were loaded by the runtime, +including "dynlink" plugins. +\item Freeing the memory blocks that were allocated by the runtime with +"malloc". Inside C primitives, it is advised to use "caml_stat_*" functions +from "memory.h" for managing static (that is, non-moving) blocks of heap +memory, as all the blocks allocated with these functions are automatically +freed by "caml_shutdown". For ensuring compatibility with legacy C stubs that +have used "caml_stat_*" incorrectly, this behaviour is only enabled if the +runtime is started with a specialized "caml_startup_pooled" function. +\end{itemize} + +As a shared library may have several clients simultaneously, it is made for +convenience that "caml_startup" (and "caml_startup_pooled") may be called +multiple times, given that each such call is paired with a corresponding call +to "caml_shutdown" (in a nested fashion). The runtime will be unloaded once +there are no outstanding calls to "caml_startup". + +Once a runtime is unloaded, it cannot be started up again without reloading the +shared library and reinitializing its static data.
Therefore, at the moment, the +facility is only useful for building reloadable shared libraries. + + +\section{Advanced example with callbacks} +\pdfsection{Advanced example with callbacks} + +This section illustrates the callback facilities described in +section~\ref{s:callback}. We are going to package some OCaml functions +in such a way that they can be linked with C code and called from C +just like any C functions. The OCaml functions are defined in the +following "mod.ml" OCaml source: + +\begin{verbatim} +(* File mod.ml -- some "useful" OCaml functions *) + +let rec fib n = if n < 2 then 1 else fib(n-1) + fib(n-2) + +let format_result n = Printf.sprintf "Result is: %d\n" n + +(* Export those two functions to C *) + +let _ = Callback.register "fib" fib +let _ = Callback.register "format_result" format_result +\end{verbatim} + +Here is the C stub code for calling these functions from C: + +\begin{verbatim} +/* File modwrap.c -- wrappers around the OCaml functions */ + +#include <stdio.h> +#include <string.h> +#include <caml/mlvalues.h> +#include <caml/callback.h> + +int fib(int n) +{ + static value * fib_closure = NULL; + if (fib_closure == NULL) fib_closure = caml_named_value("fib"); + return Int_val(caml_callback(*fib_closure, Val_int(n))); +} + +char * format_result(int n) +{ + static value * format_result_closure = NULL; + if (format_result_closure == NULL) + format_result_closure = caml_named_value("format_result"); + return strdup(String_val(caml_callback(*format_result_closure, Val_int(n)))); + /* We copy the C string returned by String_val to the C heap + so that it remains valid after garbage collection.
*/ +} +\end{verbatim} + +We now compile the OCaml code to a C object file and put it in a C +library along with the stub code in "modwrap.c" and the OCaml runtime system: +\begin{verbatim} + ocamlc -custom -output-obj -o modcaml.o mod.ml + ocamlc -c modwrap.c + cp `ocamlc -where`/libcamlrun.a mod.a && chmod +w mod.a + ar r mod.a modcaml.o modwrap.o +\end{verbatim} +(One can also use "ocamlopt -output-obj" instead of "ocamlc -custom +-output-obj". In this case, replace "libcamlrun.a" (the bytecode +runtime library) by "libasmrun.a" (the native-code runtime library).) + +Now, we can use the two functions "fib" and "format_result" in any C +program, just like regular C functions. Just remember to call +"caml_startup" (or "caml_startup_exn") once before. + +\begin{verbatim} +/* File main.c -- a sample client for the OCaml functions */ + +#include <stdio.h> +#include <caml/callback.h> + +extern int fib(int n); +extern char * format_result(int n); + +int main(int argc, char ** argv) +{ + int result; + + /* Initialize OCaml code */ + caml_startup(argv); + /* Do some computation */ + result = fib(10); + printf("fib(10) = %s\n", format_result(result)); + return 0; +} +\end{verbatim} + +To build the whole program, just invoke the C compiler as follows: +\begin{verbatim} + cc -o prog -I `ocamlc -where` main.c mod.a -lcurses +\end{verbatim} +(On some machines, you may need to put "-ltermcap" or +"-lcurses -ltermcap" instead of "-lcurses".) + +\section{Advanced topic: custom blocks} \label{s:custom} +\pdfsection{Advanced topic: custom blocks} + +Blocks with tag "Custom_tag" contain both arbitrary user data and a +pointer to a C struct, with type "struct custom_operations", that +associates user-provided finalization, comparison, hashing, +serialization and deserialization functions to this block.
+ +\subsection{The "struct custom_operations"} + +The "struct custom_operations" is defined in "<caml/custom.h>" and +contains the following fields: +\begin{itemize} +\item "char *identifier" \\ +A zero-terminated character string serving as an identifier for +serialization and deserialization operations. + +\item "void (*finalize)(value v)" \\ +The "finalize" field contains a pointer to a C function that is called +when the block becomes unreachable and is about to be reclaimed. +The block is passed as first argument to the function. +The "finalize" field can also be "custom_finalize_default" to indicate that no +finalization function is associated with the block. + +\item "int (*compare)(value v1, value v2)" \\ +The "compare" field contains a pointer to a C function that is +called whenever two custom blocks are compared using OCaml's generic +comparison operators ("=", "<>", "<=", ">=", "<", ">" and +"compare"). The C function should return 0 if the data contained in +the two blocks are structurally equal, a negative integer if the data +from the first block is less than the data from the second block, and +a positive integer if the data from the first block is greater than +the data from the second block. + +The "compare" field can be set to "custom_compare_default"; this +default comparison function simply raises "Failure". + +\item "int (*compare_ext)(value v1, value v2)" \\ +(Since 3.12.1) +The "compare_ext" field contains a pointer to a C function that is +called whenever one custom block and one unboxed integer are compared using OCaml's generic +comparison operators ("=", "<>", "<=", ">=", "<", ">" and +"compare"). As in the case of the "compare" field, the C function +should return 0 if the two arguments are structurally equal, a +negative integer if the first argument compares less than the second +argument, and a positive integer if the first argument compares +greater than the second argument.
+ +The "compare_ext" field can be set to "custom_compare_ext_default"; this +default comparison function simply raises "Failure". + +\item "intnat (*hash)(value v)" \\ +The "hash" field contains a pointer to a C function that is called +whenever OCaml's generic hash operator (see module "Hashtbl") is +applied to a custom block. The C function can return an arbitrary +integer representing the hash value of the data contained in the +given custom block. The hash value must be compatible with the +"compare" function, in the sense that two structurally equal data +(that is, two custom blocks for which "compare" returns 0) must have +the same hash value. + +The "hash" field can be set to "custom_hash_default", in which case +the custom block is ignored during hash computation. + +\item "void (*serialize)(value v, uintnat * wsize_32, uintnat * wsize_64)" \\ +The "serialize" field contains a pointer to a C function that is +called whenever the custom block needs to be serialized (marshaled) +using the OCaml functions "output_value" or "Marshal.to_...". +For a custom block, those functions first write the identifier of the +block (as given by the "identifier" field) to the output stream, +then call the user-provided "serialize" function. That function is +responsible for writing the data contained in the custom block, using +the "serialize_..." functions defined in "<caml/intext.h>" and listed +below. The user-provided "serialize" function must then store in its +"wsize_32" and "wsize_64" parameters the sizes in bytes of the data +part of the custom block on a 32-bit architecture and on a 64-bit +architecture, respectively. + +The "serialize" field can be set to "custom_serialize_default", +in which case the "Failure" exception is raised when attempting to +serialize the custom block.
+ +\item "uintnat (*deserialize)(void * dst)" \\ +The "deserialize" field contains a pointer to a C function that is +called whenever a custom block with identifier "identifier" needs to +be deserialized (un-marshaled) using the OCaml functions "input_value" +or "Marshal.from_...". This user-provided function is responsible for +reading back the data written by the "serialize" operation, using the +"deserialize_..." functions defined in "<caml/intext.h>" and listed +below. It must then rebuild the data part of the custom block +and store it at the pointer given as the "dst" argument. Finally, it +returns the size in bytes of the data part of the custom block. +This size must be identical to the "wsize_32" result of +the "serialize" operation if the architecture is 32 bits, or +"wsize_64" if the architecture is 64 bits. + +The "deserialize" field can be set to "custom_deserialize_default" +to indicate that deserialization is not supported. In this case, +do not register the "struct custom_operations" with the deserializer +using "register_custom_operations" (see below). +\end{itemize} + +Note: the "finalize", "compare", "hash", "serialize" and "deserialize" +functions attached to custom block descriptors must never trigger a +garbage collection. Within these functions, do not call any of the +OCaml allocation functions, and do not perform a callback into OCaml +code. Do not use "CAMLparam" to register the parameters to these +functions, and do not use "CAMLreturn" to return the result. + +\subsection{Allocating custom blocks} + +Custom blocks must be allocated via the "caml_alloc_custom" function: +\begin{center} +"caml_alloc_custom("\var{ops}", "\var{size}", "\var{used}", "\var{max}")" +\end{center} +returns a fresh custom block, with room for \var{size} bytes of user +data, and whose associated operations are given by \var{ops} (a +pointer to a "struct custom_operations", usually statically allocated +as a C global variable).
+ +The two parameters \var{used} and \var{max} are used to control the +speed of garbage collection when the finalized object contains +pointers to out-of-heap resources. Generally speaking, the +OCaml incremental major collector adjusts its speed relative to the +allocation rate of the program. The faster the program allocates, the +harder the GC works in order to reclaim quickly unreachable blocks +and avoid having large amount of ``floating garbage'' (unreferenced +objects that the GC has not yet collected). + +Normally, the allocation rate is measured by counting the in-heap size +of allocated blocks. However, it often happens that finalized +objects contain pointers to out-of-heap memory blocks and other resources +(such as file descriptors, X Windows bitmaps, etc.). For those +blocks, the in-heap size of blocks is not a good measure of the +quantity of resources allocated by the program. + +The two arguments \var{used} and \var{max} give the GC an idea of how +much out-of-heap resources are consumed by the finalized block +being allocated: you give the amount of resources allocated to this +object as parameter \var{used}, and the maximum amount that you want +to see in floating garbage as parameter \var{max}. The units are +arbitrary: the GC cares only about the ratio $\var{used} / \var{max}$. + +For instance, if you are allocating a finalized block holding an X +Windows bitmap of \var{w} by \var{h} pixels, and you'd rather not +have more than 1 mega-pixels of unreclaimed bitmaps, specify +$\var{used} = \var{w} * \var{h}$ and $\var{max} = 1000000$. + +Another way to describe the effect of the \var{used} and \var{max} +parameters is in terms of full GC cycles. If you allocate many custom +blocks with $\var{used} / \var{max} = 1 / \var{N}$, the GC will then do one +full cycle (examining every object in the heap and calling +finalization functions on those that are unreachable) every \var{N} +allocations. 
For instance, if $\var{used} = 1$ and $\var{max} = 1000$, +the GC will do one full cycle at least every 1000 allocations of +custom blocks. + +If your finalized blocks contain no pointers to out-of-heap resources, +or if the previous discussion made little sense to you, just take +$\var{used} = 0$ and $\var{max} = 1$. But if you later find that the +finalization functions are not called ``often enough'', consider +increasing the $\var{used} / \var{max}$ ratio. + +\subsection{Accessing custom blocks} + +The data part of a custom block \var{v} can be +accessed via the pointer "Data_custom_val("\var{v}")". This pointer +has type "void *" and should be cast to the actual type of the data +stored in the custom block. + +The contents of custom blocks are not scanned by the garbage +collector, and must therefore not contain any pointer inside the OCaml +heap. In other terms, never store an OCaml "value" in a custom block, +and do not use "Field", "Store_field" nor "caml_modify" to access the data +part of a custom block. Conversely, any C data structure (not +containing heap pointers) can be stored in a custom block. + +\subsection{Writing custom serialization and deserialization functions} + +The following functions, defined in "<caml/intext.h>", are provided to +write and read back the contents of custom blocks in a portable way. +Those functions handle endianness conversions when e.g. data is +written on a little-endian machine and read back on a big-endian machine.
+ +\begin{tableau}{|l|p{10cm}|}{Function}{Action} +\entree{"caml_serialize_int_1"}{Write a 1-byte integer} +\entree{"caml_serialize_int_2"}{Write a 2-byte integer} +\entree{"caml_serialize_int_4"}{Write a 4-byte integer} +\entree{"caml_serialize_int_8"}{Write an 8-byte integer} +\entree{"caml_serialize_float_4"}{Write a 4-byte float} +\entree{"caml_serialize_float_8"}{Write an 8-byte float} +\entree{"caml_serialize_block_1"}{Write an array of 1-byte quantities} +\entree{"caml_serialize_block_2"}{Write an array of 2-byte quantities} +\entree{"caml_serialize_block_4"}{Write an array of 4-byte quantities} +\entree{"caml_serialize_block_8"}{Write an array of 8-byte quantities} +\entree{"caml_deserialize_uint_1"}{Read an unsigned 1-byte integer} +\entree{"caml_deserialize_sint_1"}{Read a signed 1-byte integer} +\entree{"caml_deserialize_uint_2"}{Read an unsigned 2-byte integer} +\entree{"caml_deserialize_sint_2"}{Read a signed 2-byte integer} +\entree{"caml_deserialize_uint_4"}{Read an unsigned 4-byte integer} +\entree{"caml_deserialize_sint_4"}{Read a signed 4-byte integer} +\entree{"caml_deserialize_uint_8"}{Read an unsigned 8-byte integer} +\entree{"caml_deserialize_sint_8"}{Read a signed 8-byte integer} +\entree{"caml_deserialize_float_4"}{Read a 4-byte float} +\entree{"caml_deserialize_float_8"}{Read an 8-byte float} +\entree{"caml_deserialize_block_1"}{Read an array of 1-byte quantities} +\entree{"caml_deserialize_block_2"}{Read an array of 2-byte quantities} +\entree{"caml_deserialize_block_4"}{Read an array of 4-byte quantities} +\entree{"caml_deserialize_block_8"}{Read an array of 8-byte quantities} +\entree{"caml_deserialize_error"}{Signal an error during deserialization; +"input_value" or "Marshal.from_..." raise a "Failure" exception after +cleaning up their internal data structures} +\end{tableau} + +Serialization functions are attached to the custom blocks to which +they apply.
Obviously, deserialization functions cannot be attached +this way, since the custom block does not exist yet when +deserialization begins! Thus, the "struct custom_operations" that +contain deserialization functions must be registered with the +deserializer in advance, using the "register_custom_operations" +function declared in "<caml/custom.h>". Deserialization proceeds by +reading the identifier off the input stream, allocating a custom block +of the size specified in the input stream, searching the registered +"struct custom_operations" blocks for one with the same identifier, and +calling its "deserialize" function to fill the data part of the custom block. + +\subsection{Choosing identifiers} + +Identifiers in "struct custom_operations" must be chosen carefully, +since they must identify uniquely the data structure for serialization +and deserialization operations. In particular, consider including a +version number in the identifier; this way, the format of the data can +be changed later, yet backward-compatible deserialization functions +can be provided. + +Identifiers starting with "_" (an underscore character) are reserved +for the OCaml runtime system; do not use them for your custom +data. We recommend using a URL +("http://mymachine.mydomain.com/mylibrary/version-number") +or a Java-style package name +("com.mydomain.mymachine.mylibrary.version-number") +as identifiers, to minimize the risk of identifier collision. + +\subsection{Finalized blocks} + +Custom blocks generalize the finalized blocks that were present in +OCaml prior to version 3.00. For backward compatibility, the +format of custom blocks is compatible with that of finalized blocks, +and the "caml_alloc_final" function is still available to allocate a custom +block with a given finalization function, but default comparison, +hashing and serialization functions.
"caml_alloc_final("\var{n}", +"\var{f}", "\var{used}", "\var{max}")" returns a fresh custom block of +size \var{n}+1 words, with finalization function \var{f}. The first +word is reserved for storing the custom operations; the other +\var{n} words are available for your data. The two parameters +\var{used} and \var{max} are used to control the speed of garbage +collection, as described for "caml_alloc_custom". + +\section{Advanced topic: Big arrays and the OCaml-C interface} +\label{s:C-Bigarrays} + +This section explains how C stub code that interfaces C or Fortran +code with OCaml code can use big arrays. + +\subsection{Include file} + +The include file "<caml/bigarray.h>" must be included in the C stub +file. It declares the functions, constants and macros discussed +below. + +\subsection{Accessing an OCaml bigarray from C or Fortran} + +If \var{v} is an OCaml "value" representing a big array, the expression +"Caml_ba_data_val("\var{v}")" returns a pointer to the data part of the array. +This pointer is of type "void *" and can be cast to the appropriate C +type for the array (e.g. "double []", "char [][10]", etc).
+ +Various characteristics of the OCaml big array can be consulted from C +as follows: +\begin{tableau}{|l|l|}{C expression}{Returns} +\entree{"Caml_ba_array_val("\var{v}")->num_dims"}{number of dimensions} +\entree{"Caml_ba_array_val("\var{v}")->dim["\var{i}"]"}{\var{i}-th dimension} +\entree{"Caml_ba_array_val("\var{v}")->flags & CAML_BA_KIND_MASK"}{kind of array elements} +\end{tableau} +The kind of array elements is one of the following constants: +\begin{tableau}{|l|l|}{Constant}{Element kind} +\entree{"CAML_BA_FLOAT32"}{32-bit single-precision floats} +\entree{"CAML_BA_FLOAT64"}{64-bit double-precision floats} +\entree{"CAML_BA_SINT8"}{8-bit signed integers} +\entree{"CAML_BA_UINT8"}{8-bit unsigned integers} +\entree{"CAML_BA_SINT16"}{16-bit signed integers} +\entree{"CAML_BA_UINT16"}{16-bit unsigned integers} +\entree{"CAML_BA_INT32"}{32-bit signed integers} +\entree{"CAML_BA_INT64"}{64-bit signed integers} +\entree{"CAML_BA_CAML_INT"}{31- or 63-bit signed integers} +\entree{"CAML_BA_NATIVE_INT"}{32- or 64-bit (platform-native) integers} +\end{tableau} +% +The following example shows the passing of a two-dimensional big array +to a C function and a Fortran function. +\begin{verbatim} + extern void my_c_function(double * data, int dimx, int dimy); + extern void my_fortran_function_(double * data, int * dimx, int * dimy); + + value caml_stub(value bigarray) + { + int dimx = Caml_ba_array_val(bigarray)->dim[0]; + int dimy = Caml_ba_array_val(bigarray)->dim[1]; + /* C passes scalar parameters by value */ + my_c_function(Caml_ba_data_val(bigarray), dimx, dimy); + /* Fortran passes all parameters by reference */ + my_fortran_function_(Caml_ba_data_val(bigarray), &dimx, &dimy); + return Val_unit; + } +\end{verbatim} + +\subsection{Wrapping a C or Fortran array as an OCaml big array} + +A pointer \var{p} to an already-allocated C or Fortran array can be +wrapped and returned to OCaml as a big array using the "caml_ba_alloc" +or "caml_ba_alloc_dims" functions.
+\begin{itemize} +\item +"caml_ba_alloc("\var{kind} "|" \var{layout}, \var{numdims}, \var{p}, \var{dims}")" + +Return an OCaml big array wrapping the data pointed to by \var{p}. +\var{kind} is the kind of array elements (one of the "CAML_BA_" +kind constants above). \var{layout} is "CAML_BA_C_LAYOUT" for an +array with C layout and "CAML_BA_FORTRAN_LAYOUT" for an array with +Fortran layout. \var{numdims} is the number of dimensions in the +array. \var{dims} is an array of \var{numdims} long integers, giving +the sizes of the array in each dimension. + +\item +"caml_ba_alloc_dims("\var{kind} "|" \var{layout}, \var{numdims}, +\var{p}, "(long) "\nth{dim}{1}, "(long) "\nth{dim}{2}, \ldots, "(long) "\nth{dim}{numdims}")" + +Same as "caml_ba_alloc", but the sizes of the array in each dimension +are listed as extra arguments in the function call, rather than being +passed as an array. +\end{itemize} +% +The following example illustrates how statically-allocated C and +Fortran arrays can be made available to OCaml. +\begin{verbatim} + extern long my_c_array[100][200]; + extern float my_fortran_array_[300][400]; + + value caml_get_c_array(value unit) + { + long dims[2]; + dims[0] = 100; dims[1] = 200; + return caml_ba_alloc(CAML_BA_NATIVE_INT | CAML_BA_C_LAYOUT, + 2, my_c_array, dims); + } + + value caml_get_fortran_array(value unit) + { + return caml_ba_alloc_dims(CAML_BA_FLOAT32 | CAML_BA_FORTRAN_LAYOUT, + 2, my_fortran_array_, 300L, 400L); + } +\end{verbatim} + +\section{Advanced topic: cheaper C call} +\label{s:C-cheaper-call} + +This section describes how to make calling C functions cheaper. + +{\bf Note:} this only applies to the native compiler. So whenever you +use any of these methods, you have to provide an alternative byte-code +stub that ignores all the special annotations.
+ +\subsection{Passing unboxed values} + +We said earlier that all OCaml objects are represented by the C type +"value", and one has to use macros such as "Int_val" to decode data from +the "value" type. It is however possible to tell the OCaml native-code +compiler to do this for us and pass arguments unboxed to the C function. +Similarly it is possible to tell OCaml to expect the result unboxed and box +it for us. + +The motivation is that, by letting "ocamlopt" deal with boxing, it can +often decide to suppress it entirely. + +For instance let's consider this example: + +\begin{verbatim} +external foo : float -> float -> float = "foo" + +let f a b = + let len = Array.length a in + assert (Array.length b = len); + let res = Array.make len 0. in + for i = 0 to len - 1 do + res.(i) <- foo a.(i) b.(i) + done +\end{verbatim} + +Float arrays are unboxed in OCaml, however the C function "foo" expects +its arguments as boxed floats and returns a boxed float. Hence the +OCaml compiler has no choice but to box "a.(i)" and "b.(i)" and unbox +the result of "foo". This results in the allocation of "3 * len" +temporary float values. + +Now if we annotate the arguments and result with "[\@unboxed]", the +native-code compiler will be able to avoid all these allocations: + +\begin{verbatim} +external foo + : (float [@unboxed]) + -> (float [@unboxed]) + -> (float [@unboxed]) + = "foo_byte" "foo" +\end{verbatim} + +In this case the C functions must look like: + +\begin{verbatim} +CAMLprim double foo(double a, double b) +{ + ... +} + +CAMLprim value foo_byte(value a, value b) +{ + return caml_copy_double(foo(Double_val(a), Double_val(b))) +} +\end{verbatim} + +For convenience, when all arguments and the result are annotated with +"[\@unboxed]", it is possible to put the attribute only once on the +declaration itself.
So we can also write instead: + +\begin{verbatim} +external foo : float -> float -> float = "foo_byte" "foo" [@@unboxed] +\end{verbatim} + +The following table summarizes what OCaml types can be unboxed, and +what C types should be used in correspondence: + +\begin{tableau}{|l|l|}{OCaml type}{C type} +\entree{"float"}{"double"} +\entree{"int32"}{"int32_t"} +\entree{"int64"}{"int64_t"} +\entree{"nativeint"}{"intnat"} +\end{tableau} + +Similarly, it is possible to pass untagged OCaml integers between +OCaml and C. This is done by annotating the arguments and/or result +with "[\@untagged]": + +\begin{verbatim} +external f : string -> (int [@untagged]) = "f_byte" "f" +\end{verbatim} + +The corresponding C type must be "intnat". + +{\bf Note:} do not use the C "int" type in correspondence with "(int +[\@untagged])". This is because they often differ in size. + +\subsection{Direct C call} + +In order to be able to run the garbage collector in the middle of +a C function, the OCaml native-code compiler generates some bookkeeping +code around C calls. Technically it wraps every C call with the C function +"caml_c_call" which is part of the OCaml runtime. + +For small functions that are called repeatedly, this indirection can have +a big impact on performance. However this is not needed if we know that +the C function doesn't allocate and doesn't raise exceptions. We can +instruct the OCaml native-code compiler of this fact by annotating the +external declaration with the attribute "[\@\@noalloc]": + +\begin{verbatim} +external bar : int -> int -> int = "foo" [@@noalloc] +\end{verbatim} + +In this case calling "bar" from OCaml is as cheap as calling any other +OCaml function, except for the fact that the OCaml compiler can't +inline C functions... + +\subsection{Example: calling C library functions without indirection} + +Using these attributes, it is possible to call C library functions +with no indirection.
For instance many math functions are defined this +way in the OCaml standard library: + +\begin{verbatim} +external sqrt : float -> float = "caml_sqrt_float" "sqrt" + [@@unboxed] [@@noalloc] +(** Square root. *) + +external exp : float -> float = "caml_exp_float" "exp" [@@unboxed] [@@noalloc] +(** Exponential. *) + +external log : float -> float = "caml_log_float" "log" [@@unboxed] [@@noalloc] +(** Natural logarithm. *) +\end{verbatim} + +\section{Advanced topic: multithreading} +\label{s:C-multithreading} + +Using multiple threads (shared-memory concurrency) in a mixed OCaml/C +application requires special precautions, which are described in this +section. + +\subsection{Registering threads created from C} + +Callbacks from C to OCaml are possible only if the calling thread is +known to the OCaml run-time system. Threads created from OCaml (through +the "Thread.create" function of the system threads library) are +automatically known to the run-time system. If the application +creates additional threads from C and wishes to call back into OCaml +code from these threads, it must first register them with the run-time +system. The following functions are declared in the include file +"<caml/threads.h>". + +\begin{itemize} +\item +"caml_c_thread_register()" registers the calling thread with the OCaml +run-time system. Returns 1 on success, 0 on error. Registering an +already-registered thread does nothing and returns 0. +\item +"caml_c_thread_unregister()" must be called before the thread + terminates, to unregister it from the OCaml run-time system. +Returns 1 on success, 0 on error. If the calling thread was not +previously registered, does nothing and returns 0. +\end{itemize} + +\subsection{Parallel execution of long-running C code} + +The OCaml run-time system is not reentrant: at any time, at most one +thread can be executing OCaml code or C code that uses the OCaml +run-time system.
Technically, this is enforced by a ``master lock'' +that any thread must hold while executing such code. + +When OCaml calls the C code implementing a primitive, the master lock +is held, therefore the C code has full access to the facilities of the +run-time system. However, no other thread can execute OCaml code +concurrently with the C code of the primitive. + +If a C primitive runs for a long time or performs potentially blocking +input-output operations, it can explicitly release the master lock, +enabling other OCaml threads to run concurrently with its operations. +The C code must re-acquire the master lock before returning to OCaml. +This is achieved with the following functions, declared in +the include file "<caml/threads.h>". + +\begin{itemize} +\item +"caml_release_runtime_system()" +The calling thread releases the master lock and other OCaml resources, +enabling other threads to run OCaml code in parallel with the execution +of the calling thread. +\item +"caml_acquire_runtime_system()" +The calling thread re-acquires the master lock and other OCaml +resources. It may block until no other thread uses the OCaml run-time +system. +\end{itemize} + +After "caml_release_runtime_system()" was called and until +"caml_acquire_runtime_system()" is called, the C code must not access +any OCaml data, nor call any function of the run-time system, nor call +back into OCaml code. Consequently, arguments provided by OCaml to the +C primitive must be copied into C data structures before calling +"caml_release_runtime_system()", and results to be returned to OCaml +must be encoded as OCaml values after "caml_acquire_runtime_system()" +returns. + +Example: the following C primitive invokes "gethostbyname" to find the +IP address of a host name. The "gethostbyname" function can block for +a long time, so we choose to release the OCaml run-time system while it +is running.
+\begin{verbatim} +CAMLprim stub_gethostbyname(value vname) +{ + CAMLparam1 (vname); + CAMLlocal1 (vres); + struct hostent * h; + char * name; + + /* Copy the string argument to a C string, allocated outside the + OCaml heap. */ + name = caml_stat_strdup(String_val(vname)); + /* Release the OCaml run-time system */ + caml_release_runtime_system(); + /* Resolve the name */ + h = gethostbyname(name); + /* Free the copy of the string, which we might as well do before + acquiring the runtime system to benefit from parallelism. */ + caml_stat_free(name); + /* Re-acquire the OCaml run-time system */ + caml_acquire_runtime_system(); + /* Encode the relevant fields of h as the OCaml value vres */ + ... /* Omitted */ + /* Return to OCaml */ + CAMLreturn (vres); +} +\end{verbatim} + +Callbacks from C to OCaml must be performed while holding the master +lock to the OCaml run-time system. This is naturally the case if the +callback is performed by a C primitive that did not release the +run-time system. If the C primitive released the run-time system +previously, or the callback is performed from other C code that was +not invoked from OCaml (e.g. an event loop in a GUI application), the +run-time system must be acquired before the callback and released +after: +\begin{verbatim} + caml_acquire_runtime_system(); + /* Resolve OCaml function vfun to be invoked */ + /* Build OCaml argument varg to the callback */ + vres = callback(vfun, varg); + /* Copy relevant parts of result vres to C data structures */ + caml_release_runtime_system(); +\end{verbatim} + +Note: the "acquire" and "release" functions described above were +introduced in OCaml 3.12. 
Older code uses the following historical +names, declared in "<caml/signals.h>": +\begin{itemize} +\item "caml_enter_blocking_section" as an alias for + "caml_release_runtime_system" +\item "caml_leave_blocking_section" as an alias for + "caml_acquire_runtime_system" +\end{itemize} +Intuition: a ``blocking section'' is a piece of C code that does not +use the OCaml run-time system, typically a blocking input/output operation. + +\section{Advanced topic: interfacing with Windows Unicode APIs} +\label{s:interfacing-windows-unicode-apis} + +This section contains some general guidelines for writing C stubs that use +Windows Unicode APIs. + +{\bf Note:} This is an experimental feature of OCaml: the set of APIs below, as +well as their exact semantics are not final and subject to change in future +releases. + +The OCaml system under Windows can be configured at build time in one of two +modes: + +\begin{itemize} + +\item {\bf legacy mode:} All path names, environment variables, command line +arguments, etc. on the OCaml side are assumed to be encoded using the current +8-bit code page of the system. + +\item {\bf Unicode mode:} All path names, environment variables, command line +arguments, etc. on the OCaml side are assumed to be encoded using UTF-8. + +\end{itemize} + +In what follows, we say that a string has the \emph{OCaml encoding} if it is +encoded in UTF-8 when in Unicode mode, in the current code page in legacy mode, +or is an arbitrary string under Unix. A string has the \emph{platform encoding} +if it is encoded in UTF-16 under Windows or is an arbitrary string under Unix. + +From the point of view of the writer of C stubs, the challenges of interacting +with Windows Unicode APIs are twofold: + +\begin{itemize} + +\item The Windows API uses the UTF-16 encoding to support Unicode. The runtime +system performs the necessary conversions so that the OCaml programmer only +needs to deal with the OCaml encoding.
C stubs that call Windows Unicode APIs +need to use specific runtime functions to perform the necessary conversions in a +compatible way. + +\item When writing stubs that need to be compiled under both Windows and Unix, +the stubs need to be written in a way that allows the necessary conversions under +Windows but that also work under Unix, where typically nothing particular needs +to be done to support Unicode. + +\end{itemize} + +The native C character type under Windows is "WCHAR", two bytes wide, while +under Unix it is "char", one byte wide. A type "char_os" is defined in +"<caml/misc.h>" that stands for the concrete C character type of each +platform. Strings in the platform encoding are of type "char_os *". + +The following functions are exposed to help write compatible C stubs. To use +them, you need to include both "<caml/misc.h>" and "<caml/osdeps.h>". + +\begin{itemize} + +\item "char_os* caml_stat_strdup_to_os(const char *)" copies the argument while +translating from OCaml encoding to the platform encoding. This function is +typically used to convert the "char *" underlying an OCaml string before passing +it to an operating system API that takes a Unicode argument. Under Unix, it is +equivalent to "caml_stat_strdup". + +{\bf Note:} For maximum backwards compatibility in Unicode mode, if the argument +is not a valid UTF-8 string, this function will fall back to assuming that it is +encoded in the current code page. + +\item "char* caml_stat_strdup_of_os(const char_os *)" copies the argument while +translating from the platform encoding to the OCaml encoding. It is the inverse +of "caml_stat_strdup_to_os". This function is typically used to convert a string +obtained from the operating system before passing it on to OCaml code. Under +Unix, it is equivalent to "caml_stat_strdup". + +\item "value caml_copy_string_of_os(char_os *)" allocates an OCaml string with +contents equal to the argument string converted to the OCaml encoding.
This +function is essentially equivalent to "caml_stat_strdup_of_os" followed by +"caml_copy_string", except that it avoids the allocation of the intermediate +string returned by "caml_stat_strdup_of_os". Under Unix, it is equivalent to +"caml_copy_string". + +\end{itemize} + +{\bf Note:} The strings returned by "caml_stat_strdup_to_os" and +"caml_stat_strdup_of_os" are allocated using "caml_stat_alloc", so they need to +be deallocated using "caml_stat_free" when they are no longer needed. + +\paragraph{Example} We want to bind the function "getenv" in a way that works +both under Unix and Windows. Under Unix this function has the prototype: + +\begin{verbatim} + char *getenv(const char *); +\end{verbatim} +While the Unicode version under Windows has the prototype: +\begin{verbatim} + WCHAR *_wgetenv(const WCHAR *); +\end{verbatim} + +In terms of "char_os", both functions take an argument of type "char_os *" and +return a result of the same type. We begin by choosing the right implementation +of the function to bind: + +\begin{verbatim} +#ifdef _WIN32 +#define getenv_os _wgetenv +#else +#define getenv_os getenv +#endif +\end{verbatim} + +The rest of the binding is the same for both platforms: + +\begin{verbatim} +/* The following define is necessary because the API is experimental */ +#define CAML_INTERNALS + +#include <caml/mlvalues.h> +#include <caml/misc.h> +#include <caml/alloc.h> +#include <caml/fail.h> +#include <caml/osdeps.h> +#include <stdlib.h> + +CAMLprim value stub_getenv(value var_name) +{ + CAMLparam1(var_name); + CAMLlocal1(var_value); + char_os *var_name_os, *var_value_os; + + var_name_os = caml_stat_strdup_to_os(String_val(var_name)); + var_value_os = getenv_os(var_name_os); + caml_stat_free(var_name_os); + + if (var_value_os == NULL) + caml_raise_not_found(); + + var_value = caml_copy_string_of_os(var_value_os); + + CAMLreturn(var_value); +} +\end{verbatim} + +\section{Building mixed C/OCaml libraries: \texttt{ocamlmklib}} +\label{s-ocamlmklib} + +The "ocamlmklib" command facilitates the construction of libraries +containing both
OCaml code and C code, and usable both in static +linking and dynamic linking modes. This command is available under +Windows since Objective Caml 3.11 and under other operating systems since +Objective Caml 3.03. + +The "ocamlmklib" command takes three kinds of arguments: +\begin{itemize} +\item OCaml source files and object files (".cmo", ".cmx", ".ml") +comprising the OCaml part of the library; +\item C object files (".o", ".a", respectively, ".obj", ".lib") + comprising the C part of the library; +\item Support libraries for the C part ("-l"\var{lib}). +\end{itemize} +It generates the following outputs: +\begin{itemize} +\item An OCaml bytecode library ".cma" incorporating the ".cmo" and +".ml" OCaml files given as arguments, and automatically referencing the +C library generated with the C object files. +\item An OCaml native-code library ".cmxa" incorporating the ".cmx" and +".ml" OCaml files given as arguments, and automatically referencing the +C library generated with the C object files. +\item If dynamic linking is supported on the target platform, a +".so" (respectively, ".dll") shared library built from the C object files given as arguments, +and automatically referencing the support libraries. +\item A C static library ".a"(respectively, ".lib") built from the C object files. +\end{itemize} +In addition, the following options are recognized: +\begin{options} +\item["-cclib", "-ccopt", "-I", "-linkall"] +These options are passed as is to "ocamlc" or "ocamlopt". +See the documentation of these commands. +\item["-rpath", "-R", "-Wl,-rpath", "-Wl,-R"] +These options are passed as is to the C compiler. Refer to the +documentation of the C compiler. +\item["-custom"] Force the construction of a statically linked library +only, even if dynamic linking is supported. +\item["-failsafe"] Fall back to building a statically linked library +if a problem occurs while building the shared library (e.g. 
some of +the support libraries are not available as shared libraries). +\item["-L"\var{dir}] Add \var{dir} to the search path for support +libraries ("-l"\var{lib}). +\item["-ocamlc" \var{cmd}] Use \var{cmd} instead of "ocamlc" to call +the bytecode compiler. +\item["-ocamlopt" \var{cmd}] Use \var{cmd} instead of "ocamlopt" to call +the native-code compiler. +\item["-o" \var{output}] Set the name of the generated OCaml library. +"ocamlmklib" will generate \var{output}".cma" and/or \var{output}".cmxa". +If not specified, defaults to "a". +\item["-oc" \var{outputc}] Set the name of the generated C library. +"ocamlmklib" will generate "lib"\var{outputc}".so" (if shared +libraries are supported) and "lib"\var{outputc}".a". +If not specified, defaults to the output name given with "-o". +\end{options} + +\noindent +On native Windows, the following environment variable is also consulted: + +\begin{options} +\item["OCAML_FLEXLINK"] Alternative executable to use instead of the +configured value. Primarily used for bootstrapping. +\end{options} + +\paragraph{Example} Consider an OCaml interface to the standard "libz" +C library for reading and writing compressed files. Assume this +library resides in "/usr/local/zlib". This interface is +composed of an OCaml part "zip.cmo"/"zip.cmx" and a C part "zipstubs.o" +containing the stub code around the "libz" entry points. 
The +following command builds the OCaml libraries "zip.cma" and "zip.cmxa", +as well as the companion C libraries "dllzip.so" and "libzip.a": +\begin{verbatim} +ocamlmklib -o zip zip.cmo zip.cmx zipstubs.o -lz -L/usr/local/zlib +\end{verbatim} +If shared libraries are supported, this performs the following +commands: +\begin{verbatim} +ocamlc -a -o zip.cma zip.cmo -dllib -lzip \ + -cclib -lzip -cclib -lz -ccopt -L/usr/local/zlib +ocamlopt -a -o zip.cmxa zip.cmx -cclib -lzip \ + -cclib -lzip -cclib -lz -ccopt -L/usr/local/zlib +gcc -shared -o dllzip.so zipstubs.o -lz -L/usr/local/zlib +ar rc libzip.a zipstubs.o +\end{verbatim} +Note: This example is on a Unix system. The exact command lines +may be different on other systems. + +If shared libraries are not supported, the following commands are +performed instead: +\begin{verbatim} +ocamlc -a -custom -o zip.cma zip.cmo -cclib -lzip \ + -cclib -lz -ccopt -L/usr/local/zlib +ocamlopt -a -o zip.cmxa zip.cmx -lzip \ + -cclib -lz -ccopt -L/usr/local/zlib +ar rc libzip.a zipstubs.o +\end{verbatim} +Instead of building simultaneously the bytecode library, the +native-code library and the C libraries, "ocamlmklib" can be called +three times to build each separately. Thus, +\begin{verbatim} +ocamlmklib -o zip zip.cmo -lz -L/usr/local/zlib +\end{verbatim} +builds the bytecode library "zip.cma", and +\begin{verbatim} +ocamlmklib -o zip zip.cmx -lz -L/usr/local/zlib +\end{verbatim} +builds the native-code library "zip.cmxa", and +\begin{verbatim} +ocamlmklib -o zip zipstubs.o -lz -L/usr/local/zlib +\end{verbatim} +builds the C libraries "dllzip.so" and "libzip.a". Notice that the +support libraries ("-lz") and the corresponding options +("-L/usr/local/zlib") must be given on all three invocations of "ocamlmklib", +because they are needed at different times depending on whether shared +libraries are supported. 
diff --git a/manual/manual/cmds/lexyacc.etex b/manual/manual/cmds/lexyacc.etex new file mode 100644 index 00000000..6053de89 --- /dev/null +++ b/manual/manual/cmds/lexyacc.etex @@ -0,0 +1,729 @@ +\chapter{Lexer and parser generators (ocamllex, ocamlyacc)} +\label{c:ocamlyacc} +\pdfchapter{Lexer and parser generators (ocamllex, ocamlyacc)} +%HEVEA\cutname{lexyacc.html} + +This chapter describes two program generators: "ocamllex", that +produces a lexical analyzer from a set of regular expressions with +associated semantic actions, and "ocamlyacc", that produces a parser +from a grammar with associated semantic actions. + +These program generators are very close to the well-known "lex" and +"yacc" commands that can be found in most C programming environments. +This chapter assumes a working knowledge of "lex" and "yacc": while +it describes the input syntax for "ocamllex" and "ocamlyacc" and the +main differences with "lex" and "yacc", it does not explain the basics +of writing a lexer or parser description in "lex" and "yacc". Readers +unfamiliar with "lex" and "yacc" are referred to ``Compilers: +principles, techniques, and tools'' by Aho, Sethi and Ullman +(Addison-Wesley, 1986), or ``Lex $\&$ Yacc'', by Levine, Mason and +Brown (O'Reilly, 1992). + +\section{Overview of \texttt{ocamllex}} + +The "ocamllex" command produces a lexical analyzer from a set of regular +expressions with attached semantic actions, in the style of +"lex". Assuming the input file is \var{lexer}".mll", executing +\begin{alltt} + ocamllex \var{lexer}.mll +\end{alltt} +produces OCaml code for a lexical analyzer in file \var{lexer}".ml". +This file defines one lexing function per entry point in the lexer +definition. These functions have the same names as the entry +points. Lexing functions take as argument a lexer buffer, and return +the semantic attribute of the corresponding entry point. + +Lexer buffers are an abstract data type implemented in the standard +library module "Lexing". 
The functions "Lexing.from_channel", +"Lexing.from_string" and "Lexing.from_function" create +lexer buffers that read from an input channel, a character string, or +any reading function, respectively. (See the description of module +"Lexing" in chapter~\ref{c:stdlib}.) + +When used in conjunction with a parser generated by "ocamlyacc", the +semantic actions compute a value belonging to the type "token" defined +by the generated parsing module. (See the description of "ocamlyacc" +below.) + +\subsection{Options} +The following command-line options are recognized by "ocamllex". + +\begin{options} + +\item["-ml"] +Output code that does not use OCaml's built-in automata +interpreter. Instead, the automaton is encoded by OCaml functions. +This option is mainly useful for debugging "ocamllex"; using it for +production lexers is not recommended. + +\item["-o" \var{output-file}] +Specify the name of the output file produced by "ocamllex". +The default is the input file name with its extension replaced by ".ml". + +\item["-q"] +Quiet mode. "ocamllex" normally outputs informational messages +to standard output. They are suppressed if option "-q" is used. + +\item["-v" or "-version"] +Print version string and exit. + +\item["-vnum"] +Print short version number and exit. + +\item["-help" or "--help"] +Display a short usage summary and exit. +% +\end{options} + +\section{Syntax of lexer definitions} + +The format of lexer definitions is as follows: +\begin{alltt} +\{ \var{header} \} +let \var{ident} = \var{regexp} \ldots +[refill \{ \var{refill-handler} \}] +rule \var{entrypoint} [\nth{arg}{1}\ldots{} \nth{arg}{n}] = + parse \var{regexp} \{ \var{action} \} + | \ldots + | \var{regexp} \{ \var{action} \} +and \var{entrypoint} [\nth{arg}{1}\ldots{} \nth{arg}{n}] = + parse \ldots +and \ldots +\{ \var{trailer} \} +\end{alltt} +Comments are delimited by "(*" and "*)", as in OCaml. 
+The "parse" keyword can be replaced by the "shortest" keyword, with +the semantic consequences explained below. + +Refill handlers are a recent (optional) feature introduced in 4.02, +documented below in subsection~\ref{ss:refill-handlers}. + +\subsection{Header and trailer} +The {\it header} and {\it trailer} sections are arbitrary OCaml +text enclosed in curly braces. Either or both can be omitted. If +present, the header text is copied as is at the beginning of the +output file and the trailer text at the end. Typically, the +header section contains the "open" directives required +by the actions, and possibly some auxiliary functions used in the +actions. + +\subsection{Naming regular expressions} + +Between the header and the entry points, one can give names to +frequently-occurring regular expressions. This is written +@"let" ident "=" regexp@. +In regular expressions that follow this declaration, the identifier +\var{ident} can be used as shorthand for \var{regexp}. + +\subsection{Entry points} + +The names of the entry points must be valid identifiers for OCaml +values (starting with a lowercase letter). +Similarly, the arguments \texttt{\var{arg$_1$}\ldots{} +\var{arg$_n$}} must be valid identifiers for OCaml. +Each entry point becomes an +OCaml function that takes $n+1$ arguments, +the extra implicit last argument being of type "Lexing.lexbuf". +Characters are read from the "Lexing.lexbuf" argument and matched +against the regular expressions provided in the rule, until a prefix +of the input matches one of these expressions. The corresponding action is +then evaluated and returned as the result of the function. + + +If several regular expressions match a prefix of the input, the +``longest match'' rule applies: the regular expression that matches +the longest prefix of the input is selected. In case of tie, the +regular expression that occurs earlier in the rule is selected. 
+ +However, if lexer rules are introduced with the "shortest" keyword in +place of the "parse" keyword, then the ``shortest match'' rule applies: +the shortest prefix of the input is selected. In case of tie, the +regular expression that occurs earlier in the rule is still selected. +This feature is not intended for use in ordinary lexical analyzers, it +may facilitate the use of "ocamllex" as a simple text processing tool. + + + +\subsection{Regular expressions} + +The regular expressions are in the style of "lex", with a more +OCaml-like syntax. +\begin{syntax} +regexp: + \ldots +\end{syntax} +\begin{options} + +\item[@"'" regular-char || escape-sequence "'"@] +A character constant, with the same syntax as OCaml character +constants. Match the denoted character. + +\item["_"] +(underscore) Match any character. + +\item[@"eof"@] +Match the end of the lexer input.\\ +{\bf Note:} On some systems, with interactive input, an end-of-file +may be followed by more characters. However, "ocamllex" will not +correctly handle regular expressions that contain "eof" followed by +something else. + +\item[@'"' { string-character } '"'@] +A string constant, with the same syntax as OCaml string +constants. Match the corresponding sequence of characters. + +\item[@'[' character-set ']'@] +Match any single character belonging to the given +character set. Valid character sets are: single +character constants @"'" @c@ "'"@; ranges of characters +@"'" @c@_1 "'" "-" "'" @c@_2 "'"@ (all characters between $c_1$ and $c_2$, +inclusive); and the union of two or more character sets, denoted by +concatenation. + +\item[@'[' '^' character-set ']'@] +Match any single character not belonging to the given character set. + + +\item[@regexp_1 '#' regexp_2@] +(difference of character sets) +Regular expressions @regexp_1@ and @regexp_2@ must be character sets +defined with @'['\ldots ']'@ (or a single character expression or +underscore "_"). +Match the difference of the two specified character sets. 
+ + +\item[@regexp '*'@] +(repetition) Match the concatenation of zero or more +strings that match @regexp@. + +\item[@regexp '+'@] +(strict repetition) Match the concatenation of one or more +strings that match @regexp@. + +\item[@regexp '?'@] +(option) Match the empty string, or a string matching @regexp@. + +\item[@regexp_1 '|' regexp_2@] +(alternative) Match any string that matches @regexp_1@ or @regexp_2@ + +\item[@regexp_1 regexp_2@] +(concatenation) Match the concatenation of two strings, the first +matching @regexp_1@, the second matching @regexp_2@. + +\item[@'(' regexp ')'@] +Match the same strings as @regexp@. + +\item[@ident@] +Reference the regular expression bound to @ident@ by an earlier +@"let" ident "=" regexp@ definition. + +\item[@regexp 'as' ident@] +Bind the substring matched by @regexp@ to identifier @ident@. +\end{options} + +Concerning the precedences of operators, "#" has the highest precedence, +followed by "*", "+" and "?", +then concatenation, then "|" (alternation), then "as". + +\subsection{Actions} + +The actions are arbitrary OCaml expressions. They are evaluated in +a context where the identifiers defined by using the "as" construct +are bound to subparts of the matched string. +Additionally, "lexbuf" is bound to the current lexer +buffer. Some typical uses for "lexbuf", in conjunction with the +operations on lexer buffers provided by the "Lexing" standard library +module, are listed below. + +\begin{options} +\item["Lexing.lexeme lexbuf"] +Return the matched string. + +\item["Lexing.lexeme_char lexbuf "$n$] +Return the $n\th$ +character in the matched string. The first character corresponds to $n = 0$. + +\item["Lexing.lexeme_start lexbuf"] +Return the absolute position in the input text of the beginning of the +matched string (i.e. the offset of the first character of the matched +string). The first character read from the input text has offset 0. 
+ +\item["Lexing.lexeme_end lexbuf"] +Return the absolute position in the input text of the end of the +matched string (i.e. the offset of the first character after the +matched string). The first character read from the input text has +offset 0. + +\newcommand{\sub}[1]{$_{#1}$}% +\item[\var{entrypoint} {[\var{exp\sub{1}}\ldots{} \var{exp\sub{n}}]} "lexbuf"] +(Where \var{entrypoint} is the name of another entry point in the same +lexer definition.) Recursively call the lexer on the given entry point. +Notice that "lexbuf" is the last argument. +Useful for lexing nested comments, for example. + +\end{options} + +\subsection{Variables in regular expressions} +The "as" construct is similar to ``\emph{groups}'' as provided by +numerous regular expression packages. +The type of these variables can be "string", "char", "string option" +or "char option". + +We first consider the case of linear patterns, that is the case when +all "as" bound variables are distinct. +In @regexp 'as' ident@, the type of @ident@ normally is "string" (or +"string option") except +when @regexp@ is a character constant, an underscore, a string +constant of length one, a character set specification, or an +alternation of those. Then, the type of @ident@ is "char" (or "char +option"). +Option types are introduced when overall rule matching does not +imply matching of the bound sub-pattern. This is in particular the +case of @'(' regexp 'as' ident ')' '?'@ and of +@regexp_1 '|' '(' regexp_2 'as' ident ')'@. + +There is no linearity restriction over "as" bound variables. +When a variable is bound more than once, the previous rules are to be +extended as follows: +\begin{itemize} +\item A variable is a "char" variable when all its occurrences bind +"char" occurrences in the previous sense. +\item A variable is an "option" variable when the overall expression +can be matched without binding this variable. 
+\end{itemize} +For instance, in +"('a' as x) | ( 'a' (_ as x) )" the variable "x" is of type +"char", whereas in +"(\"ab\" as x) | ( 'a' (_ as x) ? )" the variable "x" is of type +"string option". + + +In some cases, a successful match may not yield a unique set of bindings. +For instance the matching of \verb+aba+ by the regular expression +"(('a'|\"ab\") as x) ((\"ba\"|'a') as y)" may result in binding +either +\verb+x+ to \verb+"ab"+ and \verb+y+ to \verb+"a"+, or +\verb+x+ to \verb+"a"+ and \verb+y+ to \verb+"ba"+. +The automata produced by "ocamllex" on such ambiguous regular +expressions will select one of the possible resulting sets of +bindings. +The selected set of bindings is purposely left unspecified. + +\subsection{Refill handlers} +\label{ss:refill-handlers} + +By default, when ocamllex reaches the end of its lexing buffer, it +will silently call the "refill_buff" function of the "lexbuf" structure +and continue lexing. It is sometimes useful to be able to take control +of the refilling action; typically, if you use a library for asynchronous +computation, you may want to wrap the refilling action in a delaying +function to avoid blocking synchronous operations. + +Since OCaml 4.02, it is possible to specify a \var{refill-handler}, +a function that will be called when refill happens. It is passed the +continuation of the lexing, on which it has total control. The OCaml +expression used as refill action should have a type that is an +instance of +\begin{verbatim} + (Lexing.lexbuf -> 'a) -> Lexing.lexbuf -> 'a +\end{verbatim} +where the first argument is the continuation which captures the +processing ocamllex would usually perform (refilling the buffer, then +calling the lexing function again), and the result type that +instantiates ['a] should unify with the result type of all lexing +rules. 
+ +As an example, consider the following lexer that is parametrized over +an arbitrary monad: +\begin{verbatim} +{ +type token = EOL | INT of int | PLUS + +module Make (M : sig + type 'a t + val return: 'a -> 'a t + val bind: 'a t -> ('a -> 'b t) -> 'b t + val fail : string -> 'a t + + (* Set up lexbuf *) + val on_refill : Lexing.lexbuf -> unit t + end) += struct + +let refill_handler k lexbuf = + M.bind (M.on_refill lexbuf) (fun () -> k lexbuf) + +} + +refill {refill_handler} + +rule token = parse +| [' ' '\t'] + { token lexbuf } +| '\n' + { M.return EOL } +| ['0'-'9']+ as i + { M.return (INT (int_of_string i)) } +| '+' + { M.return PLUS } +| _ + { M.fail "unexpected character" } +{ +end +} +\end{verbatim} + +\subsection{Reserved identifiers} + +All identifiers starting with "__ocaml_lex" are reserved for use by +"ocamllex"; do not use any such identifier in your programs. + + +\section{Overview of \texttt{ocamlyacc}} + +The "ocamlyacc" command produces a parser from a context-free grammar +specification with attached semantic actions, in the style of "yacc". +Assuming the input file is \var{grammar}".mly", executing +\begin{alltt} + ocamlyacc \var{options} \var{grammar}.mly +\end{alltt} +produces OCaml code for a parser in the file \var{grammar}".ml", +and its interface in file \var{grammar}".mli". + +The generated module defines one parsing function per entry point in +the grammar. These functions have the same names as the entry points. +Parsing functions take as arguments a lexical analyzer (a function +from lexer buffers to tokens) and a lexer buffer, and return the +semantic attribute of the corresponding entry point. Lexical analyzer +functions are usually generated from a lexer specification by the +"ocamllex" program. Lexer buffers are an abstract data type +implemented in the standard library module "Lexing". Tokens are values from +the concrete type "token", defined in the interface file +\var{grammar}".mli" produced by "ocamlyacc". 
+ +\section{Syntax of grammar definitions} + +Grammar definitions have the following format: +\begin{alltt} +\%\{ + \var{header} +\%\} + \var{declarations} +\%\% + \var{rules} +\%\% + \var{trailer} +\end{alltt} + +Comments are enclosed between \verb|/*| and \verb|*/| (as in C) in the +``declarations'' and ``rules'' sections, and between \verb|(*| and +\verb|*)| (as in OCaml) in the ``header'' and ``trailer'' sections. + +\subsection{Header and trailer} + +The header and the trailer sections are OCaml code that is copied +as is into file \var{grammar}".ml". Both sections are optional. The header +goes at the beginning of the output file; it usually contains +"open" directives and auxiliary functions required by the semantic +actions of the rules. The trailer goes at the end of the output file. + +\subsection{Declarations} + +Declarations are given one per line. They all start with a \verb"%" sign. + +\begin{options} + +\item[@"%token" constr \ldots constr@] +Declare the given symbols @constr \ldots constr@ +as tokens (terminal symbols). These symbols +are added as constant constructors for the "token" concrete type. + +\item[@"%token" "<" typexpr ">" constr \ldots constr@] +Declare the given symbols @constr \ldots constr@ as tokens with an +attached attribute of the +given type. These symbols are added as constructors with arguments of +the given type for the "token" concrete type. The @typexpr@ part is +an arbitrary OCaml type expression, except that all type +constructor names must be fully qualified (e.g. "Modname.typename") +for all types except standard built-in types, even if the proper +\verb|open| directives (e.g. \verb|open Modname|) were given in the +header section. That's because the header is copied only to the ".ml" +output file, but not to the ".mli" output file, while the @typexpr@ part +of a \verb"%token" declaration is copied to both. + +\item[@"%start" symbol \ldots symbol@] +Declare the given symbols as entry points for the grammar. 
For each +entry point, a parsing function with the same name is defined in the +output module. Non-terminals that are not declared as entry points +have no such parsing function. Start symbols must be given a type with +the \verb|%type| directive below. + +\item[@"%type" "<" typexpr ">" symbol \ldots symbol@] +Specify the type of the semantic attributes for the given symbols. +This is mandatory for start symbols only. Other nonterminal symbols +need not be given types by hand: these types will be inferred when +running the output files through the OCaml compiler (unless the +\verb"-s" option is in effect). The @typexpr@ part is an arbitrary OCaml +type expression, except that all type constructor names must be +fully qualified, as explained above for "%token". + +\item[@"%left" symbol \ldots symbol@] +\item[@"%right" symbol \ldots symbol@] +\item[@"%nonassoc" symbol \ldots symbol@] + +Associate precedences and associativities to the given symbols. All +symbols on the same line are given the same precedence. They have +higher precedence than symbols declared before in a \verb"%left", +\verb"%right" or \verb"%nonassoc" line. They have lower precedence +than symbols declared after in a \verb"%left", \verb"%right" or +\verb"%nonassoc" line. The symbols are declared to associate to the +left (\verb"%left"), to the right (\verb"%right"), or to be +non-associative (\verb"%nonassoc"). The symbols are usually tokens. +They can also be dummy nonterminals, for use with the \verb"%prec" +directive inside the rules. + +The precedence declarations are used in the following way to +resolve reduce/reduce and shift/reduce conflicts: +\begin{itemize} +\item Tokens and rules have precedences. By default, the precedence + of a rule is the precedence of its rightmost terminal. You + can override this default by using the @"%prec"@ directive in the rule. 
+\item A reduce/reduce conflict + is resolved in favor of the first rule (in the order given by the + source file), and "ocamlyacc" outputs a warning. +\item A shift/reduce conflict + is resolved by comparing the precedence of the rule to be + reduced with the precedence of the token to be shifted. If the + precedence of the rule is higher, then the rule will be reduced; + if the precedence of the token is higher, then the token will + be shifted. +\item A shift/reduce conflict between a rule and a token with the + same precedence will be resolved using the associativity: if the + token is left-associative, then the parser will reduce; if the + token is right-associative, then the parser will shift. If the + token is non-associative, then the parser will declare a syntax + error. +\item When a shift/reduce conflict cannot be resolved using the above + method, then "ocamlyacc" will output a warning and the parser will + always shift. +\end{itemize} + +\end{options} + +\subsection{Rules} + +The syntax for rules is as usual: +\begin{alltt} +\var{nonterminal} : + \var{symbol} \ldots \var{symbol} \{ \var{semantic-action} \} + | \ldots + | \var{symbol} \ldots \var{symbol} \{ \var{semantic-action} \} +; +\end{alltt} +% +Rules can also contain the \verb"%prec "{\it symbol} directive in the +right-hand side part, to override the default precedence and +associativity of the rule with the precedence and associativity of the +given symbol. + +Semantic actions are arbitrary OCaml expressions, that +are evaluated to produce the semantic attribute attached to +the defined nonterminal. The semantic actions can access the +semantic attributes of the symbols in the right-hand side of +the rule with the \verb"$" notation: \verb"$1" is the attribute for the +first (leftmost) symbol, \verb"$2" is the attribute for the second +symbol, etc. + +The rules may contain the special symbol "error" to indicate +resynchronization points, as in "yacc". 
+ +Actions occurring in the middle of rules are not supported. + +Nonterminal symbols are like regular OCaml symbols, except that they +cannot end with "'" (single quote). + +\subsection{Error handling} + +Error recovery is supported as follows: when the parser reaches an +error state (no grammar rules can apply), it calls a function named +"parse_error" with the string "\"syntax error\"" as argument. The default +"parse_error" function does nothing and returns, thus initiating error +recovery (see below). The user can define a customized "parse_error" +function in the header section of the grammar file. + +The parser also enters error recovery mode if one of the grammar +actions raises the "Parsing.Parse_error" exception. + +In error recovery mode, the parser discards states from the +stack until it reaches a place where the error token can be shifted. +It then discards tokens from the input until it finds three successive +tokens that can be accepted, and starts processing with the first of +these. If no state can be uncovered where the error token can be +shifted, then the parser aborts by raising the "Parsing.Parse_error" +exception. + +Refer to documentation on "yacc" for more details and guidance in how +to use error recovery. + +\section{Options} + +The "ocamlyacc" command recognizes the following options: + +\begin{options} + +\item["-b"{\it prefix}] +Name the output files {\it prefix}".ml", {\it prefix}".mli", +{\it prefix}".output", instead of the default naming convention. + +\item["-q"] +This option has no effect. + +\item["-v"] +Generate a description of the parsing tables and a report on conflicts +resulting from ambiguities in the grammar. The description is put in +file \var{grammar}".output". + +\item["-version"] +Print version string and exit. + +\item["-vnum"] +Print short version number and exit. + +\item["-"] +Read the grammar specification from standard input. The default +output file names are "stdin.ml" and "stdin.mli". 
+ +\item["--" \var{file}] +Process \var{file} as the grammar specification, even if its name +starts with a dash (-) character. This option must be the last on the +command line. + +\end{options} + +At run-time, the "ocamlyacc"-generated parser can be debugged by +setting the "p" option in the "OCAMLRUNPARAM" environment variable +(see section~\ref{ocamlrun-options}). This causes the pushdown +automaton executing the parser to print a trace of its action (tokens +shifted, rules reduced, etc). The trace mentions rule numbers and +state numbers that can be interpreted by looking at the file +\var{grammar}".output" generated by "ocamlyacc -v". + +\section{A complete example} + +The all-time favorite: a desk calculator. This program reads +arithmetic expressions on standard input, one per line, and prints +their values. Here is the grammar definition: +\begin{verbatim} + /* File parser.mly */ + %token <int> INT + %token PLUS MINUS TIMES DIV + %token LPAREN RPAREN + %token EOL + %left PLUS MINUS /* lowest precedence */ + %left TIMES DIV /* medium precedence */ + %nonassoc UMINUS /* highest precedence */ + %start main /* the entry point */ + %type <int> main + %% + main: + expr EOL { $1 } + ; + expr: + INT { $1 } + | LPAREN expr RPAREN { $2 } + | expr PLUS expr { $1 + $3 } + | expr MINUS expr { $1 - $3 } + | expr TIMES expr { $1 * $3 } + | expr DIV expr { $1 / $3 } + | MINUS expr %prec UMINUS { - $2 } + ; +\end{verbatim} +Here is the definition for the corresponding lexer: +\begin{verbatim} + (* File lexer.mll *) + { + open Parser (* The type token is defined in parser.mli *) + exception Eof + } + rule token = parse + [' ' '\t'] { token lexbuf } (* skip blanks *) + | ['\n' ] { EOL } + | ['0'-'9']+ as lxm { INT(int_of_string lxm) } + | '+' { PLUS } + | '-' { MINUS } + | '*' { TIMES } + | '/' { DIV } + | '(' { LPAREN } + | ')' { RPAREN } + | eof { raise Eof } +\end{verbatim} +Here is the main program, that combines the parser with the lexer: +\begin{verbatim} + (* File calc.ml *) + 
let _ = + try + let lexbuf = Lexing.from_channel stdin in + while true do + let result = Parser.main Lexer.token lexbuf in + print_int result; print_newline(); flush stdout + done + with Lexer.Eof -> + exit 0 +\end{verbatim} +To compile everything, execute: +\begin{verbatim} + ocamllex lexer.mll # generates lexer.ml + ocamlyacc parser.mly # generates parser.ml and parser.mli + ocamlc -c parser.mli + ocamlc -c lexer.ml + ocamlc -c parser.ml + ocamlc -c calc.ml + ocamlc -o calc lexer.cmo parser.cmo calc.cmo +\end{verbatim} + +\section{Common errors} + +\begin{options} + +\item[ocamllex: transition table overflow, automaton is too big] + +The deterministic automata generated by "ocamllex" are limited to at +most 32767 transitions. The message above indicates that your lexer +definition is too complex and overflows this limit. This is commonly +caused by lexer definitions that have separate rules for each of the +alphabetic keywords of the language, as in the following example. +\begin{verbatim} +rule token = parse + "keyword1" { KWD1 } +| "keyword2" { KWD2 } +| ... +| "keyword100" { KWD100 } +| ['A'-'Z' 'a'-'z'] ['A'-'Z' 'a'-'z' '0'-'9' '_'] * as id + { IDENT id} +\end{verbatim} +To keep the generated automata small, rewrite those definitions with +only one general ``identifier'' rule, followed by a hashtable lookup +to separate keywords from identifiers: +\begin{verbatim} +{ let keyword_table = Hashtbl.create 53 + let _ = + List.iter (fun (kwd, tok) -> Hashtbl.add keyword_table kwd tok) + [ "keyword1", KWD1; + "keyword2", KWD2; ... + "keyword100", KWD100 ] +} +rule token = parse + ['A'-'Z' 'a'-'z'] ['A'-'Z' 'a'-'z' '0'-'9' '_'] * as id + { try + Hashtbl.find keyword_table id + with Not_found -> + IDENT id } +\end{verbatim} + +\item[ocamllex: Position memory overflow, too many bindings] +The deterministic automata generated by "ocamllex" maintain a table of +positions inside the scanned lexer buffer. The size of this table is +limited to at most 255 cells. 
This error should not show up in normal +situations. + +\end{options} diff --git a/manual/manual/cmds/native.etex b/manual/manual/cmds/native.etex new file mode 100644 index 00000000..c06a8186 --- /dev/null +++ b/manual/manual/cmds/native.etex @@ -0,0 +1,231 @@ +\chapter{Native-code compilation (ocamlopt)} \label{c:nativecomp} +\pdfchapter{Native-code compilation (ocamlopt)} +%HEVEA\cutname{native.html} + +This chapter describes the OCaml high-performance +native-code compiler "ocamlopt", which compiles OCaml source files to +native code object files and links these object files to produce +standalone executables. + +The native-code compiler is only available on certain platforms. +It produces code that runs faster than the bytecode produced by +"ocamlc", at the cost of increased compilation time and executable code +size. Compatibility with the bytecode compiler is extremely high: the +same source code should run identically when compiled with "ocamlc" and +"ocamlopt". + +It is not possible to mix native-code object files produced by "ocamlopt" +with bytecode object files produced by "ocamlc": a program must be +compiled entirely with "ocamlopt" or entirely with "ocamlc". Native-code +object files produced by "ocamlopt" cannot be loaded in the toplevel +system "ocaml". + +\section{Overview of the compiler} + +The "ocamlopt" command has a command-line interface very close to that +of "ocamlc". It accepts the same types of arguments, and processes them +sequentially, after all options have been processed: + +\begin{itemize} +\item +Arguments ending in ".mli" are taken to be source files for +compilation unit interfaces. Interfaces specify the names exported by +compilation units: they declare value names with their types, define +public data types, declare abstract data types, and so on. From the +file \var{x}".mli", the "ocamlopt" compiler produces a compiled interface +in the file \var{x}".cmi". 
The interface produced is identical to that +produced by the bytecode compiler "ocamlc". + +\item +Arguments ending in ".ml" are taken to be source files for compilation +unit implementations. Implementations provide definitions for the +names exported by the unit, and also contain expressions to be +evaluated for their side-effects. From the file \var{x}".ml", the "ocamlopt" +compiler produces two files: \var{x}".o", containing native object code, +and \var{x}".cmx", containing extra information for linking and +optimization of the clients of the unit. The compiled implementation +should always be referred to under the name \var{x}".cmx" (when given +a ".o" or ".obj" file, "ocamlopt" assumes that it contains code compiled from C, +not from OCaml). + +The implementation is checked against the interface file \var{x}".mli" +(if it exists) as described in the manual for "ocamlc" +(chapter~\ref{c:camlc}). + +\item +Arguments ending in ".cmx" are taken to be compiled object code. These +files are linked together, along with the object files obtained +by compiling ".ml" arguments (if any), and the OCaml standard +library, to produce a native-code executable program. The order in +which ".cmx" and ".ml" arguments are presented on the command line is +relevant: compilation units are initialized in that order at +run-time, and it is a link-time error to use a component of a unit +before having initialized it. Hence, a given \var{x}".cmx" file must come +before all ".cmx" files that refer to the unit \var{x}. + +\item +Arguments ending in ".cmxa" are taken to be libraries of object code. +Such a library packs in two files (\var{lib}".cmxa" and \var{lib}".a"/".lib") +a set of object files (".cmx" and ".o"/".obj" files). Libraries are build with +"ocamlopt -a" (see the description of the "-a" option below). The object +files contained in the library are linked as regular ".cmx" files (see +above), in the order specified when the library was built. 
The only +difference is that if an object file contained in a library is not +referenced anywhere in the program, then it is not linked in. + +\item +Arguments ending in ".c" are passed to the C compiler, which generates +a ".o"/".obj" object file. This object file is linked with the program. + +\item +Arguments ending in ".o", ".a" or ".so" (".obj", ".lib" and ".dll" +under Windows) are assumed to be C object files and +libraries. They are linked with the program. + +\end{itemize} + +The output of the linking phase is a regular Unix or Windows +executable file. It does not need "ocamlrun" to run. + +\section{Options} + +The following command-line options are recognized by "ocamlopt". +The options "-pack", "-a", "-shared", "-c" and "-output-obj" are mutually +exclusive. + +% Configure boolean variables used by the macros in unified-options.etex +\compfalse +\nattrue +\topfalse +% unified-options gathers all options across the native/bytecode +% compilers and toplevel +\input{unified-options.tex} + +\paragraph{Options for the IA32 architecture} +The IA32 code generator (Intel Pentium, AMD Athlon) supports the +following additional option: + +\begin{options} +\item["-ffast-math"] Use the IA32 instructions to compute +trigonometric and exponential functions, instead of calling the +corresponding library routines. The functions affected are: +"atan", "atan2", "cos", "log", "log10", "sin", "sqrt" and "tan". +The resulting code runs faster, but the range of supported arguments +and the precision of the result can be reduced. In particular, +trigonometric operations "cos", "sin", "tan" have their range reduced to +$[-2^{64}, 2^{64}]$. +\end{options} + +\paragraph{Options for the AMD64 architecture} +The AMD64 code generator (64-bit versions of Intel Pentium and AMD +Athlon) supports the following additional options: + +\begin{options} +\item["-fPIC"] Generate position-independent machine code. This is +the default. 
+\item["-fno-PIC"] Generate position-dependent machine code. +\end{options} + +\paragraph{Contextual control of command-line options} + +The compiler command line can be modified ``from the outside'' +with the following mechanisms. These are experimental +and subject to change. They should be used only for experimental and +development work, not in released packages. + +\begin{options} +\item["OCAMLPARAM" \rm(environment variable)] +A set of arguments that will be inserted before or after the arguments from +the command line. Arguments are specified in a comma-separated list +of "name=value" pairs. A "_" is used to specify the position of +the command line arguments, i.e. "a=x,_,b=y" means that "a=x" should be +executed before parsing the arguments, and "b=y" after. Finally, +an alternative separator can be specified as the +first character of the string, within the set ":|; ,". +\item["ocaml_compiler_internal_params" \rm(file in the stdlib directory)] +A mapping of file names to lists of arguments that +will be added to the command line (and "OCAMLPARAM") arguments. +\item["OCAML_FLEXLINK" \rm(environment variable)] +Alternative executable to use on native +Windows for "flexlink" instead of the +configured value. Primarily used for bootstrapping. +\end{options} + +\section{Common errors} + +The error messages are almost identical to those of "ocamlc". +See section~\ref{s:comp-errors}. + +\section{Running executables produced by ocamlopt} + +Executables generated by "ocamlopt" are native, stand-alone executable +files that can be invoked directly. They do +not depend on the "ocamlrun" bytecode runtime system nor on +dynamically-loaded C/OCaml stub libraries. 
+ +During execution of an "ocamlopt"-generated executable, +the following environment variables are also consulted: +\begin{options} +\item["OCAMLRUNPARAM"] Same usage as in "ocamlrun" + (see section~\ref{ocamlrun-options}), except that option "l" + is ignored (the operating system's stack size limit + is used instead). +\item["CAMLRUNPARAM"] If "OCAMLRUNPARAM" is not found in the + environment, then "CAMLRUNPARAM" will be used instead. If + "CAMLRUNPARAM" is not found, then the default values will be used. +\end{options} + +\section{Compatibility with the bytecode compiler} +\label{s:compat-native-bytecode} + +This section lists the known incompatibilities between the bytecode +compiler and the native-code compiler. Except on those points, the two +compilers should generate code that behave identically. + +\begin{itemize} + +\item Signals are detected only when the program performs an +allocation in the heap. That is, if a signal is delivered while in a +piece of code that does not allocate, its handler will not be called +until the next heap allocation. + +\item Stack overflow, typically caused by excessively deep recursion, +is not always turned into a "Stack_overflow" exception like the +bytecode compiler does. The runtime system makes a best effort to +trap stack overflows and raise the "Stack_overflow" exception, but +sometimes it fails and a ``segmentation fault'' or another system fault +occurs instead. + +\item On ARM and PowerPC processors (32 and 64 bits), fused + multiply-add (FMA) instructions can be generated for a + floating-point multiplication followed by a floating-point addition + or subtraction, as in "x *. y +. z". The FMA instruction avoids + rounding the intermediate result "x *. y", which is generally + beneficial, but produces floating-point results that differ slightly + from those produced by the bytecode interpreter. 
+ +\item On IA32 processors only (Intel and AMD x86 processors in 32-bit +mode), some intermediate results in floating-point computations are +kept in extended precision rather than being rounded to double +precision like the bytecode compiler always does. Floating-point +results can therefore differ slightly between bytecode and native code. + +\item The native-code compiler performs a number of optimizations that +the bytecode compiler does not perform, especially when the Flambda +optimizer is active. In particular, the native-code compiler +identifies and eliminates ``dead code'', i.e.\ computations that do +not contribute to the results of the program. For example, +\begin{verbatim} + let _ = ignore M.f +\end{verbatim} +contains a reference to compilation unit "M" when compiled to +bytecode. This reference forces "M" to be linked and its +initialization code to be executed. The native-code compiler +eliminates the reference to "M", hence the compilation unit "M" may +not be linked and executed. A workaround is to compile "M" with the +"-linkall" flag so that it will always be linked and executed, even if +not referenced. See also the "Sys.opaque_identity" function from the +"Sys" standard library module. + +\end{itemize} + diff --git a/manual/manual/cmds/ocamlbuild.etex b/manual/manual/cmds/ocamlbuild.etex new file mode 100644 index 00000000..9a55b48a --- /dev/null +++ b/manual/manual/cmds/ocamlbuild.etex @@ -0,0 +1,6 @@ +\chapter{The ocamlbuild compilation manager} \label{c:ocamlbuild} +\pdfchapter{The ocamlbuild compilation manager} + +Since OCaml version 4.03, the ocamlbuild compilation manager is +distributed separately from the OCaml compiler. The project is now +hosted at \url{https://github.com/ocaml/ocamlbuild/}. 
diff --git a/manual/manual/cmds/ocamldoc.etex b/manual/manual/cmds/ocamldoc.etex new file mode 100644 index 00000000..d7346bb4 --- /dev/null +++ b/manual/manual/cmds/ocamldoc.etex @@ -0,0 +1,1136 @@ +\chapter{The documentation generator (ocamldoc)} \label{c:ocamldoc} +\pdfchapter{The documentation generator (ocamldoc)} +%HEVEA\cutname{ocamldoc.html} + +This chapter describes OCamldoc, a tool that generates documentation from +special comments embedded in source files. The comments used by OCamldoc +are of the form "(**"\ldots"*)" and follow the format described +in section \ref{s:ocamldoc-comments}. + +OCamldoc can produce documentation in various formats: HTML, \LaTeX , +TeXinfo, Unix man pages, and "dot" dependency graphs. Moreover, +users can add their own custom generators, as explained in +section \ref{s:ocamldoc-custom-generators}. + +In this chapter, we use the word {\em element} to refer to any of the +following parts of an OCaml source file: a type declaration, a value, +a module, an exception, a module type, a type constructor, a record +field, a class, a class type, a class method, a class value or a class +inheritance clause. + +\section{Usage} \label{s:ocamldoc-usage} + +\subsection{Invocation} + +OCamldoc is invoked via the command "ocamldoc", as follows: +\begin{alltt} + ocamldoc \var{options} \var{sourcefiles} +\end{alltt} + +\subsubsection*{Options for choosing the output format} + +The following options determine the format for the generated +documentation. + +\begin{options} +\item["-html"] +Generate documentation in HTML default format. The generated HTML pages +are stored in the current directory, or in the directory specified +with the {\bf\tt -d} option. You can customize the style of the +generated pages by editing the generated "style.css" file, or by providing +your own style sheet using option "-css-style". +The file "style.css" is not generated if it already exists or if -css-style is used. 
+ +\item["-latex"] +Generate documentation in \LaTeX\ default format. The generated +\LaTeX\ document is saved in file "ocamldoc.out", or in the file +specified with the {\bf\tt -o} option. The document uses the style file +"ocamldoc.sty". This file is generated when using the "-latex" option, +if it does not already exist. +You can change this file to customize the style of your \LaTeX\ documentation. + +\item["-texi"] +Generate documentation in TeXinfo default format. The generated +\LaTeX\ document is saved in file "ocamldoc.out", or in the file +specified with the {\bf\tt -o} option. + +\item["-man"] +Generate documentation as a set of Unix "man" pages. The generated pages +are stored in the current directory, or in the directory specified +with the {\bf\tt -d} option. + +\item["-dot"] +Generate a dependency graph for the toplevel modules, in a format suitable +for displaying and processing by "dot". The "dot" tool is available from +\url{http://www.research.att.com/sw/tools/graphviz/}. +The textual representation of the graph is written to the file +"ocamldoc.out", or to the file specified with the {\bf\tt -o} option. +Use "dot ocamldoc.out" to display it. + +\item["-g" \var{file.cm[o,a,xs]}] +Dynamically load the given file, which defines a custom documentation +generator. See section \ref{s:ocamldoc-compilation-and-usage}. This +option is supported by the "ocamldoc" command (to load ".cmo" and ".cma" files) +and by its native-code version "ocamldoc.opt" (to load ".cmxs" files). +If the given file is a simple one and does not exist in +the current directory, then ocamldoc looks for it in the custom +generators default directory, and in the directories specified with +optional "-i" options. + +\item["-customdir"] +Display the custom generators default directory. + +\item["-i" \var{directory}] +Add the given directory to the path where to look for custom generators. 
+ +\end{options} + +\subsubsection*{General options} + +\begin{options} + +\item["-d" \var{dir}] +Generate files in directory \var{dir}, rather than the current directory. + +\item["-dump" \var{file}] +Dump collected information into \var{file}. This information can be +read with the "-load" option in a subsequent invocation of "ocamldoc". + +\item["-hide" \var{modules}] +Hide the given complete module names in the generated documentation. +\var{modules} is a list of complete module names separated + by '","', without blanks. For instance: "Pervasives,M2.M3". + +\item["-inv-merge-ml-mli"] +Reverse the precedence of implementations and interfaces when merging. +All elements +in implementation files are kept, and the {\bf\tt -m} option +indicates which parts of the comments in interface files are merged +with the comments in implementation files. + +\item["-keep-code"] +Always keep the source code for values, methods and instance variables, +when available. + +\item["-load" \var{file}] +Load information from \var{file}, which has been produced by +"ocamldoc -dump". Several "-load" options can be given. + +\item["-m" \var{flags}] +Specify merge options between interfaces and implementations. +(see section \ref{s:ocamldoc-merge} for details). +\var{flags} can be one or several of the following characters: +\begin{options} + \item["d"] merge description + \item["a"] merge "\@author" + \item["v"] merge "\@version" + \item["l"] merge "\@see" + \item["s"] merge "\@since" + \item["b"] merge "\@before" + \item["o"] merge "\@deprecated" + \item["p"] merge "\@param" + \item["e"] merge "\@raise" + \item["r"] merge "\@return" + \item["A"] merge everything +\end{options} + +\item["-no-custom-tags"] +Do not allow custom \@-tags (see section \ref{s:ocamldoc-tags}). + +\item["-no-stop"] +Keep elements placed after/between the "(**/**)" special comment(s) +(see section \ref{s:ocamldoc-comments}). 
+ +\item["-o" \var{file}] +Output the generated documentation to \var{file} instead of "ocamldoc.out". +This option is meaningful only in conjunction with the +{\bf\tt -latex}, {\bf\tt -texi}, or {\bf\tt -dot} options. + +\item["-pp" \var{command}] +Pipe sources through preprocessor \var{command}. + +\item["-impl" \var{filename}] +Process the file \var{filename} as an implementation file, even if its +extension is not ".ml". + +\item["-intf" \var{filename}] +Process the file \var{filename} as an interface file, even if its +extension is not ".mli". + +\item["-text" \var{filename}] +Process the file \var{filename} as a text file, even if its +extension is not ".txt". + +\item["-sort"] +Sort the list of top-level modules before generating the documentation. + +\item["-stars"] +Remove blank characters until the first asterisk ('"*"') in each +line of comments. + +\item["-t" \var{title}] +Use \var{title} as the title for the generated documentation. + +\item["-intro" \var{file}] +Use content of \var{file} as ocamldoc text to use as introduction (HTML, +\LaTeX{} and TeXinfo only). +For HTML, the file is used to create the whole "index.html" file. + +\item["-v"] +Verbose mode. Display progress information. + +\item["-version"] +Print version string and exit. + +\item["-vnum"] +Print short version number and exit. + +\item["-warn-error"] +Treat Ocamldoc warnings as errors. + +\item["-hide-warnings"] +Do not print OCamldoc warnings. + +\item["-help" or "--help"] +Display a short usage summary and exit. +% +\end{options} + +\subsubsection*{Type-checking options} + +OCamldoc calls the OCaml type-checker to obtain type +information. The following options impact the type-checking phase. +They have the same meaning as for the "ocamlc" and "ocamlopt" commands. + +\begin{options} + +\item["-I" \var{directory}] +Add \var{directory} to the list of directories search for compiled +interface files (".cmi" files). + +\item["-nolabels"] +Ignore non-optional labels in types. 
+ +\item["-rectypes"] +Allow arbitrary recursive types. (See the "-rectypes" option to "ocamlc".) + +\end{options} + +\subsubsection*{Options for generating HTML pages} + +The following options apply in conjunction with the "-html" option: + +\begin{options} +\item["-all-params"] +Display the complete list of parameters for functions and methods. + +\item["-charset" \var{charset}] +Add information about character encoding being \var{charset} +(default is iso-8859-1). + +\item["-colorize-code"] +Colorize the OCaml code enclosed in "[ ]" and "{[ ]}", using colors +to emphasize keywords, etc. If the code fragments are not +syntactically correct, no color is added. + +\item["-css-style" \var{filename}] +Use \var{filename} as the Cascading Style Sheet file. + +\item["-index-only"] +Generate only index files. + +\item["-short-functors"] +Use a short form to display functors: +\begin{alltt} +module M : functor (A:Module) -> functor (B:Module2) -> sig .. end +\end{alltt} +is displayed as: +\begin{alltt} +module M (A:Module) (B:Module2) : sig .. end +\end{alltt} + +\end{options} + +\subsubsection*{Options for generating \LaTeX\ files} + +The following options apply in conjunction with the "-latex" option: + +\begin{options} +\item["-latex-value-prefix" \var{prefix}] +Give a prefix to use for the labels of the values in the generated +\LaTeX\ document. +The default prefix is the empty string. You can also use the options +{\tt -latex-type-prefix}, {\tt -latex-exception-prefix}, +{\tt -latex-module-prefix}, +{\tt -latex-module-type-prefix}, {\tt -latex-class-prefix}, +{\tt -latex-class-type-prefix}, +{\tt -latex-attribute-prefix} and {\tt -latex-method-prefix}. + +These options are useful when you have, for example, a type and a value with + the same name. If you do not specify prefixes, \LaTeX\ will complain about +multiply defined labels. + +\item["-latextitle" \var{n,style}] +Associate style number \var{n} to the given \LaTeX\ sectioning command +\var{style}, e.g. 
"section" or "subsection". (\LaTeX\ only.) This is +useful when including the generated document in another \LaTeX\ document, +at a given sectioning level. The default association is 1 for "section", +2 for "subsection", 3 for "subsubsection", 4 for "paragraph" and 5 for +"subparagraph". + +\item["-noheader"] +Suppress header in generated documentation. + +\item["-notoc"] +Do not generate a table of contents. + +\item["-notrailer"] +Suppress trailer in generated documentation. + +\item["-sepfiles"] +Generate one ".tex" file per toplevel module, instead of the global +"ocamldoc.out" file. +\end{options} + +\subsubsection*{Options for generating TeXinfo files} + +The following options apply in conjunction with the "-texi" option: + +\begin{options} +\item["-esc8"] +Escape accented characters in Info files. + +\item["-info-entry"] +Specify Info directory entry. + +\item["-info-section"] +Specify section of Info directory. + +\item["-noheader"] +Suppress header in generated documentation. + +\item["-noindex"] +Do not build index for Info files. + +\item["-notrailer"] +Suppress trailer in generated documentation. +\end{options} + +\subsubsection*{Options for generating "dot" graphs} + +The following options apply in conjunction with the "-dot" option: + +\begin{options} +\item["-dot-colors" \var{colors}] +Specify the colors to use in the generated "dot" code. +When generating module dependencies, "ocamldoc" uses different colors +for modules, depending on the directories in which they reside. +When generating types dependencies, "ocamldoc" uses different colors +for types, depending on the modules in which they are defined. +\var{colors} is a list of color names separated by '","', as +in "Red,Blue,Green". The available colors are the ones supported by +the "dot" tool. + +\item["-dot-include-all"] +Include all modules in the "dot" output, not only modules given +on the command line or loaded with the {\bf\tt -load} option. 
+ +\item["-dot-reduce"] +Perform a transitive reduction of the dependency graph before +outputting the "dot" code. This can be useful if there are +a lot of transitive dependencies that clutter the graph. + +\item["-dot-types"] +Output "dot" code describing the type dependency graph instead of +the module dependency graph. +\end{options} + +\subsubsection*{Options for generating man files} + +The following options apply in conjunction with the "-man" option: + +\begin{options} +\item["-man-mini"] +Generate man pages only for modules, module types, classes and class +types, instead of pages for all elements. + +\item["-man-suffix" \var{suffix}] +Set the suffix used for generated man filenames. Default is '"3o"', +as in "List.3o". + +\item["-man-section" \var{section}] +Set the section number used for generated man filenames. Default is '"3"'. + +\end{options} + +\subsection{Merging of module information} +\label{s:ocamldoc-merge} + +Information on a module can be extracted either from the ".mli" or ".ml" +file, or both, depending on the files given on the command line. +When both ".mli" and ".ml" files are given for the same module, +information extracted from these files is merged according to the +following rules: +\begin{itemize} +\item Only elements (values, types, classes, ...) declared in the ".mli" +file are kept. In other terms, definitions from the ".ml" file that are +not exported in the ".mli" file are not documented. +\item Descriptions of elements and descriptions in \@-tags are handled +as follows. If a description for the same element or in the same +\@-tag of the same element is present in both files, then the +description of the ".ml" file is concatenated to the one in the ".mli" file, +if the corresponding "-m" flag is given on the command line. +If a description is present in the ".ml" file and not in the +".mli" file, the ".ml" description is kept. +In either case, all the information given in the ".mli" file is kept. 
+\end{itemize} + +\subsection{Coding rules} +\label{s:ocamldoc-rules} +The following rules must be respected in order to avoid name clashes +resulting in cross-reference errors: +\begin{itemize} +\item In a module, there must not be two modules, two module types or + a module and a module type with the same name. + In the default HTML generator, modules "ab" and "AB" will be printed + to the same file on case insensitive file systems. +\item In a module, there must not be two classes, two class types or + a class and a class type with the same name. +\item In a module, there must not be two values, two types, or two + exceptions with the same name. +\item Values defined in tuple, as in "let (x,y,z) = (1,2,3)" +are not kept by OCamldoc. +\item Avoid the following construction: +\begin{caml_eval} +module Foo = struct module Bar = struct let x = 1 end end;; +\end{caml_eval} +\begin{caml_example*}{verbatim} +open Foo (* which has a module Bar with a value x *) +module Foo = + struct + module Bar = + struct + let x = 1 + end + end + let dummy = Bar.x +\end{caml_example*} +In this case, OCamldoc will associate "Bar.x" to the "x" of module +"Foo" defined just above, instead of to the "Bar.x" defined in the +opened module "Foo". +\end{itemize} + +\section{Syntax of documentation comments} +\label{s:ocamldoc-comments} + +Comments containing documentation material are called {\em special +comments} and are written between "(**" and "*)". Special comments +must start exactly with "(**". Comments beginning with "(" and more +than two "*" are ignored. + +\subsection{Placement of documentation comments} +OCamldoc can associate comments to some elements of the language +encountered in the source files. The association is made according to +the locations of comments with respect to the language elements. The +locations of comments in ".mli" and ".ml" files are different. 
+ +%%%%%%%%%%%%% +\subsubsection{Comments in ".mli" files} +A special comment is associated to an element if it is placed before or +after the element.\\ +A special comment before an element is associated to this element if~: +\begin{itemize} +\item There is no blank line or another special comment between the special +comment and the element. However, a regular comment can occur between +the special comment and the element. +\item The special comment is not already associated to the previous element. +\item The special comment is not the first one of a toplevel module. +\end{itemize} + +A special comment after an element is associated to this element if +there is no blank line or comment between the special comment and the +element. + +There are two exceptions: for constructors and record fields in +type definitions, the associated comment can only be placed after the +constructor or field definition, without blank lines or other comments +between them. The special comment for a constructor +with another constructor following must be placed before the '"|"' +character separating the two constructors. + +The following sample interface file "foo.mli" illustrates the +placement rules for comments in ".mli" files. + +\begin{caml_eval} +class cl = object end +\end{caml_eval} +\begin{caml_example*}{signature} +(** The first special comment of the file is the comment associated + with the whole module.*) + + +(** Special comments can be placed between elements and are kept + by the OCamldoc tool, but are not associated to any element. + @-tags in these comments are ignored.*) + +(*******************************************************************) +(** Comments like the one above, with more than two asterisks, + are ignored. *) + +(** The comment for function f. *) +val f : int -> int -> int +(** The continuation of the comment for function f. 
*) + +(** Comment for exception My_exception, even with a simple comment + between the special comment and the exception.*) +(* Hello, I'm a simple comment :-) *) +exception My_exception of (int -> int) * int + +(** Comment for type weather *) +type weather = +| Rain of int (** The comment for constructor Rain *) +| Sun (** The comment for constructor Sun *) + +(** Comment for type weather2 *) +type weather2 = +| Rain of int (** The comment for constructor Rain *) +| Sun (** The comment for constructor Sun *) +(** I can continue the comment for type weather2 here + because there is already a comment associated to the last constructor.*) + +(** The comment for type my_record *) +type my_record = { + foo : int ; (** Comment for field foo *) + bar : string ; (** Comment for field bar *) + } + (** Continuation of comment for type my_record *) + +(** Comment for foo *) +val foo : string +(** This comment is associated to foo and not to bar. *) +val bar : string +(** This comment is associated to bar. *) + +(** The comment for class my_class *) +class my_class : + object + (** A comment to describe inheritance from cl *) + inherit cl + + (** The comment for attribute tutu *) + val mutable tutu : string + + (** The comment for attribute toto. *) + val toto : int + + (** This comment is not attached to titi since + there is a blank line before titi, but is kept + as a comment in the class. *) + + val titi : string + + (** Comment for method toto *) + method toto : string + + (** Comment for method m *) + method m : float -> int + end + +(** The comment for the class type my_class_type *) +class type my_class_type = + object + (** The comment for variable x. *) + val mutable x : int + + (** The commend for method m. *) + method m : int -> int +end + +(** The comment for module Foo *) +module Foo : + sig + (** The comment for x *) + val x : int + + (** A special comment that is kept but not associated to any element *) + end + +(** The comment for module type my_module_type. 
*) +module type my_module_type = + sig + (** The comment for value x. *) + val x : int + + (** The comment for module M. *) + module M : + sig + (** The comment for value y. *) + val y : int + + (* ... *) + end + + end + +\end{caml_example*} + +%%%%%%%%%%%%% +\subsubsection{Comments in {\tt .ml} files} + +A special comment is associated to an element if it is placed before +the element and there is no blank line between the comment and the +element. Meanwhile, there can be a simple comment between the special +comment and the element. There are two exceptions, for +constructors and record fields in type definitions, whose associated +comment must be placed after the constructor or field definition, +without blank line between them. The special comment for a constructor +with another constructor following must be placed before the '"|"' +character separating the two constructors. + +The following example of file "toto.ml" shows where to place comments +in a ".ml" file. + +\begin{caml_example*}{verbatim} +(** The first special comment of the file is the comment associated + to the whole module. *) + +(** The comment for function f *) +let f x y = x + y + +(** This comment is not attached to any element since there is another + special comment just before the next element. *) + +(** Comment for exception My_exception, even with a simple comment + between the special comment and the exception.*) +(* A simple comment. 
*) +exception My_exception of (int -> int) * int + +(** Comment for type weather *) +type weather = +| Rain of int (** The comment for constructor Rain *) +| Sun (** The comment for constructor Sun *) + +(** The comment for type my_record *) +type my_record = { + foo : int ; (** Comment for field foo *) + bar : string ; (** Comment for field bar *) + } + +(** The comment for class my_class *) +class my_class = + object + (** A comment to describe inheritance from cl *) + inherit cl + + (** The comment for the instance variable tutu *) + val mutable tutu = "tutu" + (** The comment for toto *) + val toto = 1 + val titi = "titi" + (** Comment for method toto *) + method toto = tutu ^ "!" + (** Comment for method m *) + method m (f : float) = 1 + end + +(** The comment for class type my_class_type *) +class type my_class_type = + object + (** The comment for the instance variable x. *) + val mutable x : int + (** The comment for method m. *) + method m : int -> int + end + +(** The comment for module Foo *) +module Foo = + struct + (** The comment for x *) + let x = 0 + (** A special comment in the module, but not associated to any element. *) + end + +(** The comment for module type my_module_type. *) +module type my_module_type = + sig + (* Comment for value x. *) + val x : int + (* ... *) + end +\end{caml_example*} + +%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{The Stop special comment} +The special comment "(**/**)" tells OCamldoc to discard +elements placed after this comment, up to the end of the current +class, class type, module or module type, or up to the next stop comment. 
+For instance: +\begin{caml_example*}{signature} +class type foo = + object + (** comment for method m *) + method m : string + + (**/**) + + (** This method won't appear in the documentation *) + method bar : int + end + +(** This value appears in the documentation, since the Stop special comment + in the class does not affect the parent module of the class.*) +val foo : string + +(**/**) +(** The value bar does not appear in the documentation.*) +val bar : string +(**/**) + +(** The type t appears in the documentation since the previous stop comment +toggled off the "no documentation mode". *) +type t = string +\end{caml_example*} + +The {\bf\tt -no-stop} option to "ocamldoc" causes the Stop special +comments to be ignored. + +%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Syntax of documentation comments} + +The inside of documentation comments "(**"\ldots"*)" consists of +free-form text with optional formatting annotations, followed by +optional {\em tags} giving more specific information about parameters, +version, authors, \ldots\ The tags are distinguished by a leading "\@" +character. Thus, a documentation comment has the following shape: +\begin{verbatim} +(** The comment begins with a description, which is text formatted + according to the rules described in the next section. + The description continues until the first non-escaped '@' character. + @author Mr Smith + @param x description for parameter x +*) +\end{verbatim} +Some elements support only a subset of all \@-tags. Tags that are not +relevant to the documented element are simply ignored. For instance, +all tags are ignored when documenting type constructors, record +fields, and class inheritance clauses. Similarly, a "\@param" tag on a +class instance variable is ignored. + +Finally, "(**)" is the empty documentation comment. 
+ +%%%%%%%%%%%%% + +% enable section numbering for subsubsections (PR#6189, item 3) +\setcounter{secnumdepth}{3} + +\subsection{Text formatting} + +Here is the BNF grammar for the simple markup language used to format +text descriptions. + +\newpage + +\begin{syntax} +text: {{text-element}} +; +\end{syntax} + +\noindent +\begin{syntaxleft} +\nonterm{text-element}\is{} +\end{syntaxleft} + +\begin{tabular}{rlp{10cm}} +@||@&@ '{' {{ "0" \ldots "9" }} text '}' @ & format @text@ as a section header; + the integer following "{" indicates the sectioning level. \\ +@||@&@ '{' {{ "0" \ldots "9" }} ':' @ \nt{label} @ text '}' @ & + same, but also associate the name \nt{label} to the current point. + This point can be referenced by its fully-qualified label in a + "{!" command, just like any other element. \\ +@||@&@ '{b' text '}' @ & set @text@ in bold. \\ +@||@&@ '{i' text '}' @ & set @text@ in italic. \\ +@||@&@ '{e' text '}' @ & emphasize @text@. \\ +@||@&@ '{C' text '}' @ & center @text@. \\ +@||@&@ '{L' text '}' @ & left align @text@. \\ +@||@&@ '{R' text '}' @ & right align @text@. \\ +@||@&@ '{ul' list '}' @ & build a list. \\ +@||@&@ '{ol' list '}' @ & build an enumerated list. \\ +@||@&@ '{{:' string '}' text '}' @ & put a link to the given address +(given as @string@) on the given @text@. \\ +@||@&@ '[' string ']' @ & set the given @string@ in source code style. \\ +@||@&@ '{[' string ']}' @ & set the given @string@ in preformatted + source code style.\\ +@||@&@ '{v' string 'v}' @ & set the given @string@ in verbatim style. \\ +@||@&@ '{%' string '%}' @ & target-specific content + (\LaTeX\ code by default, see details + in \ref{sss:target-specific-syntax}) \\ +@||@&@ '{!' string '}' @ & insert a cross-reference to an element + (see section \ref{sss:crossref} for the syntax of cross-references).\\ +@||@&@ '{!modules:' string string ... '}' @ & insert an index table +for the given module names. 
Used in HTML only.\\ +@||@&@ '{!indexlist}' @ & insert a table of links to the various indexes +(types, values, modules, ...). Used in HTML only.\\ +@||@&@ '{^' text '}' @ & set text in superscript.\\ +@||@&@ '{_' text '}' @ & set text in subscript.\\ +@||@& \nt{escaped-string} & typeset the given string as is; +special characters ('"{"', '"}"', '"["', '"]"' and '"\@"') +must be escaped by a '"\\"'\\ +@||@& \nt{blank-line} & force a new line. +\end{tabular} \\ + +\subsubsection{List formatting} + +\begin{syntax} +list: +| {{ '{-' text '}' }} +| {{ '{li' text '}' }} +\end{syntax} + +A shortcut syntax exists for lists and enumerated lists: +\begin{verbatim} +(** Here is a {b list} +- item 1 +- item 2 +- item 3 + +The list is ended by the blank line.*) +\end{verbatim} +is equivalent to: +\begin{verbatim} +(** Here is a {b list} +{ul {- item 1} +{- item 2} +{- item 3}} +The list is ended by the blank line.*) +\end{verbatim} + +The same shortcut is available for enumerated lists, using '"+"' +instead of '"-"'. +Note that only one list can be defined by this shortcut in nested lists. + +\subsubsection{Cross-reference formatting} +\label{sss:crossref} + +Cross-references are fully qualified element names, as in the example +"{!Foo.Bar.t}". This is an ambiguous reference as it may designate +a type name, a value name, a class name, etc. It is possible to make +explicit the intended syntactic class, using "{!type:Foo.Bar.t}" to +designate a type, and "{!val:Foo.Bar.t}" a value of the same name. 
+ +The list of possible syntactic classes is as follows: +\begin{center} +\begin{tabular}{rl} +\multicolumn{1}{c}{"tag"} & \multicolumn{1}{c}{syntactic class}\\ \hline +"module:" & module \\ +"modtype:" & module type \\ +"class:" & class \\ +"classtype:" & class type \\ +"val:" & value \\ +"type:" & type \\ +"exception:" & exception \\ +"attribute:" & attribute \\ +"method:" & class method \\ +"section:" & ocamldoc section \\ +"const:" & variant constructor \\ +"recfield:" & record field +\end{tabular} +\end{center} + +In the case of variant constructors or record fields, the constructor +or field name should be preceded by the name of the corresponding type -- +to avoid the ambiguity of several types having the same constructor +names. For example, the constructor "Node" of the type "tree" will be +referenced as "{!tree.Node}" or "{!const:tree.Node}", or possibly +"{!Mod1.Mod2.tree.Node}" from outside the module. + +\subsubsection{First sentence} + +In the description of a value, type, exception, module, module type, class +or class type, the {\em first sentence} is sometimes used in indexes, or +when just a part of the description is needed. The first sentence +is composed of the first characters of the description, until +\begin{itemize} +\item the first dot followed by a blank, or +\item the first blank line +\end{itemize} +outside of the following text formatting : +@ '{ul' list '}' @, +@ '{ol' list '}' @, +@ '[' string ']' @, +@ '{[' string ']}' @, +@ '{v' string 'v}' @, +@ '{%' string '%}' @, +@ '{!' string '}' @, +@ '{^' text '}' @, +@ '{_' text '}' @. + +\subsubsection{Target-specific formatting} +\label{sss:target-specific-syntax} + +The content inside "{%foo: ... %}" is target-specific and will only be +interpreted by the backend "foo", and ignored by the others. The +backends of the distribution are "latex", "html", "texi" and "man". If +no target is specified (syntax "{% ... %}"), "latex" is chosen by +default. 
Custom generators may support their own target prefix. + +\subsubsection{Recognized HTML tags} +The HTML tags "<b>..</b>", +"<code>..</code>", +"<i>..</i>", +"<ul>..</ul>", +"<ol>..</ol>", +"<li>..</li>", +"<center>..</center>" and +"<h[0-9]>..</h[0-9]>" can be used instead of, respectively, +@ '{b ..}' @, +@ '[..]' @, +@ '{i ..}' @, +@ '{ul ..}' @, +@ '{ol ..}' @, +@ '{li ..}' @, +@ '{C ..}' @ and +"{[0-9] ..}". + +%disable section numbering for subsubsections +\setcounter{secnumdepth}{2} + +%%%%%%%%%%%%% +\subsection{Documentation tags (\@-tags)} +\label{s:ocamldoc-tags} + +\subsubsection{Predefined tags} +The following table gives the list of predefined \@-tags, with their +syntax and meaning.\\ + +\begin{tabular}{|p{5cm}|p{10cm}|}\hline +@ "@author" string @ & The author of the element. One author per +"\@author" tag. +There may be several "\@author" tags for the same element. \\ \hline + +@ "@deprecated" text @ & The @text@ should describe when the element was +deprecated, what to use as a replacement, and possibly the reason +for deprecation. \\ \hline + +@ "@param" id text @ & Associate the given description (@text@) to the +given parameter name @id@. This tag is used for functions, +methods, classes and functors. \\ \hline + +@ "@raise" Exc text @ & Explain that the element may raise + the exception @Exc@. \\ \hline + +@ "@return" text @ & Describe the return value and + its possible values. This tag is used for functions + and methods. \\ \hline + +@ "@see" '<' URL '>' text @ & Add a reference to the @URL@ +with the given @text@ as comment. \\ \hline + +@ "@see" "'"@\nt{filename}@"'" text @ & Add a reference to the given file name +(written between single quotes), with the given @text@ as comment. \\ \hline + +@ "@see" '"'@\nt{document-name}@'"' text @ & Add a reference to the given +document name (written between double quotes), with the given @text@ +as comment. \\ \hline + +@ "@since" string @ & Indicate when the element was introduced. \\ \hline + +@ "@before" @ \nt{version} @ text @ & Associate the given description (@text@) +to the given \nt{version} in order to document compatibility issues. \\ \hline + +@ "@version" string @ & The version number for the element. 
\\ \hline +\end{tabular} + +\subsubsection{Custom tags} +\label{s:ocamldoc-custom-tags} +You can use custom tags in the documentation comments, but they will +have no effect if the generator used does not handle them. To use a +custom tag, for example "foo", just put "\@foo" with some text in your +comment, as in: +\begin{verbatim} +(** My comment to show you a custom tag. +@foo this is the text argument to the [foo] custom tag. +*) +\end{verbatim} + +To handle custom tags, you need to define a custom generator, +as explained in section \ref{s:ocamldoc-handling-custom-tags}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Custom generators} +\label{s:ocamldoc-custom-generators} + +OCamldoc operates in two steps: +\begin{enumerate} +\item analysis of the source files; +\item generation of documentation, through a documentation generator, + which is an object of class "Odoc_args.class_generator". +\end{enumerate} +Users can provide their own documentation generator to be used during +step 2 instead of the default generators. +All the information retrieved during the analysis step is available through +the "Odoc_info" module, which gives access to all the types and functions + representing the elements found in the given modules, with their associated +description. + +The files you can use to define custom generators are installed in the +"ocamldoc" sub-directory of the OCaml standard library. + +%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{The generator modules} +The type of a generator module depends on the kind of generated documentation. 
+Here is the list of generator module types, with the name of the generator +class in the module~: +\begin{itemize} +\item for HTML~: "Odoc_html.Html_generator" (class "html"), +\item for \LaTeX~: "Odoc_latex.Latex_generator" (class "latex"), +\item for TeXinfo~: "Odoc_texi.Texi_generator" (class "texi"), +\item for man pages~: "Odoc_man.Man_generator" (class "man"), +\item for graphviz (dot)~: "Odoc_dot.Dot_generator" (class "dot"), +\item for other kinds~: "Odoc_gen.Base" (class "generator"). +\end{itemize} +That is, to define a new generator, one must implement a module with +the expected signature, and with the given generator class, providing +the "generate" method as entry point to make the generator generate +documentation for a given list of modules~: + +\begin{verbatim} + method generate : Odoc_info.Module.t_module list -> unit +\end{verbatim} + +\noindent{}This method will be called with the list of analysed and possibly +merged "Odoc_info.Module.t_module" structures. + +It is recommended to inherit from the current generator of the same +kind as the one you want to define. Doing so, it is possible to +load various custom generators to combine improvements brought by each one. + +This is done using first class modules (see chapter \ref{s-first-class-modules}). + +The easiest way to define a custom generator is to follow this example, +here extending the current HTML generator. We don't have to know if this is +the original HTML generator defined in ocamldoc or if it has been extended +already by a previously loaded custom generator~: + +\begin{verbatim} +module Generator (G : Odoc_html.Html_generator) = +struct + class html = + object(self) + inherit G.html as html + (* ... *) + + method generate module_list = + (* ... *) + () + + (* ... 
*) + end +end;; + +let _ = Odoc_args.extend_html_generator (module Generator : Odoc_gen.Html_functor);; +\end{verbatim} + +To know which methods to override and/or which methods are available, +have a look at the different base implementations, depending on the +kind of generator you are extending~: +\newcommand\ocamldocsrc[2]{\href{https://github.com/ocaml/ocaml/blob/{\ocamlversion}/ocamldoc/odoc_#1.ml}{#2}} +\begin{itemize} +\item for HTML~: \ocamldocsrc{html}{"odoc_html.ml"}, +\item for \LaTeX~: \ocamldocsrc{latex}{"odoc_latex.ml"}, +\item for TeXinfo~: \ocamldocsrc{texi}{"odoc_texi.ml"}, +\item for man pages~: \ocamldocsrc{man}{"odoc_man.ml"}, +\item for graphviz (dot)~: \ocamldocsrc{dot}{"odoc_dot.ml"}. +\end{itemize} + +%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Handling custom tags} +\label{s:ocamldoc-handling-custom-tags} + +Making a custom generator handle custom tags (see +\ref{s:ocamldoc-custom-tags}) is very simple. + +\subsubsection*{For HTML} +Here is how to develop an HTML generator handling your custom tags. + +The class "Odoc_html.Generator.html" inherits +from the class "Odoc_html.info", containing a field "tag_functions" which is a +list of pairs composed of a custom tag (e.g. "\"foo\"") and a function taking +a "text" and returning HTML code (of type "string"). +To handle a new tag "bar", extend the current HTML generator + and complete the "tag_functions" field: +\begin{verbatim} +module Generator (G : Odoc_html.Html_generator) = +struct + class html = + object(self) + inherit G.html + + (** Return HTML code for the given text of a bar tag. *) + method html_of_bar t = (* your code here *) + + initializer + tag_functions <- ("bar", self#html_of_bar) :: tag_functions + end +end +let _ = Odoc_args.extend_html_generator (module Generator : Odoc_gen.Html_functor);; +\end{verbatim} + +Another method of the class "Odoc_html.info" will look for the +function associated to a custom tag and apply it to the text given to +the tag. 
If no function is associated to a custom tag, then the method +prints a warning message on "stderr". + +\subsubsection{For other generators} +You can act the same way for other kinds of generators. + +%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Adding command line options} +The command line analysis is performed after loading the module containing the +documentation generator, thus allowing command line options to be added to the + list of existing ones. Adding an option can be done with the function +\begin{verbatim} + Odoc_args.add_option : string * Arg.spec * string -> unit +\end{verbatim} +\noindent{}Note: Existing command line options can be redefined using +this function. + +%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Compilation and usage} +\label{s:ocamldoc-compilation-and-usage} + +%%%%%%%%%%%%%% +\subsubsection{Defining a custom generator class in one file} +Let "custom.ml" be the file defining a new generator class. +Compilation of "custom.ml" can be performed by the following command~: +\begin{alltt} + ocamlc -I +ocamldoc -c custom.ml +\end{alltt} +\noindent{}The file "custom.cmo" is created and can be used this way~: +\begin{alltt} + ocamldoc -g custom.cmo \var{other-options} \var{source-files} +\end{alltt} +\noindent{}Options selecting a built-in generator to "ocamldoc", such as +"-html", have no effect if a custom generator of the same kind is provided using +"-g". If the kinds do not match, the selected built-in generator is used and the +custom one is ignored. + +%%%%%%%%%%%%%% +\subsubsection{Defining a custom generator class in several files} +It is possible to define a generator class in several modules, which +are defined in several files \var{\nth{file}{1}}".ml"["i"], +\var{\nth{file}{2}}".ml"["i"], ..., \var{\nth{file}{n}}".ml"["i"]. A ".cma" +library file must be created, including all these files. 
+ +The following commands create the "custom.cma" file from files +\var{\nth{file}{1}}".ml"["i"], ..., \var{\nth{file}{n}}".ml"["i"]~: +\begin{alltt} +ocamlc -I +ocamldoc -c \var{\nth{file}{1}}.ml\textrm{[}i\textrm{]} +ocamlc -I +ocamldoc -c \var{\nth{file}{2}}.ml\textrm{[}i\textrm{]} +... +ocamlc -I +ocamldoc -c \var{\nth{file}{n}}.ml\textrm{[}i\textrm{]} +ocamlc -o custom.cma -a \var{\nth{file}{1}}.cmo \var{\nth{file}{2}}.cmo ... \var{\nth{file}{n}}.cmo +\end{alltt} +\noindent{}Then, the following command uses "custom.cma" as custom generator: +\begin{alltt} + ocamldoc -g custom.cma \var{other-options} \var{source-files} +\end{alltt} diff --git a/manual/manual/cmds/plugins.etex b/manual/manual/cmds/plugins.etex new file mode 100644 index 00000000..7fd02e4f --- /dev/null +++ b/manual/manual/cmds/plugins.etex @@ -0,0 +1,88 @@ +\chapter{Compiler plugins\label{c:plugins}} +\pdfchapterfold{-9}{Compiler plugins} +%HEVEA\cutname{plugins.html} + +\section{Overview} + +Starting from OCaml 4.03, it is possible to extend the native and bytecode compilers +with plugins using the "-plugin" command line option of both tools. +This possibility is also available for "ocamldep" for OCaml versions later than 4.05. +Beware however that plugins are an advanced feature of which the design +is still in flux and breaking changes may happen in the future. Plugin features +are based on the compiler library API. In complement, new hooks have been added to +the compiler to increase its flexibility. + +In particular, hooks are available in the +\ifouthtml\ahref{libref/Pparse.html}{\texttt{Pparse} module} +\else\texttt{Pparse} module (see section~\ref{Pparse})\fi +to transform the parsed abstract syntax tree, providing similar functionality +to extension point based preprocessors. 
+Other hooks are available to analyze the typed tree in the +\ifouthtml\ahref{libref/Typemod.html}{\texttt{Typemod} module} +\else\texttt{Typemod} module (see section~\ref{Typemod})\fi +after the type-checking phase of the compiler. Since the typed tree relies +on numerous invariants that play a vital part in later phases of the +compiler, it is not possible however to transform the typed tree. +Similarly, the intermediary lambda representation can be modified by using the +hooks provided in the +\ifouthtml\ahref{libref/Simplif.html}{\texttt{Simplif} module} +\else\texttt{Simplif} module (see section~\ref{Simplif})\fi. +A plugin can also add new options to a tool through the +"Clflags.add_arguments" function (see +\ifouthtml\ahref{libref/Clflags.html}{\texttt{Clflags} module} +\else\texttt{Clflags} module (see section~\ref{Clflags})\fi). + +Plugins are dynamically loaded and need to be compiled in the same mode (i.e. +native or bytecode) as the tool they extend. + +\section{Basic example} + +As an illustration, we shall build a simple "Hello world" plugin that adds +a simple statement "print_endline \"Hello from:$sourcefile\"" to a compiled file. + +The simplest way to implement this feature is to modify the abstract syntax +tree. We will therefore add a hook to the "Pparse.ImplementationHooks". +Since the proposed modification is very basic, we could implement the hook +directly. However, for the sake of this illustration, we use the "Ast_mapper" +structure that provides a better path to build more interesting plugins. 
+ +The first step is to build the AST fragment corresponding to the +evaluation of "print_endline": +\begin{verbatim} + let print_endline name = + let open Ast_helper in + let print_endline = Exp.ident + @@ Location.mknoloc @@Longident.Lident "print_endline" in + let hello = Exp.constant @@ Const.string @@ "Hello from: " ^ name in + Str.eval @@ Exp.apply print_endline [Asttypes.Nolabel, hello] +\end{verbatim}% +Then, we can construct an AST mapper that adds this fragment to the parsed +AST. +\begin{verbatim} +let add_hello name (mapper:Ast_mapper.mapper) structure = + let default = Ast_mapper.default_mapper in + (print_endline name) :: (default.structure default structure) + +let ast_mapper name = + { Ast_mapper.default_mapper with structure = add_hello name } +\end{verbatim}% +% +Once this AST mapper is constructed, we need to convert it to a hook and add this +hook to the "Pparse.ImplementationHooks". +\begin{verbatim} +let transform hook_info structure = + let astm = ast_mapper hook_info.Misc.sourcefile in + astm.structure astm structure + +let () = Pparse.ImplementationHooks.add_hook "Hello world hook" transform +\end{verbatim} +% +The resulting simplistic plugin can then be compiled with +\begin{verbatim} +$ ocamlopt -I +compiler-libs -shared plugin.ml -o plugin.cmxs +\end{verbatim} +% +Compiling other files with this plugin enabled is then as simple as +\begin{verbatim} +$ ocamlopt -plugin plugin.cmxs test.ml -o test +\end{verbatim} diff --git a/manual/manual/cmds/profil.etex b/manual/manual/cmds/profil.etex new file mode 100644 index 00000000..31b6ed95 --- /dev/null +++ b/manual/manual/cmds/profil.etex @@ -0,0 +1,176 @@ +\chapter{Profiling (ocamlprof)} \label{c:profiler} +\pdfchapter{Profiling (ocamlprof)} +%HEVEA\cutname{profil.html} + +This chapter describes how the execution of OCaml +programs can be profiled, by recording how many times functions are +called, branches of conditionals are taken, \ldots + +\section{Compiling for profiling} + +Before 
profiling an execution, the program must be compiled in +profiling mode, using the "ocamlcp" front-end to the "ocamlc" compiler +(see chapter~\ref{c:camlc}) or the "ocamloptp" front-end to the +"ocamlopt" compiler (see chapter~\ref{c:nativecomp}). When compiling +modules separately, "ocamlcp" or "ocamloptp" must be used when +compiling the modules (production of ".cmo" or ".cmx" files), and can +also be used (though this is not strictly necessary) when linking them +together. + +\paragraph{Note} If a module (".ml" file) doesn't have a corresponding +interface (".mli" file), then compiling it with "ocamlcp" will produce +object files (".cmi" and ".cmo") that are not compatible with the ones +produced by "ocamlc", which may lead to problems (if the ".cmi" or +".cmo" is still around) when switching between profiling and +non-profiling compilations. To avoid this problem, you should always +have a ".mli" file for each ".ml" file. The same problem exists with +"ocamloptp". + +\paragraph{Note} To make sure your programs can be compiled in +profiling mode, avoid using any identifier that begins with +"__ocaml_prof". 
+ +The amount of profiling information can be controlled through the "-P" +option to "ocamlcp" or "ocamloptp", followed by one or several letters +indicating which parts of the program should be profiled: + +%% description des options +\begin{options} +\item["a"] all options +\item["f"] function calls : a count point is set at the beginning of +each function body +\item["i"] {\bf if \ldots then \ldots else \ldots} : count points are set in +both {\bf then} branch and {\bf else} branch +\item["l"] {\bf while, for} loops: a count point is set at the beginning of +the loop body +\item["m"] {\bf match} branches: a count point is set at the beginning of the +body of each branch +\item["t"] {\bf try \ldots with \ldots} branches: a count point is set at the +beginning of the body of each branch +\end{options} + +For instance, compiling with "ocamlcp -P film" profiles function calls, +if\ldots then\ldots else\ldots, loops and pattern matching. + +Calling "ocamlcp" or "ocamloptp" without the "-P" option defaults to +"-P fm", meaning that only function calls and pattern matching are +profiled. + +\paragraph{Note} For compatibility with previous releases, "ocamlcp" +also accepts the "-p" option, with the same arguments and behaviour as +"-P". + +The "ocamlcp" and "ocamloptp" commands also accept all the options of +the corresponding "ocamlc" or "ocamlopt" compiler, except the "-pp" +(preprocessing) option. + + +\section{Profiling an execution} + +Running an executable that has been compiled with "ocamlcp" or +"ocamloptp" records the execution counts for the specified parts of +the program and saves them in a file called "ocamlprof.dump" in the +current directory. + +If the environment variable "OCAMLPROF_DUMP" is set when the program +exits, its value is used as the file name instead of "ocamlprof.dump". + +The dump file is written only if the program terminates +normally (by calling "exit" or by falling through). 
It is not written +if the program terminates with an uncaught exception. + +If a compatible dump file already exists in the current directory, then the +profiling information is accumulated in this dump file. This allows, for +instance, the profiling of several executions of a program on +different inputs. Note that dump files produced by byte-code +executables (compiled with "ocamlcp") are compatible with the dump +files produced by native executables (compiled with "ocamloptp"). + +\section{Printing profiling information} + +The "ocamlprof" command produces a source listing of the program modules +where execution counts have been inserted as comments. For instance, +\begin{verbatim} + ocamlprof foo.ml +\end{verbatim} +prints the source code for the "foo" module, with comments indicating +how many times the functions in this module have been called. Naturally, +this information is accurate only if the source file has not been modified +after it was compiled. + +The following options are recognized by "ocamlprof": + +\begin{options} + +\item["-args" \var{filename}] + Read additional newline-terminated command line arguments from \var{filename}. + +\item["-args0" \var{filename}] + Read additional null character terminated command line arguments from \var{filename}. + +\item["-f" \var{dumpfile}] +Specifies an alternate dump file of profiling information to be read. + +\item["-F" \var{string}] +Specifies an additional string to be output with profiling information. +By default, "ocamlprof" will annotate programs with comments of the form +{\tt (* \var{n} *)} where \var{n} is the counter value for a profiling +point. With option {\tt -F \var{s}}, the annotation will be +{\tt (* \var{s}\var{n} *)}. + +\item["-impl" \var{filename}] +Process the file \var{filename} as an implementation file, even if its +extension is not ".ml". + +\item["-intf" \var{filename}] +Process the file \var{filename} as an interface file, even if its +extension is not ".mli". 
+ +\item["-version"] +Print version string and exit. + +\item["-vnum"] +Print short version number and exit. + +\item["-help" or "--help"] +Display a short usage summary and exit. +% +\end{options} + +\section{Time profiling} + +Profiling with "ocamlprof" only records execution counts, not the actual +time spent within each function. There is currently no way to perform +time profiling on bytecode programs generated by "ocamlc". + +Native-code programs generated by "ocamlopt" can be profiled for time +and execution counts using the "-p" option and the standard Unix +profiler "gprof". Just add the "-p" option when compiling and linking +the program: +\begin{alltt} + ocamlopt -o myprog -p \var{other-options} \var{files} + ./myprog + gprof myprog +\end{alltt} +OCaml function names in the output of "gprof" have the following format: +\begin{alltt} + \var{Module-name}_\var{function-name}_\var{unique-number} +\end{alltt} +Other functions shown are either parts of the OCaml run-time system or +external C functions linked with the program. + +The output of "gprof" is described in the Unix manual page for +"gprof(1)". It generally consists of two parts: a ``flat'' profile +showing the time spent in each function and the number of invocation +of each function, and a ``hierarchical'' profile based on the call +graph. Currently, only the Intel x86 ports of "ocamlopt" under +Linux, BSD and MacOS X support the two profiles. On other platforms, +"gprof" will report only the ``flat'' profile with just time +information. When reading the output of "gprof", keep in mind that +the accumulated times computed by "gprof" are based on heuristics and +may not be exact. + +\paragraph{Note} The "ocamloptp" command also accepts the "-p" +option. In that case, both kinds of profiling are performed by the +program, and you can display the results with the "gprof" and "ocamlprof" +commands, respectively. 
diff --git a/manual/manual/cmds/runtime.etex b/manual/manual/cmds/runtime.etex new file mode 100644 index 00000000..97232f67 --- /dev/null +++ b/manual/manual/cmds/runtime.etex @@ -0,0 +1,281 @@ +\chapter{The runtime system (ocamlrun)} \label{c:runtime} +\pdfchapter{The runtime system (ocamlrun)} +%HEVEA\cutname{runtime.html} + +The "ocamlrun" command executes bytecode files produced by the +linking phase of the "ocamlc" command. + +\section{Overview} + +The "ocamlrun" command comprises three main parts: the bytecode +interpreter, that actually executes bytecode files; the memory +allocator and garbage collector; and a set of C functions that +implement primitive operations such as input/output. + +The usage for "ocamlrun" is: +\begin{alltt} + ocamlrun \var{options} \var{bytecode-executable} \nth{arg}{1} ... \nth{arg}{n} +\end{alltt} +The first non-option argument is taken to be the name of the file +containing the executable bytecode. (That file is searched in the +executable path as well as in the current directory.) The remaining +arguments are passed to the OCaml program, in the string array +"Sys.argv". Element 0 of this array is the name of the +bytecode executable file; elements 1 to \var{n} are the remaining +arguments \nth{arg}{1} to \nth{arg}{n}. + +As mentioned in chapter~\ref{c:camlc}, the bytecode executable files +produced by the "ocamlc" command are self-executable, and manage to +launch the "ocamlrun" command on themselves automatically. That is, +assuming "a.out" is a bytecode executable file, +\begin{alltt} + a.out \nth{arg}{1} ... \nth{arg}{n} +\end{alltt} +works exactly as +\begin{alltt} + ocamlrun a.out \nth{arg}{1} ... \nth{arg}{n} +\end{alltt} +Notice that it is not possible to pass options to "ocamlrun" when +invoking "a.out" directly. + +\begin{windows} +Under several versions of Windows, bytecode executable files are +self-executable only if their name ends in ".exe". 
It is recommended +to always give ".exe" names to bytecode executables, e.g. compile +with "ocamlc -o myprog.exe ..." rather than "ocamlc -o myprog ...". +\end{windows} + +\section{Options} \label{ocamlrun-options} + +The following command-line options are recognized by "ocamlrun". + +\begin{options} + +\item["-b"] +When the program aborts due to an uncaught exception, print a detailed +``back trace'' of the execution, showing where the exception was +raised and which function calls were outstanding at this point. The +back trace is printed only if the bytecode executable contains +debugging information, i.e. was compiled and linked with the "-g" +option to "ocamlc" set. This is equivalent to setting the "b" flag +in the "OCAMLRUNPARAM" environment variable (see below). +\item["-I" \var{dir}] +Search the directory \var{dir} for dynamically-loaded libraries, +in addition to the standard search path (see +section~\ref{s-ocamlrun-dllpath}). +\item["-p"] +Print the names of the primitives known to this version of +"ocamlrun" and exit. +\item["-v"] +Direct the memory manager to print some progress messages on +standard error. This is equivalent to setting "v=63" in the +"OCAMLRUNPARAM" environment variable (see below). +\item["-version"] +Print version string and exit. +\item["-vnum"] +Print short version number and exit. + +\end{options} + +\noindent +The following environment variables are also consulted: + +\begin{options} +\item["CAML_LD_LIBRARY_PATH"] Additional directories to search for + dynamically-loaded libraries (see section~\ref{s-ocamlrun-dllpath}). + +\item["OCAMLLIB"] The directory containing the OCaml standard + library. (If "OCAMLLIB" is not set, "CAMLLIB" will be used instead.) + Used to locate the "ld.conf" configuration file for + dynamic loading (see section~\ref{s-ocamlrun-dllpath}). If not set, + default to the library directory specified when compiling OCaml. 
+ +\item["OCAMLRUNPARAM"] Set the runtime system options + and garbage collection parameters. + (If "OCAMLRUNPARAM" is not set, "CAMLRUNPARAM" will be used instead.) + This variable must be a sequence of parameter specifications separated + by commas. + A parameter specification is an option letter followed by an "=" + sign, a decimal number (or a hexadecimal number prefixed by "0x"), + and an optional multiplier. The options are documented below; + the last six correspond to the fields of the + "control" record documented in +\ifouthtml + \ahref{libref/Gc.html}{Module \texttt{Gc}}. +\else + section~\ref{Gc}. +\fi + \begin{options} + \item[b] (backtrace) Trigger the printing of a stack backtrace + when an uncaught exception aborts the program. + This option takes no argument. + \item[p] (parser trace) Turn on debugging support for + "ocamlyacc"-generated parsers. When this option is on, + the pushdown automaton that executes the parsers prints a + trace of its actions. This option takes no argument. + \item[R] (randomize) Turn on randomization of all hash tables by default + (see +\ifouthtml + \ahref{libref/Hashtbl.html}{Module \texttt{Hashtbl}}). +\else + section~\ref{Hashtbl}). +\fi + This option takes no argument. + \item[h] The initial size of the major heap (in words). + \item[a] ("allocation_policy") The policy used for allocating in the + OCaml heap. Possible values are 0 for the next-fit policy, and 1 + for the first-fit policy. Next-fit is usually faster, but first-fit + is better for avoiding fragmentation and the associated heap + compactions. + \item[s] ("minor_heap_size") Size of the minor heap. (in words) + \item[i] ("major_heap_increment") Default size increment for the + major heap. (in words) + \item[o] ("space_overhead") The major GC speed setting. + \item[O] ("max_overhead") The heap compaction trigger setting. + \item[l] ("stack_limit") The limit (in words) of the stack size. + \item[v] ("verbose") What GC messages to print to stderr. 
This + is a sum of values selected from the following: + \begin{options} + \item[1 (= 0x001)] Start of major GC cycle. + \item[2 (= 0x002)] Minor collection and major GC slice. + \item[4 (= 0x004)] Growing and shrinking of the heap. + \item[8 (= 0x008)] Resizing of stacks and memory manager tables. + \item[16 (= 0x010)] Heap compaction. + \item[32 (= 0x020)] Change of GC parameters. + \item[64 (= 0x040)] Computation of major GC slice size. + \item[128 (= 0x080)] Calling of finalization functions. + \item[256 (= 0x100)] Startup messages (loading the bytecode + executable file, resolving shared libraries). + \item[512 (= 0x200)] Computation of compaction-triggering condition. + \item[1024 (= 0x400)] Output GC statistics at program exit. + \end{options} + \item[c] ("cleanup_on_exit") Shut the runtime down gracefully on exit (see + "caml_shutdown" in section~\ref{s:embedded-code}). The option also enables + pooling (as in "caml_startup_pooled"). This mode can be used to detect + leaks with a third-party memory debugger. + \end{options} + The multiplier is "k", "M", or "G", for multiplication by $2^{10}$, + $2^{20}$, and $2^{30}$ respectively. + + If the option letter is not recognized, the whole parameter is ignored; + if the equal sign or the number is missing, the value is taken as 1; + if the multiplier is not recognized, it is ignored. + + For example, on a 32-bit machine, under "bash" the command +\begin{verbatim} + export OCAMLRUNPARAM='b,s=256k,v=0x015' +\end{verbatim} + tells a subsequent "ocamlrun" to print backtraces for uncaught exceptions, + set its initial minor heap size to 1~megabyte and + print a message at the start of each major GC cycle, when the heap + size changes, and when compaction is triggered. + +\item["CAMLRUNPARAM"] If "OCAMLRUNPARAM" is not found in the + environment, then "CAMLRUNPARAM" will be used instead. If + "CAMLRUNPARAM" is also not found, then the default values will be used. 
+ +\item["PATH"] List of directories searched to find the bytecode +executable file. +\end{options} + +\section{Dynamic loading of shared libraries} \label{s-ocamlrun-dllpath} + +On platforms that support dynamic loading, "ocamlrun" can link +dynamically with C shared libraries (DLLs) providing additional C primitives +beyond those provided by the standard runtime system. The names for +these libraries are provided at link time as described in +section~\ref{dynlink-c-code}, and recorded in the bytecode executable +file; "ocamlrun", then, locates these libraries and resolves references +to their primitives when the bytecode executable program starts. + +The "ocamlrun" command searches shared libraries in the following +directories, in the order indicated: +\begin{enumerate} +\item Directories specified on the "ocamlrun" command line with the +"-I" option. +\item Directories specified in the "CAML_LD_LIBRARY_PATH" environment +variable. +\item Directories specified at link-time via the "-dllpath" option to +"ocamlc". (These directories are recorded in the bytecode executable +file.) +\item Directories specified in the file "ld.conf". This file resides +in the OCaml standard library directory, and lists directory +names (one per line) to be searched. Typically, it contains only one +line naming the "stublibs" subdirectory of the OCaml standard +library directory. Users can add there the names of other directories +containing frequently-used shared libraries; however, for consistency +of installation, we recommend that shared libraries are installed +directly in the system "stublibs" directory, rather than adding lines +to the "ld.conf" file. +\item Default directories searched by the system dynamic loader. +Under Unix, these generally include "/lib" and "/usr/lib", plus the +directories listed in the file "/etc/ld.so.conf" and the environment +variable "LD_LIBRARY_PATH". 
Under Windows, these include the Windows +system directories, plus the directories listed in the "PATH" +environment variable. +\end{enumerate} + +\section{Common errors} + +This section describes and explains the most frequently encountered +error messages. + +\begin{options} + +\item[{\it filename}": no such file or directory"] +If {\it filename} is the name of a self-executable bytecode file, this +means that either that file does not exist, or that it failed to run +the "ocamlrun" bytecode interpreter on itself. The second possibility +indicates that OCaml has not been properly installed on your +system. + +\item["Cannot exec ocamlrun"] +(When launching a self-executable bytecode file.) The "ocamlrun" + could not be found in the executable path. Check that OCaml + has been properly installed on your system. + +\item["Cannot find the bytecode file"] +The file that "ocamlrun" is trying to execute (e.g. the file given as +first non-option argument to "ocamlrun") either does not exist, or is +not a valid executable bytecode file. + +\item["Truncated bytecode file"] +The file that "ocamlrun" is trying to execute is not a valid executable +bytecode file. Probably it has been truncated or mangled since +created. Erase and rebuild it. + +\item["Uncaught exception"] +The program being executed contains a ``stray'' exception. That is, +it raises an exception at some point, and this exception is never +caught. This causes immediate termination of the program. The name of +the exception is printed, along with its string, byte sequence, and +integer arguments +(arguments of more complex types are not correctly printed). +To locate the context of the uncaught exception, compile the program +with the "-g" option and either run it again under the "ocamldebug" +debugger (see chapter~\ref{c:debugger}), or run it with "ocamlrun -b" +or with the "OCAMLRUNPARAM" environment variable set to "b=1". 
+ +\item["Out of memory"] +The program being executed requires more memory than available. Either +the program builds excessively large data structures; or the program +contains too many nested function calls, and the stack overflows. In +some cases, your program is perfectly correct, it just requires more +memory than your machine provides. In other cases, the ``out of +memory'' message reveals an error in your program: non-terminating +recursive function, allocation of an excessively large array, +string or byte sequence, attempts to build an infinite list or other +data structure, \ldots + +To help you diagnose this error, run your program with the "-v" option +to "ocamlrun", or with the "OCAMLRUNPARAM" environment variable set to +"v=63". If it displays lots of ``"Growing stack"\ldots'' +messages, this is probably a looping recursive function. If it +displays lots of ``"Growing heap"\ldots'' messages, with the heap size +growing slowly, this is probably an attempt to construct a data +structure with too many (infinitely many?) cells. If it displays few +``"Growing heap"\ldots'' messages, but with a huge increment in the +heap size, this is probably an attempt to build an excessively large +array, string or byte sequence. + +\end{options} diff --git a/manual/manual/cmds/spacetime.etex b/manual/manual/cmds/spacetime.etex new file mode 100644 index 00000000..7abc7095 --- /dev/null +++ b/manual/manual/cmds/spacetime.etex @@ -0,0 +1,126 @@ +\chapter{Memory profiling with Spacetime} +\pdfchapterfold{-9}{Memory profiling with Spacetime} +%HEVEA\cutname{spacetime.html} + +\section{Overview} + +Spacetime is the name given to functionality within the OCaml compiler that +provides for accurate profiling of the memory behaviour of a program. +Using Spacetime it is possible to determine the source of memory leaks +and excess memory allocation quickly and easily. 
Excess allocation slows +programs down both by imposing a higher load on the garbage collector and +reducing the cache locality of the program's code. Spacetime provides +full backtraces for every allocation that occurred on the OCaml heap +during the lifetime of the program including those in C stubs. + +Spacetime only analyses the memory behaviour of a program with respect to +the OCaml heap allocators and garbage collector. It does not analyse +allocation on the C heap. Spacetime does not affect the memory behaviour +of a program being profiled with the exception of any change caused by the +overhead of profiling (see section\ \ref{runtimeoverhead})---for example +the program running slower might cause it to allocate less memory in total. + +Spacetime is currently only available for x86-64 targets and has only been +tested on Linux systems (although it is expected to work on most modern +Unix-like systems and provision has been made for running under +Windows). It is expected that the set of supported platforms will +be extended in the future. + +\section{How to use it} + +\subsection{Building} + +To use Spacetime it is necessary to use an OCaml compiler that was +configured with the {\tt -spacetime} option. It is not possible to select +Spacetime on a per-source-file basis or for a subset of files in a project; +all files involved in the executable being profiled must be built with the +Spacetime compiler. Only native code compilation is supported (not +bytecode). + +If the {\tt libunwind} library is not available on the system then it will +not be possible for Spacetime to profile allocations occurring within +C stubs. If the {\tt libunwind} library is available but in an unusual +location then that location may be specified to the {\tt configure} script +using the {\tt -libunwinddir} option (or alternatively, using separate +{\tt -libunwindinclude} and {\tt -libunwindlib} options). + +OPAM switches will be provided for Spacetime-configured compilers. 
+ +Once the appropriate compiler has been selected the program should be +built as normal (ensuring that all files are built with the Spacetime +compiler---there is currently no protection to ensure this is the case, but +it is essential). For many uses it will not be necessary to change the +code of the program to use the profiler. + +Spacetime-configured compilers run slower and occupy more memory than their +counterparts. It is hoped this will be fixed in the future as part of +improved cross compilation support. + +\subsection{Running} + +Programs built with Spacetime instrumentation have a dependency on +the {\tt libunwind} library unless that was unavailable at configure time or +the {\tt -disable-libunwind} option was specified +(see section\ \ref{runtimeoverhead}). + +Setting the {\tt OCAML\_SPACETIME\_INTERVAL} environment variable to an +integer representing a number of milliseconds before running a program built +with Spacetime will cause memory profiling to be in operation when the +program is started. The contents of the OCaml heap will be sampled each +time the number of milliseconds that the program has spent executing since the +last sample exceeds the given number. (Note that the time base is combined +user plus system time---{\em not} wall clock time. This peculiarity may be +changed in future.) + +The program being profiled must exit normally or be caused to exit using +the {\tt SIGINT} signal (e.g. by pressing Ctrl+C). When the program exits +files will be written in the directory that was the working directory when +the program was started. One Spacetime file will be written for each +process that was involved, indexed by process ID; there will normally only +be one such. The Spacetime files may be substantial. The directory to which +they are written may be overridden by setting +the {\tt OCAML\_SPACETIME\_SNAPSHOT\_DIR} environment variable before the +program is started. 
+ +Instead of using the automatic snapshot facility described above it is also +possible to manually control Spacetime profiling. (The environment variables +{\tt OCAML\_SPACETIME\_INTERVAL} and {\tt OCAML\_SPACETIME\_SNAPSHOT\_DIR} +are then not relevant.) Full documentation as regards this method of profiling +is provided in the standard library documentation (section\ \ref{c:stdlib}) +for the {\tt Spacetime} module. + +\subsection{Analysis} + +The compiler distribution does not itself provide the facility for analysing +Spacetime output files; this is left to external tools. The first such tool +will appear in OPAM as a package called {\tt prof\_spacetime}. That tool will +provide interactive graphical and terminal-based visualisation of +the results of profiling. + +\section{Runtime overhead}\label{runtimeoverhead} + +The runtime overhead imposed by Spacetime varies considerably depending on +the particular program being profiled. The overhead may be as low as +ten percent---but more usually programs should be expected to run at perhaps +a third or quarter of their normal speed. It is expected that this overhead +will be reduced in future versions of the compiler. + +Execution speed of instrumented programs may be increased by using a compiler +configured with the {\tt -disable-libunwind} option. This prevents collection +of profiling information from C stubs. + +Programs running with Spacetime instrumentation consume significantly more +memory than their non-instrumented counterparts. It is expected that this +memory overhead will also be reduced in the future. + +\section{For developers} + +The compiler distribution provides an ``{\tt otherlibs}'' library called +{\tt raw\_spacetime\_lib} for decoding Spacetime files. This library +provides facilities to read not only memory profiling information but also +the full dynamic call graph of the profiled program which is written into +Spacetime output files. 
+ +A library package {\tt spacetime\_lib} will be provided in OPAM +to provide an interface for decoding profiling information at a higher +level than that provided by {\tt raw\_spacetime\_lib}. diff --git a/manual/manual/cmds/top.etex b/manual/manual/cmds/top.etex new file mode 100644 index 00000000..0ac39a22 --- /dev/null +++ b/manual/manual/cmds/top.etex @@ -0,0 +1,451 @@ +\chapter{The toplevel system or REPL (ocaml)} \label{c:camllight} +\pdfchapter{The toplevel system or REPL (ocaml)} +%HEVEA\cutname{toplevel.html} + +This chapter describes the toplevel system for OCaml, that permits +interactive use of the OCaml system +through a read-eval-print loop (REPL). In this mode, the system repeatedly +reads OCaml phrases from the input, then typechecks, compiles and +evaluates them, then prints the inferred type and result value, if +any. The system prints a "#" (sharp) prompt before reading each +phrase. + +Input to the toplevel can span several lines. It is terminated by @";;"@ (a +double-semicolon). The toplevel input consists of one or several +toplevel phrases, with the following syntax: + +\begin{syntax} +toplevel-input: + {{ definition }} ';;' + | expr ';;' + | '#' ident [ directive-argument ] ';;' +; +directive-argument: + string-literal + | integer-literal + | value-path + | 'true' || 'false' +\end{syntax} + +A phrase can consist of a definition, like those found in +implementations of compilation units or in @'struct' \ldots 'end'@ +module expressions. The definition can bind value names, type names, +an exception, a module name, or a module type name. The toplevel +system performs the bindings, then prints the types and values (if +any) for the names thus defined. + +A phrase may also consist of a value expression +(section~\ref{s:value-expr}). It is simply evaluated +without performing any bindings, and its value is +printed. + +Finally, a phrase can also consist of a toplevel directive, +starting with @"#"@ (the sharp sign). 
These directives control the +behavior of the toplevel; they are listed below in +section~\ref{s:toplevel-directives}. + +\begin{unix} +The toplevel system is started by the command "ocaml", as follows: +\begin{alltt} + ocaml \var{options} \var{objects} # interactive mode + ocaml \var{options} \var{objects} \var{scriptfile} # script mode +\end{alltt} +\var{options} are described below. +\var{objects} are filenames ending in ".cmo" or ".cma"; they are +loaded into the interpreter immediately after \var{options} are set. +\var{scriptfile} is any file name not ending in ".cmo" or ".cma". + +If no \var{scriptfile} is given on the command line, the toplevel system +enters interactive mode: phrases are read on standard input, results +are printed on standard output, errors on standard error. End-of-file +on standard input terminates "ocaml" (see also the "#quit" directive +in section~\ref{s:toplevel-directives}). + +On start-up (before the first phrase is read), if the file +".ocamlinit" exists in the current directory, +its contents are read as a sequence of OCaml phrases +and executed as per the "#use" directive +described in section~\ref{s:toplevel-directives}. +The evaluation outcomes for each phrase are not displayed. +If the current directory does not contain an ".ocamlinit" file, but +the user's home directory (environment variable "HOME") does, the +latter is read and executed as described below. + +The toplevel system does not perform line editing, but it can +easily be used in conjunction with an external line editor such as +"ledit", "ocaml2" or "rlwrap" +\begin{latexonly} +(see the Caml Hump "http://caml.inria.fr/humps/index_framed_caml.html"). +\end{latexonly} +\begin{htmlonly} +(see the +\ahref{http://caml.inria.fr/humps/index\_framed\_caml.html}{Caml Hump}). +\end{htmlonly} +Another option is to use "ocaml" under Gnu Emacs, which gives the +full editing power of Emacs (command "run-caml" from library "inf-caml"). 
+ +At any point, the parsing, compilation or evaluation of the current +phrase can be interrupted by pressing "ctrl-C" (or, more precisely, +by sending the "INTR" signal to the "ocaml" process). The toplevel +then immediately returns to the "#" prompt. + +If \var{scriptfile} is given on the command-line to "ocaml", the toplevel +system enters script mode: the contents of the file are read as a +sequence of OCaml phrases and executed, as per the "#use" +directive (section~\ref{s:toplevel-directives}). The outcome of the +evaluation is not printed. On reaching the end of file, the "ocaml" +command exits immediately. No commands are read from standard input. +"Sys.argv" is transformed, ignoring all OCaml parameters, and +starting with the script file name in "Sys.argv.(0)". + +In script mode, the first line of the script is ignored if it starts +with "#!". Thus, it should be possible to make the script +itself executable and put as first line "#!/usr/local/bin/ocaml", +thus calling the toplevel system automatically when the script is +run. However, "ocaml" itself is a "#!" script on most installations +of OCaml, and Unix kernels usually do not handle nested "#!" +scripts. A better solution is to put the following as the first line +of the script: +\begin{verbatim} + #!/usr/local/bin/ocamlrun /usr/local/bin/ocaml +\end{verbatim} + +\end{unix} + +\section{Options} \label{s:toplevel-options} + +The following command-line options are recognized by the "ocaml" command. +% Configure boolean variables used by the macros in unified-options.etex +\compfalse +\natfalse +\toptrue +% unified-options gathers all options across the native/bytecode +% compilers and toplevel +\input{unified-options.tex} + +\begin{unix} +The following environment variables are also consulted: +\begin{options} +\item["OCAMLTOP_UTF_8"] When printing string values, non-ascii bytes +($ {} > "\0x7E" $) are printed as decimal escape sequence if "OCAMLTOP_UTF_8" is +set to false. 
Otherwise, they are printed unescaped. + +\item["TERM"] When printing error messages, the toplevel system +attempts to underline visually the location of the error. It +consults the "TERM" variable to determine the type of output terminal +and look up its capabilities in the terminal database. + +\item["HOME"] Directory where the ".ocamlinit" file is searched. +\end{options} +\end{unix} + +\section{Toplevel directives} +\label{s:toplevel-directives} + +The following directives control the toplevel behavior, load files in +memory, and trace program execution. + +{\bf Note:} all directives start with a "#" (sharp) symbol. This "#" +must be typed before the directive, and must not be confused with the +"#" prompt displayed by the interactive loop. For instance, +typing "#quit;;" will exit the toplevel loop, but typing "quit;;" +will result in an ``unbound value "quit"'' error. + +% +% Remark: this list of options should be kept synchronized with the documentation +% in toplevel/topdirs.ml. +% +\begin{options} +\item[General] + \begin{options} + \item["#help;;"] + Prints a list of all available directives, with corresponding argument type + if appropriate. + \item["#quit;;"] + Exit the toplevel loop and terminate the "ocaml" command. + \end{options} + +\item[Loading codes] + \begin{options} + + \item["#cd \""\var{dir-name}"\";;"] + Change the current working directory. + + \item["#directory \""\var{dir-name}"\";;"] + Add the given directory to the list of directories searched for + source and compiled files. + + \item["#remove_directory \""\var{dir-name}"\";;"] + Remove the given directory from the list of directories searched for + source and compiled files. Do nothing if the list does not contain + the given directory. + + \item["#load \""\var{file-name}"\";;"] + Load in memory a bytecode object file (".cmo" file) or library file + (".cma" file) produced by the batch compiler "ocamlc". 
+ + \item["#load_rec \""\var{file-name}"\";;"] + Load in memory a bytecode object file (".cmo" file) or library file + (".cma" file) produced by the batch compiler "ocamlc". + When loading an object file that depends on other modules + which have not been loaded yet, the .cmo files for these modules + are searched and loaded as well, recursively. The loading order + is not specified. + + \item["#use \""\var{file-name}"\";;"] + Read, compile and execute source phrases from the given file. + This is textual inclusion: phrases are processed just as if + they were typed on standard input. The reading of the file stops at + the first error encountered. + + \item["#mod_use \""\var{file-name}"\";;"] + Similar to "#use" but also wrap the code into a top-level module of the + same name as capitalized file name without extensions, following + semantics of the compiler. + \end{options} + +For directives that take file names as arguments, if the given file +name specifies no directory, the file is searched in the following +directories: +\begin{enumerate} + \item In script mode, the directory containing the script currently + executing; in interactive mode, the current working directory. + \item Directories added with the "#directory" directive. + \item Directories given on the command line with "-I" options. + \item The standard library directory. +\end{enumerate} + +\item[Environment queries] + \begin{options} + \item["#show_class "\var{class-path}";;"]\vspace{-4.7ex} + \item["#show_class_type "\var{class-path}";;"]\vspace{-4.7ex} + \item["#show_exception "\var{ident}";;"]\vspace{-4.7ex} + \item["#show_module "\var{module-path}";;"]\vspace{-4.7ex} + \item["#show_module_type "\var{modtype-path}";;"]\vspace{-4.7ex} + \item["#show_type "\var{typeconstr}";;"]\vspace{-4.7ex} + \item["#show_val "\var{value-path}";;"] + Print the signature of the corresponding component. 
+ + \item["#show "\var{ident}";;"] + Print the signatures of components with name \var{ident} in all the + above categories. + \end{options} + +\item[Pretty-printing] + \begin{options} + + \item["#install_printer "\var{printer-name}";;"] + This directive registers the function named \var{printer-name} (a + value path) as a printer for values whose types match the argument + type of the function. That is, the toplevel loop will call + \var{printer-name} when it has such a value to print. + + The printing function \var{printer-name} should have type + @"Format.formatter" "->" @t@ "->" "unit"@, where @@t@@ is the + type for the values to be printed, and should output its textual + representation for the value of type @@t@@ on the given formatter, + using the functions provided by the "Format" library. For backward + compatibility, \var{printer-name} can also have type + @@t@ "->" "unit"@ and should then output on the standard + formatter, but this usage is deprecated. + + \item["#print_depth "\var{n}";;"] + Limit the printing of values to a maximal depth of \var{n}. + The parts of values whose depth exceeds \var{n} are printed as "..." + (ellipsis). + + \item["#print_length "\var{n}";;"] + Limit the number of value nodes printed to at most \var{n}. + Remaining parts of values are printed as "..." (ellipsis). + + \item["#remove_printer "\var{printer-name}";;"] + Remove the named function from the table of toplevel printers. +\end{options} + +\item[Tracing] + \begin{options} + \item["#trace "\var{function-name}";;"] + After executing this directive, all calls to the function named + \var{function-name} will be ``traced''. That is, the argument and the + result are displayed for each call, as well as the exceptions escaping + out of the function, raised either by the function itself or by + another function it calls. If the function is curried, each argument + is printed as it is passed to the function. 
+ + \item["#untrace "\var{function-name}";;"] + Stop tracing the given function. + + \item["#untrace_all;;"] + Stop tracing all functions traced so far. + \end{options} + +\item[Compiler options] + \begin{options} + \item["#labels "\var{bool}";;"] + Ignore labels in function types if argument is "false", or switch back + to default behaviour (commuting style) if argument is "true". + + \item["#ppx \""\var{file-name}"\";;"] + After parsing, pipe the abstract syntax tree through the preprocessor + command. + + \item["#principal "\var{bool}";;"] + If the argument is "true", check information paths during + type-checking, to make sure that all types are derived in a principal + way. If the argument is "false", do not check information paths. + + \item["#rectypes;;"] + Allow arbitrary recursive types during type-checking. Note: once + enabled, this option cannot be disabled because that would lead to + unsoundness of the type system. + + \item["#warn_error \""\var{warning-list}"\";;"] + Treat as errors the warnings enabled by the argument and as normal + warnings the warnings disabled by the argument. + + \item["#warnings \""\var{warning-list}"\";;"] + Enable or disable warnings according to the argument. + + \end{options} + +\end{options} + +\section{The toplevel and the module system} \label{s:toplevel-modules} + +Toplevel phrases can refer to identifiers defined in compilation units +with the same mechanisms as for separately compiled units: either by +using qualified names ("Modulename.localname"), or by using +the "open" construct and unqualified names (see section~\ref{s:names}). + +However, before referencing another compilation unit, an +implementation of that unit must be present in memory. +At start-up, the toplevel system contains implementations for all the +modules in the standard library. Implementations for user modules +can be entered with the "#load" directive described above. 
Referencing +a unit for which no implementation has been provided +results in the error "Reference to undefined global `...'". + +Note that entering "open "\var{Mod} merely accesses the compiled +interface (".cmi" file) for \var{Mod}, but does not load the +implementation of \var{Mod}, and does not cause any error if no +implementation of \var{Mod} has been loaded. The error +``reference to undefined global \var{Mod}'' will occur only when +executing a value or module definition that refers to \var{Mod}. + +\section{Common errors} + +This section describes and explains the most frequently encountered +error messages. + +\begin{options} + +\item[Cannot find file \var{filename}] +The named file could not be found in the current directory, nor in the +directories of the search path. + +If \var{filename} has the format \var{mod}".cmi", this +means you have referenced the compilation unit \var{mod}, but its +compiled interface could not be found. Fix: compile \var{mod}".mli" or +\var{mod}".ml" first, to create the compiled interface \var{mod}".cmi". + +If \var{filename} has the format \var{mod}".cmo", this +means you are trying to load with "#load" a bytecode object file that +does not exist yet. Fix: compile \var{mod}".ml" first. + +If your program spans several directories, this error can also appear +because you haven't specified the directories to look into. Fix: use +the "#directory" directive to add the correct directories to the +search path. + +\item[This expression has type \nth{t}{1}, but is used with type \nth{t}{2}] +See section~\ref{s:comp-errors}. + +\item[Reference to undefined global \var{mod}] +You have neglected to load in memory an implementation for a module +with "#load". See section~\ref{s:toplevel-modules} above. + +\end{options} + +\section{Building custom toplevel systems: \texttt{ocamlmktop}} + +The "ocamlmktop" command builds OCaml toplevels that +contain user code preloaded at start-up. 
+ +The "ocamlmktop" command takes as argument a set of ".cmo" and ".cma" +files, and links them with the object files that implement the OCaml toplevel. +The typical use is: +\begin{verbatim} + ocamlmktop -o mytoplevel foo.cmo bar.cmo gee.cmo +\end{verbatim} +This creates the bytecode file "mytoplevel", containing the OCaml toplevel +system, plus the code from the three ".cmo" +files. This toplevel is directly executable and is started by: +\begin{verbatim} + ./mytoplevel +\end{verbatim} +This enters a regular toplevel loop, except that the code from +"foo.cmo", "bar.cmo" and "gee.cmo" is already loaded in memory, just as +if you had typed: +\begin{verbatim} + #load "foo.cmo";; + #load "bar.cmo";; + #load "gee.cmo";; +\end{verbatim} +on entrance to the toplevel. The modules "Foo", "Bar" and "Gee" are +not opened, though; you still have to do +\begin{verbatim} + open Foo;; +\end{verbatim} +yourself, if this is what you wish. + +\subsection{Options} + +The following command-line options are recognized by "ocamlmktop". + +\begin{options} + +\item["-cclib" \var{libname}] +Pass the "-l"\var{libname} option to the C linker when linking in +``custom runtime'' mode. See the corresponding option for +"ocamlc", in chapter~\ref{c:camlc}. + +\item["-ccopt" \var{option}] +Pass the given option to the C compiler and linker, when linking in +``custom runtime'' mode. See the corresponding option for +"ocamlc", in chapter~\ref{c:camlc}. + +\item["-custom"] +Link in ``custom runtime'' mode. See the corresponding option for +"ocamlc", in chapter~\ref{c:camlc}. + +\item["-I" \var{directory}] +Add the given directory to the list of directories searched for +compiled object code files (".cmo" and ".cma"). + +\item["-o" \var{exec-file}] +Specify the name of the toplevel file produced by the linker. +The default is "a.out". 
+ +\end{options} + +\section{The native toplevel: \texttt{ocamlnat}\ (experimental)} + +{\bf This section describes a tool that is not yet officially supported % +but may be found useful.} + +OCaml code executing in the traditional toplevel system uses the bytecode +interpreter. When increased performance is required, or for testing +programs that will only execute correctly when compiled to native code, +the {\em native toplevel} may be used instead. + +For the majority of installations the native toplevel will not have been +installed along with the rest of the OCaml toolchain. In such circumstances +it will be necessary to build the OCaml distribution from source. +From the built source tree of the distribution you may use +{\tt make natruntop} to build and execute a native toplevel. (Alternatively +{\tt make ocamlnat} can be used, which just performs the build step.) + +If the {\tt make install} command is run after having built the native +toplevel then the {\tt ocamlnat} program (either from the source or the +installation directory) may be invoked directly rather than using +{\tt make natruntop}. diff --git a/manual/manual/cmds/unified-options.etex b/manual/manual/cmds/unified-options.etex new file mode 100644 index 00000000..796d65a5 --- /dev/null +++ b/manual/manual/cmds/unified-options.etex @@ -0,0 +1,789 @@ +% +% This file describes the native/bytecode compiler and toplevel +% options. Since specific options can exist in only a subset of +% \{toplevel, bytecode compiler, native compiler \} and their description +% might differ across this subset, this file uses macros to adapt the +% description tool by tool: +\long\def\comp#1{\ifcomp#1\else\fi} +% \long is needed for multiparagraph macros +\long\def\nat#1{\ifnat#1\else\fi} +\long\def\top#1{\iftop#1\else\fi} +\long\def\notop#1{\iftop\else#1\fi} +% ( Note that the previous definitions rely on the three boolean values +% \top, \nat and \comp.
The manual section must therefore +% set these boolean values accordingly. +% ) +% The macros (\comp, \nat, \top) add supplementary text +% if we are respectively in the (bytecode compiler, native compiler, toplevel) +% section. +% The toplevel options are quite different from the compilers' options. +% It is therefore useful to have also a subtractive \notop macro +% that prints its content only outside of the toplevel section +% +% For instance, to add an option "-foo" that applies to the native and +% bytecode compiler, one can write +% \notop{\item["-foo"] +% ... +% } +% +% Similarly, an option "-bar" only available in the native compiler +% can be introduced with +% \nat{\item["-bar"] +% ... +% } +% These macros can also be used to add information that is only relevant to +% some tools or differs slightly from one tool to another. For instance, we +% define the following macros for the pairs cma/cmxa, cmo/cmx and ocamlc/ocamlopt +% +\def\cma{\comp{.cma}\nat{.cmxa}} +\def\cmo{\comp{.cmo}\nat{.cmx}} +\def\qcmo{{\machine\cmo}} +\def\qcma{{\machine\cma}} +\def\ocamlx{\comp{ocamlc}\nat{ocamlopt}} +% +% +\begin{options} +\notop{% +\item["-a"] +Build a library (\nat{".cmxa" and ".a"/".lib" files}\comp{".cma" file}) +with the object files (\nat{".cmx" and ".o"/".obj" files}\comp{ ".cmo" files}) +given on the command line, instead of linking them into an executable file. +The name of the library must be set with the "-o" option. + +If \comp{"-custom", }"-cclib" or "-ccopt" options are passed on the command +line, these options are stored in the resulting \qcma library. Then, +linking with this library automatically adds back the \comp{"-custom", } +"-cclib" and "-ccopt" options as if they had been provided on the +command line, unless the "-noautolink" option is given. +}%notop + +\item["-absname"] +Force error messages to show absolute paths for file names. + +\notop{\item["-annot"] +Dump detailed information about the compilation (types, bindings, +tail-calls, etc).
The information for file \var{src}".ml" +is put into file \var{src}".annot". In case of a type error, dump +all the information inferred by the type-checker before the error. +The \var{src}".annot" file can be used with the emacs commands given in +"emacs/caml-types.el" to display types and other annotations +interactively. +}%notop + +\item["-args" \var{filename}] +Read additional newline-terminated command line arguments from \var{filename}. +\top{It is not possible to pass a \var{scriptfile} via file to the toplevel. +}%top +\item["-args0" \var{filename}] + Read additional null character terminated command line arguments from \var{filename}. +\top{It is not possible to pass a \var{scriptfile} via file to the toplevel. +}%top + + +\notop{\item["-bin-annot"] +Dump detailed information about the compilation (types, bindings, +tail-calls, etc) in binary format. The information for file \var{src}".ml" +(resp. \var{src}".mli") is put into file \var{src}".cmt" +(resp. \var{src}".cmti"). In case of a type error, dump +all the information inferred by the type-checker before the error. +The "*.cmt" and "*.cmti" files produced by "-bin-annot" contain +more information and are much more compact than the files produced by +"-annot". +}%notop + +\notop{\item["-c"] +Compile only. Suppress the linking phase of the +compilation. Source code files are turned into compiled files, but no +executable file is produced. This option is useful to +compile modules separately. +}%notop + +\notop{% +\item["-cc" \var{ccomp}] +Use \var{ccomp} as the C linker \nat{called to build the final executable } +\comp{when linking in ``custom runtime'' mode (see the "-custom" option)} +and as the C compiler for compiling ".c" source files. +}%notop + +\notop{% +\item["-cclib" "-l"\var{libname}] +Pass the "-l"\var{libname} option to the \comp{C} linker +\comp{when linking in ``custom runtime'' mode (see the "-custom" option)}. +This causes the given C library to be linked with the program. 
+}%notop + +\notop{% +\item["-ccopt" \var{option}] +Pass the given option to the C compiler and linker. +\comp{When linking in ``custom runtime'' mode, for instance}% +\nat{For instance,}% +"-ccopt -L"\var{dir} causes the C linker to search for C libraries in +directory \var{dir}.\comp{(See the "-custom" option.)} +}%notop + +\notop{% +\item["-color" \var{mode}] +Enable or disable colors in compiler messages (especially warnings and errors). +The following modes are supported: +\begin{description} + \item["auto"] use heuristics to enable colors only if the output supports them (an ANSI-compatible tty terminal); + \item["always"] enable colors unconditionally; + \item["never"] disable color output. +\end{description} +The default setting is 'auto', and the current heuristic +checks that the "TERM" environment variable exists and is +not empty or "dumb", and that 'isatty(stderr)' holds. + +The environment variable "OCAML_COLOR" is considered if "-color" is not +provided. Its values are auto/always/never as above. +}%notop + +\comp{% +\item["-compat-32"] +Check that the generated bytecode executable can run on 32-bit +platforms and signal an error if it cannot. This is useful when +compiling bytecode on a 64-bit machine. +}%comp + +\nat{% +\item["-compact"] +Optimize the produced code for space rather than for time. This +results in slightly smaller but slightly slower programs. The default is to +optimize for speed. +}%nat + +\item["-config"] +Print the version number of {\machine\ocamlx} and a detailed +summary of its configuration, then exit. + +\comp{% +\item["-custom"] +Link in ``custom runtime'' mode. In the default linking mode, the +linker produces bytecode that is intended to be executed with the +shared runtime system, "ocamlrun". In the custom runtime mode, the +linker produces an output file that contains both the runtime system +and the bytecode for the program. 
The resulting file is larger, but it +can be executed directly, even if the "ocamlrun" command is not +installed. Moreover, the ``custom runtime'' mode enables static +linking of OCaml code with user-defined C functions, as described in +chapter~\ref{c:intf-c}. +\begin{unix} +Never use the "strip" command on executables produced by "ocamlc -custom", +this would remove the bytecode part of the executable. +\end{unix} +\begin{unix} +Security warning: never set the ``setuid'' or ``setgid'' bits on executables +produced by "ocamlc -custom", this would make them vulnerable to attacks. +\end{unix} +}%comp + +\notop{% +\item["-depend" \var{ocamldep-args}] +Compute dependencies, as the "ocamldep" command would do. The remaining +arguments are interpreted as if they were given to the "ocamldep" command. +}%notop + +\comp{ +\item["-dllib" "-l"\var{libname}] +Arrange for the C shared library "dll"\var{libname}".so" +("dll"\var{libname}".dll" under Windows) to be loaded dynamically +by the run-time system "ocamlrun" at program start-up time. +}%comp + +\comp{\item["-dllpath" \var{dir}] +Adds the directory \var{dir} to the run-time search path for shared +C libraries. At link-time, shared libraries are searched in the +standard search path (the one corresponding to the "-I" option). +The "-dllpath" option simply stores \var{dir} in the produced +executable file, where "ocamlrun" can find it and use it as +described in section~\ref{s-ocamlrun-dllpath}. +}%comp + +\notop{% +\item["-for-pack" \var{module-path}] +Generate an object file (\qcmo\nat{ and ".o"/".obj" files}) +that can later be included +as a sub-module (with the given access path) of a compilation unit +constructed with "-pack". For instance, +{\machine\ocamlx\ -for-pack\ P\ -c\ A.ml} +will generate {\machine a.\cmo}\nat{ and "a.o" files} that can +later be used with {\machine \ocamlx\ -pack\ -o\ P\cmo\ a\cmo}. 
+Note: you can still pack a module that was compiled without +"-for-pack" but in this case exceptions will be printed with the wrong +names. +}%notop + +\notop{% +\item["-g"] +Add debugging information while compiling and linking. This option is +required in order to \comp{be able to debug the program with "ocamldebug" +(see chapter~\ref{c:debugger}), and to} produce stack backtraces when +the program terminates on an uncaught exception (see +section~\ref{ocamlrun-options}). +}%notop + +\notop{% +\item["-i"] +Cause the compiler to print all defined names (with their inferred +types or their definitions) when compiling an implementation (".ml" +file). No compiled files (".cmo" and ".cmi" files) are produced. +This can be useful to check the types inferred by the +compiler. Also, since the output follows the syntax of interfaces, it +can help in writing an explicit interface (".mli" file) for a file: +just redirect the standard output of the compiler to a ".mli" file, +and edit that file to remove all declarations of unexported names. +}%notop + +\item["-I" \var{directory}] +Add the given directory to the list of directories searched for +\nat{compiled interface files (".cmi"), compiled object code files (".cmx"), +and libraries (".cmxa").} +\comp{compiled interface files (".cmi"), compiled object code files ".cmo", +libraries (".cma") and C libraries specified with "-cclib -lxxx".} +\top{source and compiled files.} +By default, the current directory is searched first, then the standard +library directory. Directories added with "-I" are searched after the +current directory, in the order in which they were given on the command line, +but before the standard library directory. See also option "-nostdlib". + +If the given directory starts with "+", it is taken relative to the +standard library directory. For instance, "-I +unix" adds the +subdirectory "unix" of the standard library to the search path. 
+ +\top{% +Directories can also be added to the list once +the toplevel is running with the "#directory" directive +(section~\ref{s:toplevel-directives}). +}%top + +\top{% +\item["-init" \var{file}] +Load the given file instead of the default initialization file. +The default file is ".ocamlinit" in the current directory if it +exists, otherwise ".ocamlinit" in the user's home directory. +}%top + +\notop{% +\item["-impl" \var{filename}] +Compile the file \var{filename} as an implementation file, even if its +extension is not ".ml". +}%notop + +\nat{% +\item["-inline" \var{n}] +Set aggressiveness of inlining to \var{n}, where \var{n} is a positive +integer. Specifying "-inline 0" prevents all functions from being +inlined, except those whose body is smaller than the call site. Thus, +inlining causes no expansion in code size. The default aggressiveness, +"-inline 1", allows slightly larger functions to be inlined, resulting +in a slight expansion in code size. Higher values for the "-inline" +option cause larger and larger functions to become candidates for +inlining, but can result in a serious increase in code size. +}%nat + +\notop{% +\item["-intf" \var{filename}] +Compile the file \var{filename} as an interface file, even if its +extension is not ".mli". +}%notop + +\notop{% +\item["-intf-suffix" \var{string}] +Recognize file names ending with \var{string} as interface files +(instead of the default ".mli"). +}%notop + +\item["-labels"] +Labels are not ignored in types, labels may be used in applications, +and labelled parameters can be given in any order. This is the default. + +\notop{% +\item["-linkall"] +Force all modules contained in libraries to be linked in. If this +flag is not given, unreferenced modules are not linked in. When +building a library (option "-a"), setting the "-linkall" option forces all +subsequent links of programs involving that library to link all the +modules contained in the library.
When compiling a module (option +"-c"), setting the "-linkall" option ensures that this module will +always be linked if it is put in a library and this library is linked. +}%notop + +\nat{% +\item["-linscan"] +Use linear scan register allocation. Compiling with this allocator is faster +than with the usual graph coloring allocator, sometimes quite drastically so for +long functions and modules. On the other hand, the generated code can be a bit +slower. +}%nat + +\comp{% +\item["-make-runtime"] +Build a custom runtime system (in the file specified by option "-o") +incorporating the C object files and libraries given on the command +line. This custom runtime system can be used later to execute +bytecode executables produced with the +"ocamlc -use-runtime" \var{runtime-name} option. +See section~\ref{s:custom-runtime} for more information. +}%comp + +\notop{% +\item["-no-alias-deps"] +Do not record dependencies for module aliases. See +section~\ref{s:module-alias} for more information. +}%notop + +\item["-no-app-funct"] +Deactivates the applicative behaviour of functors. With this option, +each functor application generates new types in its result and +applying the same functor twice to the same argument yields two +incompatible structures. + +\item["-noassert"] +Do not compile assertion checks. Note that the special form +"assert false" is always compiled because it is typed specially. +\notop{This flag has no effect when linking already-compiled files.} + +\notop{% +\item["-noautolink"] +When linking \qcma libraries, ignore \comp{"-custom",} "-cclib" and "-ccopt" +options potentially contained in the libraries (if these options were +given when building the libraries). This can be useful if a library +contains incorrect specifications of C libraries or C options; in this +case, during linking, set "-noautolink" and pass the correct C +libraries and options on the command line. 
+}%notop + +\nat{% +\item["-nodynlink"] +Allow the compiler to use some optimizations that are valid only for code +that is never dynlinked. +}%nat + +\item["-nolabels"] +Ignore non-optional labels in types. Labels cannot be used in +applications, and parameter order becomes strict. + +\top{% +\item["-noprompt"] +Do not display any prompt when waiting for input. +}%top + +\top{% +\item["-nopromptcont"] +Do not display the secondary prompt when waiting for continuation +lines in multi-line inputs. This should be used e.g. when running +"ocaml" in an "emacs" window. +}%top + +\item["-nostdlib"] +\top{% +Do not include the standard library directory in the list of +directories searched for source and compiled files. +}%top +\comp{% +Do not include the standard library directory in the list of +directories searched for +compiled interface files (".cmi"), compiled object code files +(".cmo"), libraries (".cma"), and C libraries specified with +"-cclib -lxxx". See also option "-I". +}%comp +\nat{% +Do not automatically add the standard library directory to the list of +directories searched for compiled interface files (".cmi"), compiled +object code files (".cmx"), and libraries (".cmxa"). See also option +"-I". +}%nat + +\notop{% +\item["-o" \var{exec-file}] +Specify the name of the output file produced by the +\nat{linker}\comp{compiler}. The +default output name is "a.out" under Unix and "camlprog.exe" under +Windows. If the "-a" option is given, specify the name of the library +produced. If the "-pack" option is given, specify the name of the +packed object file produced. If the "-output-obj" option is given, +specify the name of the output file produced.
+\nat{If the "-shared" option is given, specify the name of the plugin +file produced.} +\comp{If the "-c" option is given, specify the name of the object +file produced for the {\em next} source file that appears on the +command line.} +}%notop + +\notop{% +\item["-opaque"] +When the native compiler compiles an implementation, by default it +produces a ".cmx" file containing information for cross-module +optimization. It also expects ".cmx" files to be present for the +dependencies of the currently compiled source, and uses them for +optimization. Since OCaml 4.03, the compiler will emit a warning if it +is unable to locate the ".cmx" file of one of those dependencies. + +The "-opaque" option, available since 4.04, disables cross-module +optimization information for the currently compiled unit. When +compiling a ".mli" interface, using "-opaque" marks the compiled ".cmi" +interface so that subsequent compilations of modules that depend on it +will not rely on the corresponding ".cmx" file, nor warn if it is +absent. When the native compiler compiles a ".ml" implementation, +using "-opaque" generates a ".cmx" that does not contain any +cross-module optimization information. + +Using this option may degrade the quality of generated code, but it +reduces compilation time, both on clean and incremental +builds. Indeed, with the native compiler, when the implementation of +a compilation unit changes, all the units that depend on it may need +to be recompiled -- because the cross-module information may have +changed. If the compilation unit whose implementation changed was +compiled with "-opaque", no such recompilation needs to occur. This +option can thus be used, for example, to get faster edit-compile-test +feedback loops. +}%notop + +\notop{% +\item["-open" \var{Module}] +Opens the given module before processing the interface or +implementation files. If several "-open" options are given, +they are processed in order, just as if +the statements "open!"
\var{Module1}";;" "..." "open!" \var{ModuleN}";;" +were added at the top of each file. +}%notop + +\notop{% +\item["-output-obj"] +Cause the linker to produce a C object file instead of +\comp{a bytecode executable file}\nat{an executable file}. +This is useful to wrap OCaml code as a C library, +callable from any C program. See chapter~\ref{c:intf-c}, +section~\ref{s:embedded-code}. The name of the output object file +must be set with the "-o" option. +This option can also be used to produce a \comp{C source file (".c" extension) or +a} compiled shared/dynamic library (".so" extension, ".dll" under Windows). +}%notop + +\nat{% +\item["-p"] +Generate extra code to write profile information when the program is +executed. The profile information can then be examined with the +analysis program "gprof". (See chapter~\ref{c:profiler} for more +information on profiling.) The "-p" option must be given both at +compile-time and at link-time. Linking object files not compiled with +"-p" is possible, but results in less precise profiling. + +\begin{unix} See the Unix manual page for "gprof(1)" for more +information about the profiles. + +Full support for "gprof" is only available for certain platforms +(currently: Intel x86 32 and 64 bits under Linux, BSD and MacOS X). +On other platforms, the "-p" option will result in a less precise +profile (no call graph information, only a time profile). +\end{unix} + +\begin{windows} +The "-p" option does not work under Windows. +\end{windows} +}%nat + +\nat{% +\item["-pack"] +Build an object file (".cmx" and ".o"/".obj" files) and its associated compiled +interface (".cmi") that combines the ".cmx" object +files given on the command line, making them appear as sub-modules of +the output ".cmx" file. The name of the output ".cmx" file must be +given with the "-o" option. 
For instance, +\begin{verbatim} + ocamlopt -pack -o P.cmx A.cmx B.cmx C.cmx +\end{verbatim} +generates compiled files "P.cmx", "P.o" and "P.cmi" describing a +compilation unit having three sub-modules "A", "B" and "C", +corresponding to the contents of the object files "A.cmx", "B.cmx" and +"C.cmx". These contents can be referenced as "P.A", "P.B" and "P.C" +in the remainder of the program. + +The ".cmx" object files being combined must have been compiled with +the appropriate "-for-pack" option. In the example above, +"A.cmx", "B.cmx" and "C.cmx" must have been compiled with +"ocamlopt -for-pack P". + +Multiple levels of packing can be achieved by combining "-pack" with +"-for-pack". Consider the following example: +\begin{verbatim} + ocamlopt -for-pack P.Q -c A.ml + ocamlopt -pack -o Q.cmx -for-pack P A.cmx + ocamlopt -for-pack P -c B.ml + ocamlopt -pack -o P.cmx Q.cmx B.cmx +\end{verbatim} +The resulting "P.cmx" object file has sub-modules "P.Q", "P.Q.A" +and "P.B". +}%nat + +\comp{% +\item["-pack"] +Build a bytecode object file (".cmo" file) and its associated compiled +interface (".cmi") that combines the object +files given on the command line, making them appear as sub-modules of +the output ".cmo" file. The name of the output ".cmo" file must be +given with the "-o" option. For instance, +\begin{verbatim} + ocamlc -pack -o p.cmo a.cmo b.cmo c.cmo +\end{verbatim} +generates compiled files "p.cmo" and "p.cmi" describing a compilation +unit having three sub-modules "A", "B" and "C", corresponding to the +contents of the object files "a.cmo", "b.cmo" and "c.cmo". These +contents can be referenced as "P.A", "P.B" and "P.C" in the remainder +of the program. +}%comp + + +\notop{% +\item["-plugin" \var{plugin}] +Dynamically load the code of the given \var{plugin} +(a ".cmo", ".cma" or ".cmxs" file) in the compiler. 
\var{plugin} must exist in +the same kind of code as the compiler ({\machine \ocamlx.byte} must load bytecode +plugins, while {\machine \ocamlx.opt} must load native code plugins), and +extension adaptation is done automatically for ".cma" files (to ".cmxs" files +if the compiler is compiled in native code). +}%notop + +\notop{% +\item["-pp" \var{command}] +Cause the compiler to call the given \var{command} as a preprocessor +for each source file. The output of \var{command} is redirected to +an intermediate file, which is compiled. If there are no compilation +errors, the intermediate file is deleted afterwards. +}%notop + +\item["-ppx" \var{command}] +After parsing, pipe the abstract syntax tree through the preprocessor +\var{command}. The module "Ast_mapper", described in +\ifouthtml +chapter~\ref{c:parsinglib}: \ahref{libref/Ast\_mapper.html}{ \texttt{Ast_mapper} } +\else section~\ref{Ast-underscoremapper}\fi, +implements the external interface of a preprocessor. + +\item["-principal"] +Check information path during type-checking, to make sure that all +types are derived in a principal way. When using labelled arguments +and/or polymorphic methods, this flag is required to ensure future +versions of the compiler will be able to infer types correctly, even +if internal algorithms change. +All programs accepted in "-principal" mode are also accepted in the +default mode with equivalent types, but different binary signatures, +and this may slow down type checking; yet it is a good idea to +use it once before publishing source code. + +\item["-rectypes"] +Allow arbitrary recursive types during type-checking. By default, +only recursive types where the recursion goes through an object type +are supported. \notop{Note that once you have created an interface using this +flag, you must use it again for all dependencies.} + +\notop{% +\item["-runtime-variant" \var{suffix}] +Add the \var{suffix} string to the name of the runtime library used by +the program.
Currently, only one such suffix is supported: "d", and +only if the OCaml compiler was configured with option +"-with-debug-runtime". This suffix gives the debug version of the +runtime, which is useful for debugging pointer problems in low-level +code such as C stubs. +}%notop + +\nat{% +\item["-S"] +Keep the assembly code produced during the compilation. The assembly +code for the source file \var{x}".ml" is saved in the file \var{x}".s". +}%nat + +\nat{% +\item["-shared"] +Build a plugin (usually ".cmxs") that can be dynamically loaded with +the "Dynlink" module. The name of the plugin must be +set with the "-o" option. A plugin can include a number of OCaml +modules and libraries, and extra native objects (".o", ".obj", ".a", +".lib" files). Building native plugins is only supported for some +operating systems. Under some systems (currently, +only Linux AMD 64), all the OCaml code linked in a plugin must have +been compiled without the "-nodynlink" flag. Some constraints might also +apply to the way the extra native objects have been compiled (under +Linux AMD 64, they must contain only position-independent code). +}%nat + +\item["-safe-string"] +Enforce the separation between types "string" and "bytes", +thereby making strings read-only. This is the default. + +\item["-short-paths"] +When a type is visible under several module-paths, use the shortest +one when printing the type's name in inferred interfaces and error and +warning messages. Identifier names starting with an underscore "_" or +containing double underscores "__" incur a penalty of $+10$ when computing +their length. + +\top{ +\item["-stdin"] +Read the standard input as a script file rather than starting an +interactive session. +}%top + +\item["-strict-sequence"] +Force the left-hand part of each sequence to have type unit. + +\item["-strict-formats"] +Reject invalid formats that were accepted in legacy format +implementations.
You should use this flag to detect and fix such +invalid formats, as they will be rejected by future OCaml versions. + +\notop{% +\item["-unboxed-types"] +When a type is unboxable (i.e. a record with a single argument or a +concrete datatype with a single constructor of one argument) it will +be unboxed unless annotated with "[@@ocaml.boxed]". +}%notop + +\notop{% +\item["-no-unboxed-types"] +When a type is unboxable it will be boxed unless annotated with +"[@@ocaml.unboxed]". This is the default. +}%notop + +\item["-unsafe"] +Turn bound checking off for array and string accesses (the "v.(i)" and +"s.[i]" constructs). Programs compiled with "-unsafe" are therefore +\comp{slightly} faster, but unsafe: anything can happen if the program +accesses an array or string outside of its bounds. +\notop{% +Additionally, turn off the check for zero divisor in integer division + and modulus operations. With "-unsafe", an integer division +(or modulus) by zero can halt the program or continue with an +unspecified result instead of raising a "Division_by_zero" exception. +}%notop + +\item["-unsafe-string"] +Identify the types "string" and "bytes", thereby making strings writable. +This is intended for compatibility with old source code and should not +be used with new software. + +\comp{% +\item["-use-runtime" \var{runtime-name}] +Generate a bytecode executable file that can be executed on the custom +runtime system \var{runtime-name}, built earlier with +"ocamlc -make-runtime" \var{runtime-name}. +See section~\ref{s:custom-runtime} for more information. +}%comp + +\item["-v"] +Print the version number of the compiler and the location of the +standard library directory, then exit. + +\item["-verbose"] +Print all external commands before they are executed, +\nat{in particular invocations of the assembler, C compiler, and linker.} +\comp{in particular invocations of the C compiler and linker in "-custom" mode.} +Useful to debug C library problems. 
+ +\comp{% +\item["-vmthread"] +Compile or link multithreaded programs, in combination with the +VM-level "threads" library described in chapter~\ref{c:threads}. +}%comp + +\notop{% +\item["-version" or "-vnum"] +Print the version number of the compiler in short form (e.g. "3.11.0"), +then exit. +}%notop + +\top{% +\item["-version"] +Print version string and exit. + +\item["-vnum"] +Print short version number and exit. + +\item["-no-version"] +Do not print the version banner at startup. +}%top + +\item["-w" \var{warning-list}] +Enable, disable, or mark as fatal the warnings specified by the argument +\var{warning-list}. +Each warning can be {\em enabled} or {\em disabled}, and each warning +can be {\em fatal} or {\em non-fatal}. +If a warning is disabled, it isn't displayed and doesn't affect +compilation in any way (even if it is fatal). If a warning is +enabled, it is displayed normally by the compiler whenever the source +code triggers it. If it is enabled and fatal, the compiler will also +stop with an error after displaying it. + +The \var{warning-list} argument is a sequence of warning specifiers, +with no separators between them. A warning specifier is one of the +following: + +\begin{options} +\item["+"\var{num}] Enable warning number \var{num}. +\item["-"\var{num}] Disable warning number \var{num}. +\item["@"\var{num}] Enable and mark as fatal warning number \var{num}. +\item["+"\var{num1}..\var{num2}] Enable warnings in the given range. +\item["-"\var{num1}..\var{num2}] Disable warnings in the given range. +\item["@"\var{num1}..\var{num2}] Enable and mark as fatal warnings in +the given range. +\item["+"\var{letter}] Enable the set of warnings corresponding to +\var{letter}. The letter may be uppercase or lowercase. +\item["-"\var{letter}] Disable the set of warnings corresponding to +\var{letter}. The letter may be uppercase or lowercase. +\item["@"\var{letter}] Enable and mark as fatal the set of warnings +corresponding to \var{letter}. 
The letter may be uppercase or +lowercase. +\item[\var{uppercase-letter}] Enable the set of warnings corresponding +to \var{uppercase-letter}. +\item[\var{lowercase-letter}] Disable the set of warnings corresponding +to \var{lowercase-letter}. +\end{options} + +Warning numbers and letters which are out of the range of warnings +that are currently defined are ignored. The warnings are as follows. +\begin{options} +\input{warnings-help.tex} +\end{options} + +The default setting is "-w +a-4-6-7-9-27-29-32..42-44-45-48-50-60". +It is displayed by {\machine\ocamlx\ -help}. +Note that warnings 5 and 10 are not always triggered, depending on +the internals of the type checker. + + +\item["-warn-error" \var{warning-list}] +Mark as fatal the warnings specified in the argument \var{warning-list}. +The compiler will stop with an error when one of these warnings is +emitted. The \var{warning-list} has the same meaning as for +the "-w" option: a "+" sign (or an uppercase letter) marks the +corresponding warnings as fatal, a "-" +sign (or a lowercase letter) turns them back into non-fatal warnings, +and a "@" sign both enables and marks as fatal the corresponding +warnings. + +Note: it is not recommended to use warning sets (i.e. letters) as +arguments to "-warn-error" +in production code, because this can break your build when future versions +of OCaml add some new warnings. + +The default setting is "-warn-error -a+31" (only warning 31 is fatal). + +\item["-warn-help"] +Show the description of all available warning numbers. + +\notop{% +\item["-where"] +Print the location of the standard library, then exit. +}%notop + +\item["-" \var{file}] +\notop{Process \var{file} as a file name, even if it starts with a dash ("-") +character.} +\top{Use \var{file} as a script file name, even when it starts with a +hyphen (-).} + +\item["-help" or "--help"] +Display a short usage summary and exit. 
+ +\end{options} +% diff --git a/manual/manual/foreword.etex b/manual/manual/foreword.etex new file mode 100644 index 00000000..6c68d7e2 --- /dev/null +++ b/manual/manual/foreword.etex @@ -0,0 +1,82 @@ +\chapter*{Foreword} +\markboth{Foreword}{} +%HEVEA\cutname{foreword.html} + +This manual documents the release \ocamlversion\ of the OCaml +system. It is organized as follows. +\begin{itemize} +\item Part~\ref{p:tutorials}, ``An introduction to OCaml'', +gives an overview of the language. +\item Part~\ref{p:refman}, ``The OCaml language'', is the +reference description of the language. +\item Part~\ref{p:commands}, ``The OCaml tools'', documents +the compilers, toplevel system, and programming utilities. +\item Part~\ref{p:library}, ``The OCaml library'', describes the +modules provided in the standard library. +\begin{latexonly} +\item Part~\ref{p:appendix}, ``Appendix'', contains an +index of all identifiers defined in the standard library, and an +index of keywords. +\end{latexonly} +\end{itemize} + +\section*{Conventions} + +OCaml runs on several operating systems. The parts of +this manual that are specific to one operating system are presented as +shown below: + +\begin{unix} This is material specific to the Unix family of operating +systems, including Linux and \hbox{MacOS~X}. +\end{unix} + +\begin{windows} This is material specific to Microsoft Windows + (XP, Vista, 7, 8, 10). +\end{windows} + +\section*{License} + +The OCaml system is copyright \copyright\ 1996--\number\year\ +Institut National de Recherche en Informatique et en +Automatique (INRIA). +INRIA holds all ownership rights to the OCaml system. + +The OCaml system is open source and can be freely +redistributed. See the file "LICENSE" in the distribution for +licensing information. + +The present documentation is copyright \copyright\ \number\year\ +Institut National de Recherche en Informatique et en +Automatique (INRIA). 
The OCaml documentation and user's +manual may be reproduced and distributed in whole or +in part, subject to the following conditions: +\begin{itemize} +\item The copyright notice above and this permission notice must be +preserved complete on all complete or partial copies. +\item Any translation or derivative work of the OCaml +documentation and user's manual must be approved by the authors in +writing before distribution. +\item If you distribute the OCaml +documentation and user's manual in part, instructions for obtaining +the complete version of this manual must be included, and a +means for obtaining a complete version provided. +\item Small portions may be reproduced as illustrations for reviews or +quotes in other works without this permission notice if proper +citation is given. +\end{itemize} + +\section*{Availability} + +\begin{latexonly} +The complete OCaml distribution can be accessed via the Web +sites \url{http://www.ocaml.org/} and \url{http://caml.inria.fr/}. +The former Web site contains a lot of additional information on OCaml. +\end{latexonly} + +\begin{htmlonly} +The complete OCaml distribution can be accessed via the +\href{http://www.ocaml.org/}{community Caml Web site} and the +\href{http://caml.inria.fr/}{older Caml Web site}. +The \href{http://www.ocaml.org/}{community Caml Web site} +contains a lot of additional information on OCaml. 
+\end{htmlonly} diff --git a/manual/manual/htmlman/.gitignore b/manual/manual/htmlman/.gitignore new file mode 100644 index 00000000..3cecdc2c --- /dev/null +++ b/manual/manual/htmlman/.gitignore @@ -0,0 +1,8 @@ +*.html +*.haux +*.hind +libref +manual.hmanual +manual.hmanual.kwd +manual.css +*.htoc diff --git a/manual/manual/htmlman/contents_motif.gif b/manual/manual/htmlman/contents_motif.gif new file mode 100644 index 00000000..5d3d0167 Binary files /dev/null and b/manual/manual/htmlman/contents_motif.gif differ diff --git a/manual/manual/htmlman/fonts/fira-sans-v8-latin-regular.eot b/manual/manual/htmlman/fonts/fira-sans-v8-latin-regular.eot new file mode 100644 index 00000000..487aa40a Binary files /dev/null and b/manual/manual/htmlman/fonts/fira-sans-v8-latin-regular.eot differ diff --git a/manual/manual/htmlman/fonts/fira-sans-v8-latin-regular.svg b/manual/manual/htmlman/fonts/fira-sans-v8-latin-regular.svg new file mode 100644 index 00000000..1e520978 --- /dev/null +++ b/manual/manual/htmlman/fonts/fira-sans-v8-latin-regular.svg @@ -0,0 +1,330 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/manual/manual/htmlman/fonts/fira-sans-v8-latin-regular.ttf b/manual/manual/htmlman/fonts/fira-sans-v8-latin-regular.ttf new file mode 100644 index 00000000..a330a88d Binary files /dev/null and b/manual/manual/htmlman/fonts/fira-sans-v8-latin-regular.ttf differ diff --git a/manual/manual/htmlman/fonts/fira-sans-v8-latin-regular.woff b/manual/manual/htmlman/fonts/fira-sans-v8-latin-regular.woff new file mode 100644 index 00000000..9c671f40 Binary files /dev/null and 
b/manual/manual/htmlman/fonts/fira-sans-v8-latin-regular.woff differ diff --git a/manual/manual/htmlman/fonts/fira-sans-v8-latin-regular.woff2 b/manual/manual/htmlman/fonts/fira-sans-v8-latin-regular.woff2 new file mode 100644 index 00000000..3d216995 Binary files /dev/null and b/manual/manual/htmlman/fonts/fira-sans-v8-latin-regular.woff2 differ diff --git a/manual/manual/htmlman/libgraph.gif b/manual/manual/htmlman/libgraph.gif new file mode 100644 index 00000000..b385985b Binary files /dev/null and b/manual/manual/htmlman/libgraph.gif differ diff --git a/manual/manual/htmlman/next_motif.gif b/manual/manual/htmlman/next_motif.gif new file mode 100644 index 00000000..3f84bacf Binary files /dev/null and b/manual/manual/htmlman/next_motif.gif differ diff --git a/manual/manual/htmlman/previous_motif.gif b/manual/manual/htmlman/previous_motif.gif new file mode 100644 index 00000000..8c8a3e64 Binary files /dev/null and b/manual/manual/htmlman/previous_motif.gif differ diff --git a/manual/manual/index.tex b/manual/manual/index.tex new file mode 100644 index 00000000..aff78b9f --- /dev/null +++ b/manual/manual/index.tex @@ -0,0 +1,20 @@ +\ifouthtml +\begin{rawhtml} + +\end{rawhtml} +\else +\chapter*{Index to the library} +\markright{Index to the library} +\addcontentsline{toc}{chapter}{Index to the library} +\myprintindex{\jobname.ind} +\fi +\chapter*{Index of keywords} +\markright{Index of keywords} +\addcontentsline{toc}{chapter}{Index of keywords} +\myprintindex{\jobname.kwd.ind} diff --git a/manual/manual/infoman/.gitignore b/manual/manual/infoman/.gitignore new file mode 100644 index 00000000..916af019 --- /dev/null +++ b/manual/manual/infoman/.gitignore @@ -0,0 +1,5 @@ +*.haux +*.hind +*.info*.gz +*.info.body* +ocaml.hocaml.kwd diff --git a/manual/manual/library/.cvsignore b/manual/manual/library/.cvsignore new file mode 100644 index 00000000..8955ee04 --- /dev/null +++ b/manual/manual/library/.cvsignore @@ -0,0 +1,5 @@ +*.tex +*.htex +arithstatus.mli +ocamldoc.out 
+ocamldoc.sty diff --git a/manual/manual/library/.gitignore b/manual/manual/library/.gitignore new file mode 100644 index 00000000..8955ee04 --- /dev/null +++ b/manual/manual/library/.gitignore @@ -0,0 +1,5 @@ +*.tex +*.htex +arithstatus.mli +ocamldoc.out +ocamldoc.sty diff --git a/manual/manual/library/Makefile b/manual/manual/library/Makefile new file mode 100644 index 00000000..d085f504 --- /dev/null +++ b/manual/manual/library/Makefile @@ -0,0 +1,86 @@ +CORE_INTF=Pervasives.tex + +CSLDIR=$(RELEASEDIR) + +STDLIB_INTF= Arg.tex Array.tex ArrayLabels.tex Char.tex Complex.tex \ + Digest.tex Filename.tex Format.tex \ + Gc.tex Genlex.tex Hashtbl.tex Int32.tex Int64.tex \ + Lazy.tex Lexing.tex List.tex ListLabels.tex Map.tex Marshal.tex \ + MoreLabels.tex Nativeint.tex Obj.tex Oo.tex \ + Parsing.tex Printexc.tex Printf.tex Queue.tex Random.tex Scanf.tex \ + Set.tex Sort.tex Stack.tex Stream.tex String.tex StringLabels.tex Sys.tex \ + Weak.tex Callback.tex Buffer.tex StdLabels.tex \ + Bytes.tex BytesLabels.tex Spacetime.tex + +COMPILER_LIBS_PLUGIN_HOOKS=Pparse.tex Typemod.tex + +COMPILER_LIBS_INTF=Asthelper.tex Astmapper.tex Asttypes.tex \ + Lexer.tex Location.tex Longident.tex Parse.tex Pprintast.tex Printast.tex \ + $(COMPILER_LIBS_PLUGIN_HOOKS) + +OTHERLIB_INTF=Unix.tex UnixLabels.tex Str.tex \ + Graphics.tex GraphicsX11.tex \ + Thread.tex Mutex.tex Condition.tex Event.tex ThreadUnix.tex \ + Dynlink.tex Bigarray.tex + +INTF=$(CORE_INTF) $(STDLIB_INTF) $(COMPILER_LIBS_INTF) $(OTHERLIB_INTF) + +BLURB=core.tex builtin.tex stdlib.tex compilerlibs.tex \ + libunix.tex libstr.tex libnum.tex libgraph.tex \ + libthreads.tex libdynlink.tex libbigarray.tex + +FILES=$(BLURB) $(INTF) + +SRC=../../.. 
+ +LD_PATH := $(SRC)/otherlibs/unix/:$(SRC)/otherlibs/str/ +SET_LD_PATH=CAML_LD_LIBRARY_PATH=$(LD_PATH) + +FORMAT=../../tools/format-intf +TEXQUOTE=../../tools/texquote2 + +VPATH=.:$(STDLIB_DIR):$(CSLDIR)/parsing:$(CSLDIR)/otherlibs/unix:$(CSLDIR)/otherlibs/str:$(CSLDIR)/otherlibs/graph:$(CSLDIR)/otherlibs/threads:$(CSLDIR)/otherlibs/dynlink:$(CSLDIR)/otherlibs/bigarray + +etex-files: $(BLURB) +all: libs + +libs: $(FILES) + +OCAMLDOC=$(if $(wildcard $(CSLDIR)/ocamldoc/ocamldoc.opt),\ + $(CSLDIR)/ocamldoc/ocamldoc.opt,\ + $(SET_LD_PATH) $(CSLDIR)/byterun/ocamlrun $(CSLDIR)/ocamldoc/ocamldoc) \ + -nostdlib -initially-opened-module Pervasives + +# Copy and unprefix the standard library when needed +include $(SRC)/ocamldoc/Makefile.unprefix + + +$(INTF): interfaces +interfaces: $(STDLIB_CMIS) + $(OCAMLDOC) -latex \ + -I $(STDLIB_UNPREFIXED) \ + $(STDLIB_MLIS) \ + -sepfiles \ + -latextitle "1,subsection*" \ + -latextitle "2,subsubsection*" \ + -latex-type-prefix "TYP" \ + -latex-module-prefix "" \ + -latex-module-type-prefix "" \ + -latex-value-prefix "" + mv -f Ast_helper.tex Asthelper.tex + mv -f Ast_mapper.tex Astmapper.tex + +clean: + rm -f $(FILES) + +.SUFFIXES: +.SUFFIXES: .tex .etex .mli + +.etex.tex: $(TEXQUOTE) + @$(TEXQUOTE) < $*.etex > $*.texquote_error.tex\ + && mv $*.texquote_error.tex $*.tex\ + || printf "Failure when generating %s\n" $*.tex + + +.mli.tex: $(FORMAT) + $(FORMAT) $< > $*.tex < $< diff --git a/manual/manual/library/builtin.etex b/manual/manual/library/builtin.etex new file mode 100644 index 00000000..404f5608 --- /dev/null +++ b/manual/manual/library/builtin.etex @@ -0,0 +1,281 @@ +\section{Built-in types and predefined exceptions} + +The following built-in types and predefined exceptions are always +defined in the +compilation environment, but are not part of any module. As a +consequence, they can only be referred by their short names. 
+ +%\vspace{0.1cm} +\subsection*{Built-in types} +%\vspace{0.1cm} + +\begin{ocamldoccode} + type int +\end{ocamldoccode} +\index{int@\verb`int`} +\begin{ocamldocdescription} + The type of integer numbers. +\end{ocamldocdescription} + +\begin{ocamldoccode} + type char +\end{ocamldoccode} +\index{char@\verb`char`} +\begin{ocamldocdescription} + The type of characters. +\end{ocamldocdescription} + +\begin{ocamldoccode} + type bytes +\end{ocamldoccode} +\index{bytes@\verb`bytes`} +\begin{ocamldocdescription} + The type of (writable) byte sequences. +\end{ocamldocdescription} + +\begin{ocamldoccode} + type string +\end{ocamldoccode} +\index{string@\verb`string`} +\begin{ocamldocdescription} + The type of (read-only) character strings. +\end{ocamldocdescription} + +\begin{ocamldoccode} + type float +\end{ocamldoccode} +\index{float@\verb`float`} +\begin{ocamldocdescription} + The type of floating-point numbers. +\end{ocamldocdescription} + +\begin{ocamldoccode} + type bool = false | true +\end{ocamldoccode} +\index{bool@\verb`bool`} +\begin{ocamldocdescription} + The type of booleans (truth values). +\end{ocamldocdescription} + +\begin{ocamldoccode} + type unit = () +\end{ocamldoccode} +\index{unit@\verb`unit`} +\begin{ocamldocdescription} + The type of the unit value. +\end{ocamldocdescription} + +\begin{ocamldoccode} + type exn +\end{ocamldoccode} +\index{exn@\verb`exn`} +\begin{ocamldocdescription} + The type of exception values. +\end{ocamldocdescription} + +\begin{ocamldoccode} + type 'a array +\end{ocamldoccode} +\index{array@\verb`array`} +\begin{ocamldocdescription} + The type of arrays whose elements have type "'a". +\end{ocamldocdescription} + +\begin{ocamldoccode} + type 'a list = [] | :: of 'a * 'a list +\end{ocamldoccode} +\index{list@\verb`list`} +\begin{ocamldocdescription} + The type of lists whose elements have type "'a". 
+\end{ocamldocdescription} + +\begin{ocamldoccode} +type 'a option = None | Some of 'a +\end{ocamldoccode} +\index{option@\verb`option`} +\begin{ocamldocdescription} + The type of optional values of type "'a". +\end{ocamldocdescription} + +\begin{ocamldoccode} +type int32 +\end{ocamldoccode} +\index{int32@\verb`int32`} +\begin{ocamldocdescription} + The type of signed 32-bit integers. + See the "Int32"[\moduleref{Int32}] module. +\end{ocamldocdescription} + +\begin{ocamldoccode} +type int64 +\end{ocamldoccode} +\index{int64@\verb`int64`} +\begin{ocamldocdescription} + The type of signed 64-bit integers. + See the "Int64"[\moduleref{Int64}] module. +\end{ocamldocdescription} + +\begin{ocamldoccode} +type nativeint +\end{ocamldoccode} +\index{nativeint@\verb`nativeint`} +\begin{ocamldocdescription} + The type of signed, platform-native integers (32 bits on 32-bit + processors, 64 bits on 64-bit processors). + See the "Nativeint"[\moduleref{Nativeint}] module. +\end{ocamldocdescription} + +\begin{ocamldoccode} +type ('a, 'b, 'c, 'd, 'e, 'f) format6 +\end{ocamldoccode} +\index{format4@\verb`format4`} +\begin{ocamldocdescription} + The type of format strings. "'a" is the type of the parameters of + the format, "'f" is the result type for the "printf"-style + functions, "'b" is the type of the first argument given to "%a" and + "%t" printing functions (see module "Printf"[\moduleref{Printf}]), + "'c" is the result type of these functions, and also the type of the + argument transmitted to the first argument of "kprintf"-style + functions, "'d" is the result type for the "scanf"-style functions + (see module "Scanf"[\moduleref{Scanf}]), + and "'e" is the type of the receiver function for the "scanf"-style + functions. +\end{ocamldocdescription} + +\begin{ocamldoccode} +type 'a lazy_t +\end{ocamldoccode} +\index{lazyt@\verb`lazy_t`} +\begin{ocamldocdescription} + This type is used to implement the "Lazy"[\moduleref{Lazy}] module. + It should not be used directly. 
+\end{ocamldocdescription} + +%\vspace{0.1cm} +\subsection*{Predefined exceptions} +%\vspace{0.1cm} + +\begin{ocamldoccode} +exception Match_failure of (string * int * int) +\end{ocamldoccode} +\index{Matchfailure@\verb`Match_failure`} +\begin{ocamldocdescription} + Exception raised when none of the cases of a pattern-matching + apply. The arguments are the location of the "match" keyword + in the source code (file name, line number, column number). +\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Assert_failure of (string * int * int) +\end{ocamldoccode} +\index{Assertfailure@\verb`Assert_failure`} +\begin{ocamldocdescription} + Exception raised when an assertion fails. The arguments are + the location of the "assert" keyword in the source code + (file name, line number, column number). +\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Invalid_argument of string +\end{ocamldoccode} +\index{Invalidargument@\verb`Invalid_argument`} +\begin{ocamldocdescription} + Exception raised by library functions to signal that the given + arguments do not make sense. The string gives some information + to the programmer. As a general rule, this exception should not + be caught, it denotes a programming error and the code should be + modified not to trigger it. +\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Failure of string +\end{ocamldoccode} +\index{Failure@\verb`Failure`} +\begin{ocamldocdescription} + Exception raised by library functions to signal that they are + undefined on the given arguments. The string is meant to give some + information to the programmer; you must \emph{not} pattern match on + the string literal because it may change in future versions (use + \verb`Failure _` instead). +\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Not_found +\end{ocamldoccode} +\index{Notfound@\verb`Not_found`} +\begin{ocamldocdescription} + Exception raised by search functions when the desired object + could not be found. 
+\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Out_of_memory +\end{ocamldoccode} +\index{Outofmemory@\verb`Out_of_memory`} +\begin{ocamldocdescription} + Exception raised by the garbage collector + when there is insufficient memory to complete the computation. +\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Stack_overflow +\end{ocamldoccode} +\index{Stackoverflow@\verb`Stack_overflow`} +\begin{ocamldocdescription} + Exception raised by the bytecode interpreter when the evaluation + stack reaches its maximal size. This often indicates infinite + or excessively deep recursion in the user's program. + (Not fully implemented by the native-code compiler; + see section~\ref{s:compat-native-bytecode}.) +\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Sys_error of string +\end{ocamldoccode} +\index{Syserror@\verb`Sys_error`} +\begin{ocamldocdescription} + Exception raised by the input/output functions to report an + operating system error. The string is meant to give some + information to the programmer; you must \emph{not} pattern match on + the string literal because it may change in future versions (use + \verb`Sys_error _` instead). +\end{ocamldocdescription} + +\begin{ocamldoccode} +exception End_of_file +\end{ocamldoccode} +\index{Endoffile@\verb`End_of_file`} +\begin{ocamldocdescription} + Exception raised by input functions to signal that the + end of file has been reached. +\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Division_by_zero +\end{ocamldoccode} +\index{Divisionbyzero@\verb`Division_by_zero`} +\begin{ocamldocdescription} + Exception raised by integer division and remainder operations + when their second argument is zero. +\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Sys_blocked_io +\end{ocamldoccode} +\index{Sysblockedio@\verb`Sys_blocked_io`} +\begin{ocamldocdescription} + A special case of "Sys_error" raised when no I/O is possible + on a non-blocking I/O channel. 
+\end{ocamldocdescription} + +\begin{ocamldoccode} +exception Undefined_recursive_module of (string * int * int) +\end{ocamldoccode} +\index{Undefinedrecursivemodule@\verb`Undefined_recursive_module`} +\begin{ocamldocdescription} + Exception raised when an ill-founded recursive module definition + is evaluated. (See section~\ref{s-recursive-modules}.) + The arguments are the location of the definition in the source code + (file name, line number, column number). +\end{ocamldocdescription} + diff --git a/manual/manual/library/compilerlibs.etex b/manual/manual/library/compilerlibs.etex new file mode 100644 index 00000000..db924cc5 --- /dev/null +++ b/manual/manual/library/compilerlibs.etex @@ -0,0 +1,74 @@ +\chapter{The compiler front-end} \label{c:parsinglib}\cutname{parsing.html} +\pdfchapterfold{-1}{The compiler front-end} + +This chapter describes the OCaml front-end, which declares the abstract +syntax tree used by the compiler, provides a way to parse, print +and pretty-print OCaml code, and ultimately makes it possible to write abstract +syntax tree preprocessors invoked via the {\tt -ppx} flag (see chapters~\ref{c:camlc} +and~\ref{c:nativecomp}) and plugins invoked via the {\tt -plugin} flag +(see chapter~\ref{c:plugins}). + +It is important to note that the exported front-end interface follows the evolution of the OCaml language and implementation, and thus does not provide {\bf any} backwards compatibility guarantees. + +The front-end is part of the "compiler-libs" library. +Programs that use the "compiler-libs" library should be built as follows: +\begin{alltt} + ocamlfind ocamlc \var{other options} -package compiler-libs.common \var{other files} + ocamlfind ocamlopt \var{other options} -package compiler-libs.common \var{other files} +\end{alltt} +Use of the {\tt ocamlfind} utility is recommended. 
However, if this is not possible, an alternative method may be used: +\begin{alltt} + ocamlc \var{other options} -I +compiler-libs ocamlcommon.cma \var{other files} + ocamlopt \var{other options} -I +compiler-libs ocamlcommon.cmxa \var{other files} +\end{alltt} +For interactive use of the "compiler-libs" library, start "ocaml" and +type\\*"#load \"compiler-libs/ocamlcommon.cma\";;". + +% Some of the files below are commented out as the documentation is too poor +% or they are thought to be nonessential. + +\ifouthtml +\begin{links} +\item \ahref{libref/Ast\_helper.html}{Module \texttt{Ast_helper}: helper functions for AST construction} +\item \ahref{libref/Ast\_mapper.html}{Module \texttt{Ast_mapper}: -ppx rewriter interface} +\item \ahref{libref/Asttypes.html}{Module \texttt{Asttypes}: auxiliary types used by Parsetree} +% \item \ahref{libref/Lexer.html}{Module \texttt{Lexer}: OCaml syntax lexing} +\item \ahref{libref/Location.html}{Module \texttt{Location}: source code locations} +\item \ahref{libref/Longident.html}{Module \texttt{Longident}: long identifiers} +\item \ahref{libref/Parse.html}{Module \texttt{Parse}: OCaml syntax parsing} +\item \ahref{libref/Parsetree.html}{Module \texttt{Parsetree}: OCaml syntax tree} +\item \ahref{libref/Pprintast.html}{Module \texttt{Pprintast}: OCaml syntax printing} +% \item \ahref{libref/Printast.html}{Module \texttt{Printast}: AST printing} +\end{links} + +\else +% Ast_helper is excluded from the PDF and text manuals. +% It is over 20 pages long and does not have doc-comments. It is expected +% that Ast_helper will be only useful in the HTML manual (to look up signatures). 
+% \input{Asthelper.tex} +\input{Astmapper.tex} +\input{Asttypes.tex} +% \input{Lexer.tex} +\input{Location.tex} +\input{Longident.tex} +\input{Parse.tex} +\input{Parsetree.tex} +\input{Pprintast.tex} +% \input{Printast.tex} +\fi + +\ifouthtml +The following modules provide hooks for compiler plugins: +\begin{links} +\item \ahref{libref/Pparse.html}{Module \texttt{Pparse}: OCaml parser driver} +\item \ahref{libref/Typemod.html}{Module \texttt{Typemod}: +OCaml module type checking} +\item \ahref{libref/Simplif.html}{Module \texttt{Simplif}: Lambda simplification} +\item \ahref{libref/Clflags.html}{Module \texttt{Clflags}: command line flags} +\end{links} +\else +\input{Pparse.tex} +\input{Typemod.tex} +\input{Simplif.tex} +\input{Clflags.tex} +\fi diff --git a/manual/manual/library/core.etex b/manual/manual/library/core.etex new file mode 100644 index 00000000..58297eab --- /dev/null +++ b/manual/manual/library/core.etex @@ -0,0 +1,36 @@ +\chapter{The core library} \label{c:corelib}\cutname{core.html} +\pdfchapterfold{-1}{The core library} + +This chapter describes the OCaml core library, which is + composed of declarations for built-in types and exceptions, plus +the module "Pervasives" that provides basic operations on these + built-in types. The "Pervasives" module is special in two +ways: +\begin{itemize} +\item It is automatically linked with the user's object code files by +the "ocamlc" command (chapter~\ref{c:camlc}). + +\item It is automatically ``opened'' when a compilation starts, or +when the toplevel system is launched. Hence, it is possible to use +unqualified identifiers to refer to the functions provided by the +"Pervasives" module, without adding an "open Pervasives" directive. +\end{itemize} + +\section*{Conventions} + +The declarations of the built-in types and the components of module +"Pervasives" are printed one by one in typewriter font, followed by a +short comment. 
All library modules and the components they provide are +indexed at the end of this report. + +\input{builtin.tex} + +\ifouthtml +\section{Module {\tt Pervasives}: the initially opened module} +\begin{links} +\item \ahref{libref/Pervasives.html}{Module \texttt{Pervasives}: the initially opened module} +\end{links} +\else +\input{Pervasives.tex} +\fi + diff --git a/manual/manual/library/libbigarray.etex b/manual/manual/library/libbigarray.etex new file mode 100644 index 00000000..a11d0589 --- /dev/null +++ b/manual/manual/library/libbigarray.etex @@ -0,0 +1,37 @@ +\chapter{The bigarray library} +\pdfchapterfold{-1}{The bigarray library} +%HEVEA\cutname{libbigarray.html} + +The "bigarray" library has now been integrated into OCaml's standard +library. + +The "bigarray" functionality may now be found in the standard library +\ifouthtml + \ahref{libref/Bigarray.html}{\texttt{Bigarray} module}, +\else + \texttt{Bigarray} module, +\fi +except for the "map_file" function which is now +part of the \hyperref[c:unix]{Unix library}. The documentation has +been integrated into the documentation for the standard library. + +The legacy "bigarray" library bundled with the compiler is a +compatibility library with exactly the same interface as before, +i.e. with "map_file" included. + +We strongly recommend that you port your code to use the standard +library version instead, as the changes required are minimal. + +If you choose to use the compatibility library, you must link your +programs as follows: +\begin{alltt} + ocamlc \var{other options} bigarray.cma \var{other files} + ocamlopt \var{other options} bigarray.cmxa \var{other files} +\end{alltt} +For interactive use of the "bigarray" compatibility library, do: +\begin{alltt} + ocamlmktop -o mytop bigarray.cma + ./mytop +\end{alltt} +or (if dynamic linking of C libraries is supported on your platform), +start "ocaml" and type "#load \"bigarray.cma\";;". 
diff --git a/manual/manual/library/libdynlink.etex b/manual/manual/library/libdynlink.etex new file mode 100644 index 00000000..2d4d92d9 --- /dev/null +++ b/manual/manual/library/libdynlink.etex @@ -0,0 +1,29 @@ +\chapter{The dynlink library: dynamic loading and linking of object files} +\pdfchapterfold{-1}{The dynlink library: dynamic loading and linking of object files} +%HEVEA\cutname{libdynlink.html} + +The "dynlink" library supports type-safe dynamic loading and linking +of bytecode object files (".cmo" and ".cma" files) in a running +bytecode program, or of native plugins (usually ".cmxs" files) in a +running native program. Type safety is ensured by limiting the set of +modules from the running program that the loaded object file can +access, and checking that the running program and the loaded object +file have been compiled against the same interfaces for these modules. +In native code, there are also some compatibility checks on the +implementations (to avoid errors with cross-module optimizations); it +might be useful to hide ".cmx" files when building native plugins so +that they remain independent of the implementation of modules in the +main program. + +Programs that use the "dynlink" library simply need to link +"dynlink.cma" or "dynlink.cmxa" with their object files and other libraries. + +\ifouthtml +\begin{links} +\item \ahref{libref/Dynlink.html}{Module \texttt{Dynlink}: dynamic loading of bytecode object files} +\end{links} + +\else +\input{Dynlink.tex} +\fi + diff --git a/manual/manual/library/libgraph.etex b/manual/manual/library/libgraph.etex new file mode 100644 index 00000000..28759f5b --- /dev/null +++ b/manual/manual/library/libgraph.etex @@ -0,0 +1,100 @@ +\chapter{The graphics library} +\pdfchapterfold{-1}{The graphics library} +%HEVEA\cutname{libgraph.html} + +The "graphics" library provides a set of portable drawing primitives. +Drawing takes place +in a separate window that is created when "Graphics.open_graph" is called. 
+ +\begin{unix} +This library is implemented under the X11 window system. +Programs that use the "graphics" library must be linked as follows: +\begin{alltt} + ocamlc \var{other options} graphics.cma \var{other files} +\end{alltt} +For interactive use of the "graphics" library, do: +\begin{alltt} + ocamlmktop -o mytop graphics.cma + ./mytop +\end{alltt} +or (if dynamic linking of C libraries is supported on your platform), +start "ocaml" and type "#load \"graphics.cma\";;". + +Here are the graphics mode specifications supported by +"Graphics.open_graph" on +the X11 implementation of this library: +the argument to "Graphics.open_graph" has the format +"\""{\it display-name} {\it geometry\/}"\"", +where {\it display-name} is the name of the X-windows display to +connect to, and {\it geometry} is a standard X-windows geometry +specification. The two components are separated by a space. Either can +be omitted, or both. Examples: +\begin{options} +\item["Graphics.open_graph \"foo:0\""] +connects to the display "foo:0" and creates a window with the default geometry +\item["Graphics.open_graph \"foo:0 300x100+50-0\""] +connects to the display "foo:0" and creates a window 300 pixels wide +by 100 pixels tall, at location $(50,0)$ +\item["Graphics.open_graph \" 300x100+50-0\""] +connects to the default display and creates a window 300 pixels wide +by 100 pixels tall, at location $(50,0)$ +\item["Graphics.open_graph \"\""] +connects to the default display and creates a window with the default +geometry. +\end{options} +\end{unix} + +\begin{windows} +This library is available both for standalone compiled programs and +under the toplevel application "ocamlwin.exe". For the latter, this +library must be loaded in-core by typing +\begin{verbatim} + #load "graphics.cma";; +\end{verbatim} +\end{windows} + +The screen coordinates are interpreted as shown in the figure below. 
+Notice that the coordinate system used is the same as in mathematics: +$y$ increases from the bottom of the screen to the top of the screen, +and angles are measured counterclockwise (in degrees). +Drawing is clipped to the screen. +% +\begin{latexonly} +\begin{center} +\setlength{\unitlength}{0.5mm} +\begin{picture}(130,100)(-10,-10) +\thicklines +\put(-10,0){\vector(1,0){130}} +\put(125,0){\makebox(0,0)[l]{$x$}} +\put(0,-10){\vector(0,1){100}} +\put(0,95){\makebox(0,0){$y$}} +\thinlines +\put(100,80){\line(-1,0){105}} +\put(100,80){\line(0,-1){85}} +\put(95,75){\makebox(0,0)[tr]{Screen}} +\put(100,-10){\makebox(0,0){\tt size\_x()}} +\put(-10,80){\makebox(0,0)[r]{\tt size\_y()}} +\put(30,40){\makebox(4,4){\rule{2mm}{2mm}}} +\put(36,40){pixel at $(x,y)$} +\put(30,40){\line(-1,0){35}} +\put(30,-10){\makebox(0,0){$x$}} +\put(30,40){\line(0,-1){45}} +\put(-10,40){\makebox(0,0)[r]{$y$}} +\end{picture} +\end{center} +\end{latexonly} + +\begin{htmlonly} +\begin{center} +\imgsrc{libgraph.gif} +\end{center} +\end{htmlonly} +% + +\ifouthtml +\begin{links} +\item \ahref{libref/Graphics.html}{Module \texttt{Graphics}: machine-independent graphics primitives} +\end{links} +\else +\input{Graphics.tex} +\fi diff --git a/manual/manual/library/libgraph.fig b/manual/manual/library/libgraph.fig new file mode 100644 index 00000000..55a6d1de --- /dev/null +++ b/manual/manual/library/libgraph.fig @@ -0,0 +1,29 @@ +#FIG 3.2 +Landscape +Center +Inches +Letter +100.00 +Single +-2 +1200 2 +2 1 0 1 0 7 0 0 -1 0.000 0 0 7 1 0 2 + 1 1 1.00 60.00 120.00 + 1050 3375 4575 3375 +2 1 0 1 0 7 0 0 -1 0.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 1200 3525 1200 825 +2 1 0 1 0 7 0 0 -1 0.000 0 0 7 0 0 3 + 1125 1200 3750 1200 3750 3450 +2 1 0 1 0 7 0 0 -1 0.000 0 0 -1 0 0 3 + 1125 2400 2475 2400 2475 3450 +2 2 0 1 0 0 0 0 20 0.000 0 0 7 0 0 5 + 2475 2400 2550 2400 2550 2325 2475 2325 2475 2400 +4 0 0 0 0 0 12 0.0000 4 135 525 2325 1500 Screen\001 +4 0 0 0 0 0 12 0.0000 4 180 990 2175 2250 point at 
(x,y)\001 +4 0 0 0 0 0 12 0.0000 4 90 90 2400 3600 x\001 +4 0 0 0 0 0 12 0.0000 4 135 90 975 2475 y\001 +4 0 0 0 0 0 12 0.0000 4 180 450 1050 750 y axis\001 +4 0 0 0 0 14 12 0.0000 4 180 840 225 1200 size_y()\001 +4 0 0 0 0 14 12 0.0000 4 165 840 3375 3600 size_x()\001 +4 0 0 0 0 0 12 0.0000 4 135 450 4650 3375 x axis\001 diff --git a/manual/manual/library/libgraph.png b/manual/manual/library/libgraph.png new file mode 100644 index 00000000..5841bfc8 Binary files /dev/null and b/manual/manual/library/libgraph.png differ diff --git a/manual/manual/library/libnum.etex b/manual/manual/library/libnum.etex new file mode 100644 index 00000000..5e53a196 --- /dev/null +++ b/manual/manual/library/libnum.etex @@ -0,0 +1,14 @@ +\chapter{The num library: arbitrary-precision rational arithmetic} +\pdfchapterfold{-3}{The num library: arbitrary-precision integer and rational arithmetic} +%HEVEA\cutname{libnum.html} + +The "num" library implements integer arithmetic and rational +arithmetic in arbitrary precision. It was split off the core +OCaml distribution starting with the 4.06.0 release, and can now be found +at \url{https://github.com/ocaml/num}. + +New applications that need arbitrary-precision arithmetic should use the +"Zarith" library (\url{https://github.com/ocaml/Zarith}) instead of the "Num" +library, and older applications that already use "Num" are encouraged to +switch to "Zarith". "Zarith" delivers much better performance than "Num" +and has a nicer API. diff --git a/manual/manual/library/libstr.etex b/manual/manual/library/libstr.etex new file mode 100644 index 00000000..a1939f8a --- /dev/null +++ b/manual/manual/library/libstr.etex @@ -0,0 +1,32 @@ +\chapter{The str library: regular expressions and string processing} +\pdfchapterfold{-1}{The str library: regular expressions and string processing} +%HEVEA\cutname{libstr.html} + +The "str" library provides high-level string processing functions, +some based on regular expressions. 
It is intended to support the kind +of file processing that is usually performed with scripting languages +such as "awk", "perl" or "sed". + +Programs that use the "str" library must be linked as follows: +\begin{alltt} + ocamlc \var{other options} str.cma \var{other files} + ocamlopt \var{other options} str.cmxa \var{other files} +\end{alltt} +For interactive use of the "str" library, do: +\begin{alltt} + ocamlmktop -o mytop str.cma + ./mytop +\end{alltt} +or (if dynamic linking of C libraries is supported on your platform), +start "ocaml" and type "#load \"str.cma\";;". + +\ifouthtml +\begin{links} +\item \ahref{libref/Str.html}{Module \texttt{Str}: regular expressions and string processing} +\end{links} + +\else +\input{Str.tex} +\fi + + diff --git a/manual/manual/library/libthreads.etex b/manual/manual/library/libthreads.etex new file mode 100644 index 00000000..795373c8 --- /dev/null +++ b/manual/manual/library/libthreads.etex @@ -0,0 +1,60 @@ +\chapter{The threads library} +\label{c:threads}\cutname{threads.html} +\pdfchapterfold{-5}{The threads library} +%HEVEA\cutname{libthreads.html} + +The "threads" library allows concurrent programming in OCaml. +It provides multiple threads of control (also called lightweight +processes) that execute concurrently in the same memory space. Threads +communicate by in-place modification of shared data structures, or by +sending and receiving data on communication channels. + +The "threads" library is implemented by time-sharing on a single +processor. It will not take advantage of multi-processor machines. +Using this library will therefore never make programs run +faster. However, many programs are easier to write when structured as +several communicating processes. + +Two implementations of the "threads" library are available, depending +on the capabilities of the operating system: +\begin{itemize} +\item System threads. 
This implementation builds on the OS-provided threads +facilities: POSIX 1003.1c threads for Unix, and Win32 threads for +Windows. When available, system threads support both bytecode and +native-code programs. +\item VM-level threads. This implementation performs time-sharing and +context switching at the level of the OCaml virtual machine (bytecode +interpreter). It is available on Unix systems, and supports only +bytecode programs. It cannot be used with native-code programs. +\end{itemize} +Programs that use system threads must be linked as follows: +\begin{alltt} + ocamlc -I +threads \var{other options} unix.cma threads.cma \var{other files} + ocamlopt -I +threads \var{other options} unix.cmxa threads.cmxa \var{other files} +\end{alltt} +Compilation units that use the "threads" library must also be compiled with +the "-I +threads" option (see chapter~\ref{c:camlc}). + +Programs that use VM-level threads must be compiled with the "-vmthread" +option to "ocamlc" (see chapter~\ref{c:camlc}), and be linked as follows: +\begin{alltt} + ocamlc -vmthread \var{other options} threads.cma \var{other files} +\end{alltt} +Compilation units that use the "threads" library must also be compiled with +the "-vmthread" option (see chapter~\ref{c:camlc}). 
+ +\ifouthtml +\begin{links} +\item \ahref{libref/Thread.html}{Module \texttt{Thread}: lightweight threads} +\item \ahref{libref/Mutex.html}{Module \texttt{Mutex}: locks for mutual exclusion} +\item \ahref{libref/Condition.html}{Module \texttt{Condition}: condition variables to synchronize between threads} +\item \ahref{libref/Event.html}{Module \texttt{Event}: first-class synchronous communication} +\item \ahref{libref/ThreadUnix.html}{Module \texttt{ThreadUnix}: thread-compatible system calls} +\end{links} +\else +\input{Thread.tex} +\input{Mutex.tex} +\input{Condition.tex} +\input{Event.tex} +\input{ThreadUnix.tex} +\fi diff --git a/manual/manual/library/libunix.etex b/manual/manual/library/libunix.etex new file mode 100644 index 00000000..6514a9f1 --- /dev/null +++ b/manual/manual/library/libunix.etex @@ -0,0 +1,98 @@ +\chapter{The unix library: Unix system calls} +\pdfchapterfold{-1}{The unix library: Unix system calls} +%HEVEA\cutname{libunix.html} +\label{c:unix} + +The "unix" library makes many Unix +system calls and system-related library functions available to +OCaml programs. This chapter describes briefly the functions +provided. Refer to sections 2~and~3 of the Unix manual for more +details on the behavior of these functions. + +\ifouthtml +\begin{links} +\item \ahref{libref/Unix.html}{Module \texttt{Unix}: Unix system calls} +\item \ahref{libref/UnixLabels.html}{Module \texttt{UnixLabels}: Labeled + Unix system calls} +\end{links} +\fi + +Not all functions are provided by all Unix variants. If some functions +are not available, they will raise "Invalid_argument" when called. 
+ +Programs that use the "unix" library must be linked as follows: +\begin{alltt} + ocamlc \var{other options} unix.cma \var{other files} + ocamlopt \var{other options} unix.cmxa \var{other files} +\end{alltt} +For interactive use of the "unix" library, do: +\begin{alltt} + ocamlmktop -o mytop unix.cma + ./mytop +\end{alltt} +or (if dynamic linking of C libraries is supported on your platform), +start "ocaml" and type "#load \"unix.cma\";;". + +\begin{windows} +A fairly complete emulation of the Unix system calls is provided in +the Windows version of OCaml. The end of this chapter gives +more information on the functions that are not supported under Windows. +\end{windows} + +\begin{latexonly} +\input{Unix.tex} + +\section{Module \texttt{UnixLabels}: labelized version of the interface} +\label{UnixLabels} +\index{UnixLabels (module)@\verb~UnixLabels~ (module)}% +\pdfsection{Module UnixLabels: labelized version of the interface} + +This module is identical to "Unix"~(\ref{Unix}), and only differs by +the addition of labels. You may see these labels directly by looking +at "unixLabels.mli", or by using the "ocamlbrowser" tool. + +\newpage +\end{latexonly} + +\begin{windows} +The Cygwin port of OCaml fully implements all functions from +the Unix module. The native Win32 ports implement a subset of them. +Below is a list of the functions that are not implemented, or only +partially implemented, by the Win32 ports. Functions not mentioned are +fully implemented and behave as described previously in this chapter. 
+ +\begin{tableau}{|l|p{8cm}|}{Functions}{Comment} +\entree{"fork"}{not implemented, use "create_process" or threads} +\entree{"wait"}{not implemented, use "waitpid"} +\entree{"waitpid"}{can only wait for a given PID, not any child process} +\entree{"getppid"}{not implemented (meaningless under Windows)} +\entree{"nice"}{not implemented} +\entree{"truncate", "ftruncate"}{not implemented} +\entree{"link"}{implemented (since 3.02)} +\entree{"symlink", "readlink"}{implemented (since 4.03.0)} +\entree{"access"}{execute permission "X_OK" cannot be tested, + it just tests for read permission instead} +\entree{"fchmod"}{not implemented} +\entree{"chown", "fchown"}{not implemented (make no sense on a DOS +file system)} +\entree{"umask"}{not implemented} +\entree{"mkfifo"}{not implemented} +\entree{"kill"}{partially implemented (since 4.00.0): only the "sigkill" signal +is implemented} +\entree{"pause"}{not implemented (no inter-process signals in Windows)} +\entree{"alarm"}{not implemented} +\entree{"times"}{partially implemented, will not report timings for child +processes} +\entree{"getitimer", "setitimer"}{not implemented} +\entree{"getuid", "geteuid", "getgid", "getegid"}{always return 1} +\entree{"getgroups"}{always returns "[|1|]" (since 2.00)} +\entree{"setuid", "setgid", "setgroups"}{not implemented} +\entree{"getpwnam", "getpwuid"}{always raise "Not_found"} +\entree{"getgrnam", "getgrgid"}{always raise "Not_found"} +\entree{type "socket_domain"}{"PF_INET" is fully supported; +"PF_INET6" is fully supported (since 4.01.0); "PF_UNIX" is not supported } +\entree{"establish_server"}{not implemented; use threads} +\entree{terminal functions ("tc*")}{not implemented} +\end{tableau} + +\end{windows} diff --git a/manual/manual/library/stdlib.etex b/manual/manual/library/stdlib.etex new file mode 100644 index 00000000..fb0e5578 --- /dev/null +++ b/manual/manual/library/stdlib.etex @@ -0,0 +1,200 @@ +\chapter{The standard library} \label{c:stdlib}\cutname{stdlib.html} 
+\pdfchapterfold{-32}{The standard library} + +This chapter describes the functions provided by the OCaml +standard library. The modules from the standard library are +automatically linked with the user's object code files by the "ocamlc" +command. Hence, these modules can be used in standalone programs without +having to add any ".cmo" file on the command line for the linking +phase. Similarly, in interactive use, these globals can be used in +toplevel phrases without having to load any ".cmo" file in memory. + +Unlike the "Pervasives" module from the core library, the modules from the +standard library are not automatically ``opened'' when a compilation +starts, or when the toplevel system is launched. Hence it is necessary +to use qualified identifiers to refer to the functions provided by these +modules, or to add "open" directives. + +\label{stdlib:top} + +\section*{Conventions} + +For easy reference, the modules are listed below in alphabetical order +of module names. +For each module, the declarations from its signature are printed +one by one in typewriter font, followed by a short comment. +All modules and the identifiers they export are indexed at the end of +this report. + +\begin{latexonly} +\section*{Overview} + +Here is a short listing, by theme, of the standard library modules. + +\subsubsection*{Data structures:} +\begin{tabular}{lll} +% Beware: these entries must be written in a very rigidly-defined +% format, or the check-stdlib-modules script will complain. 
+"String" & p.~\pageref{String} & string operations \\ +"Bytes" & p.~\pageref{Bytes} & operations on byte sequences\\ +"Array" & p.~\pageref{Array} & array operations \\ +"List" & p.~\pageref{List} & list operations \\ +"StdLabels" & p.~\pageref{StdLabels} & labelized versions of +the above 4 modules \\ +"Char" & p.~\pageref{Char} & character operations \\ +"Uchar" & p.~\pageref{Uchar} & Unicode characters \\ +"Sort" & p.~\pageref{Sort} & (deprecated) \\ +"Hashtbl" & p.~\pageref{Hashtbl} & hash tables and hash functions \\ +"Random" & p.~\pageref{Random} & pseudo-random number generator \\ +"Set" & p.~\pageref{Set} & sets over ordered types \\ +"Map" & p.~\pageref{Map} & association tables over ordered types \\ +"MoreLabels" & p.~\pageref{MoreLabels} & labelized versions of +"Hashtbl", "Set", and "Map" \\ +"Oo" & p.~\pageref{Oo} & useful functions on objects \\ +"Stack" & p.~\pageref{Stack} & last-in first-out stacks \\ +"Queue" & p.~\pageref{Queue} & first-in first-out queues \\ +"Buffer" & p.~\pageref{Buffer} & buffers that grow on demand \\ +"Seq" & p.~\pageref{Seq} & functional iterators \\ +"Lazy" & p.~\pageref{Lazy} & delayed evaluation \\ +"Weak" & p.~\pageref{Weak} & references that don't prevent objects +from being garbage-collected \\ +"Ephemeron" & p.~\pageref{Ephemeron} & ephemerons and weak hash tables \\ +"Bigarray" & p.~\pageref{Bigarray} & large, multi-dimensional, numerical arrays +\end{tabular} +\subsubsection*{Arithmetic:} +\begin{tabular}{lll} +"Complex" & p.~\pageref{Complex} & Complex numbers \\ +"Float" & p.~\pageref{Float} & Floating-point numbers \\ +"Int32" & p.~\pageref{Int32} & operations on 32-bit integers \\ +"Int64" & p.~\pageref{Int64} & operations on 64-bit integers \\ +"Nativeint" & p.~\pageref{Nativeint} & operations on platform-native +integers +\end{tabular} +\subsubsection{Input/output:} +\begin{tabular}{lll} +"Format" & p.~\pageref{Format} & pretty printing with automatic +indentation and line breaking \\ +"Marshal" & 
p.~\pageref{Marshal} & marshaling of data structures \\ +"Printf" & p.~\pageref{Printf} & formatting printing functions \\ +"Scanf" & p.~\pageref{Scanf} & formatted input functions \\ +"Digest" & p.~\pageref{Digest} & MD5 message digest \\ +\end{tabular} +\subsubsection{Parsing:} +\begin{tabular}{lll} +"Genlex" & p.~\pageref{Genlex} & a generic lexer over streams \\ +"Lexing" & p.~\pageref{Lexing} & the run-time library for lexers generated by "ocamllex" \\ +"Parsing" & p.~\pageref{Parsing} & the run-time library for parsers generated by "ocamlyacc" \\ +"Stream" & p.~\pageref{Stream} & basic functions over streams \\ +\end{tabular} +\subsubsection{System interface:} +\begin{tabular}{lll} +"Arg" & p.~\pageref{Arg} & parsing of command line arguments \\ +"Callback" & p.~\pageref{Callback} & registering OCaml functions to +be called from C \\ +"Filename" & p.~\pageref{Filename} & operations on file names \\ +"Gc" & p.~\pageref{Gc} & memory management control and statistics \\ +"Printexc" & p.~\pageref{Printexc} & a catch-all exception handler \\ +"Spacetime" & p.~\pageref{Spacetime} & memory profiler \\ +"Sys" & p.~\pageref{Sys} & system interface \\ +\end{tabular} +\end{latexonly} + +\ifouthtml +\begin{links} +\item \ahref{libref/Arg.html}{Module \texttt{Arg}: parsing of command line arguments} +\item \ahref{libref/Array.html}{Module \texttt{Array}: array operations} +\item \ahref{libref/ArrayLabels.html}{Module \texttt{ArrayLabels}: array operations (with labels)} +\item \ahref{libref/Bigarray.html}{Module \texttt{Bigarray}: large, multi-dimensional, numerical arrays} +\item \ahref{libref/Buffer.html}{Module \texttt{Buffer}: extensible buffers} +\item \ahref{libref/Bytes.html}{Module \texttt{Bytes}: byte sequences} +\item \ahref{libref/BytesLabels.html}{Module \texttt{BytesLabels}: byte sequences (with labels)} +\item \ahref{libref/Callback.html}{Module \texttt{Callback}: registering OCaml values with the C runtime} +\item \ahref{libref/Char.html}{Module 
\texttt{Char}: character operations} +\item \ahref{libref/Complex.html}{Module \texttt{Complex}: Complex numbers} +\item \ahref{libref/Digest.html}{Module \texttt{Digest}: MD5 message digest} +\item \ahref{libref/Ephemeron.html}{Module \texttt{Ephemeron}: Ephemerons and weak hash tables} +\item \ahref{libref/Filename.html}{Module \texttt{Filename}: operations on file names} +\item \ahref{libref/Float.html}{Module \texttt{Float}: Floating-point numbers} +\item \ahref{libref/Format.html}{Module \texttt{Format}: pretty printing} +\item \ahref{libref/Gc.html}{Module \texttt{Gc}: memory management control and statistics; finalized values} +\item \ahref{libref/Genlex.html}{Module \texttt{Genlex}: a generic lexical analyzer} +\item \ahref{libref/Hashtbl.html}{Module \texttt{Hashtbl}: hash tables and hash functions} +\item \ahref{libref/Int32.html}{Module \texttt{Int32}: 32-bit integers} +\item \ahref{libref/Int64.html}{Module \texttt{Int64}: 64-bit integers} +\item \ahref{libref/Lazy.html}{Module \texttt{Lazy}: deferred computations} +\item \ahref{libref/Lexing.html}{Module \texttt{Lexing}: the run-time library for lexers generated by \texttt{ocamllex}} +\item \ahref{libref/List.html}{Module \texttt{List}: list operations} +\item \ahref{libref/ListLabels.html}{Module \texttt{ListLabels}: list operations (with labels)} +\item \ahref{libref/Map.html}{Module \texttt{Map}: association tables over ordered types} +\item \ahref{libref/Marshal.html}{Module \texttt{Marshal}: marshaling of data structures} +\item \ahref{libref/MoreLabels.html}{Module \texttt{MoreLabels}: Include modules \texttt{Hashtbl}, \texttt{Map} and \texttt{Set} with labels} +\item \ahref{libref/Nativeint.html}{Module \texttt{Nativeint}: processor-native integers} +\item \ahref{libref/Oo.html}{Module \texttt{Oo}: object-oriented extension} +\item \ahref{libref/Parsing.html}{Module \texttt{Parsing}: the run-time library for parsers generated by \texttt{ocamlyacc}} +\item \ahref{libref/Printexc.html}{Module 
\texttt{Printexc}: facilities for printing exceptions} +\item \ahref{libref/Printf.html}{Module \texttt{Printf}: formatting printing functions} +\item \ahref{libref/Queue.html}{Module \texttt{Queue}: first-in first-out queues} +\item \ahref{libref/Random.html}{Module \texttt{Random}: pseudo-random number generator (PRNG)} +\item \ahref{libref/Scanf.html}{Module \texttt{Scanf}: formatted input functions} +\item \ahref{libref/Seq.html}{Module \texttt{Seq}: functional iterators} +\item \ahref{libref/Set.html}{Module \texttt{Set}: sets over ordered types} +\item \ahref{libref/Sort.html}{Module \texttt{Sort}: deprecated} +\item \ahref{libref/Spacetime.html}{Module \texttt{Spacetime}: memory profiler} +\item \ahref{libref/Stack.html}{Module \texttt{Stack}: last-in first-out stacks} +\item \ahref{libref/StdLabels.html}{Module \texttt{StdLabels}: Include modules \texttt{Array}, \texttt{List} and \texttt{String} with labels} +\item \ahref{libref/Stream.html}{Module \texttt{Stream}: streams and parsers} +\item \ahref{libref/String.html}{Module \texttt{String}: string operations} +\item \ahref{libref/StringLabels.html}{Module \texttt{StringLabels}: string operations (with labels)} +\item \ahref{libref/Sys.html}{Module \texttt{Sys}: system interface} +\item \ahref{libref/Uchar.html}{Module \texttt{Uchar}: Unicode characters} +\item \ahref{libref/Weak.html}{Module \texttt{Weak}: arrays of weak pointers} +\end{links} +\else +\input{Arg.tex} +\input{Array.tex} +\input{ArrayLabels.tex} +\input{Bigarray.tex} +\input{Buffer.tex} +\input{Bytes.tex} +\input{BytesLabels.tex} +\input{Callback.tex} +\input{Char.tex} +\input{Complex.tex} +\input{Digest.tex} +\input{Ephemeron.tex} +\input{Filename.tex} +\input{Float.tex} +\input{Format.tex} +\input{Gc.tex} +\input{Genlex.tex} +\input{Hashtbl.tex} +\input{Int32.tex} +\input{Int64.tex} +\input{Lazy.tex} +\input{Lexing.tex} +\input{List.tex} +\input{ListLabels.tex} +\input{Map.tex} +\input{Marshal.tex} +\input{MoreLabels.tex} 
+\input{Nativeint.tex} +\input{Oo.tex} +\input{Parsing.tex} +\input{Printexc.tex} +\input{Printf.tex} +\input{Queue.tex} +\input{Random.tex} +\input{Scanf.tex} +\input{Seq.tex} +\input{Set.tex} +\input{Sort.tex} +\input{Spacetime.tex} +\input{Stack.tex} +\input{StdLabels.tex} +\input{Stream.tex} +\input{String.tex} +\input{StringLabels.tex} +\input{Sys.tex} +\input{Uchar.tex} +\input{Weak.tex} +\fi diff --git a/manual/manual/macros.hva b/manual/manual/macros.hva new file mode 100644 index 00000000..343eb7d6 --- /dev/null +++ b/manual/manual/macros.hva @@ -0,0 +1,200 @@ +% Colors for links +\def\visited@color{\#0d46a3} +\def\link@color{\#4286f4} +\def\hover@color{\@getstylecolor{subsection}} +\newstyle{a:link}{color:\link@color;text-decoration:underline;} +\newstyle{a:visited}{color:\visited@color;text-decoration:underline;} +\newstyle{a:hover}{color:black;text-decoration:underline;background-color:\hover@color} + + +\newstyle{@media all}{@font-face \{ +/* fira-sans-regular - latin */ + font-family: 'Fira Sans'; + font-style: normal; + font-weight: 400; + src: url('/pub/docs/manual-ocaml/fonts/fira-sans-v8-latin-regular.eot'); /* IE9 Compat Modes */ + src: local('Fira Sans Regular'), local('FiraSans-Regular'), + url('/pub/docs/manual-ocaml/fonts/fira-sans-v8-latin-regular.eot?\#iefix') format('embedded-opentype'), /* IE6-IE8 */ + url('/pub/docs/manual-ocaml/fonts/fira-sans-v8-latin-regular.woff2') format('woff2'), /* Super Modern Browsers */ + url('/pub/docs/manual-ocaml/fonts/fira-sans-v8-latin-regular.woff') format('woff'), /* Modern Browsers */ + url('/pub/docs/manual-ocaml/fonts/fira-sans-v8-latin-regular.ttf') format('truetype'), /* Safari, Android, iOS */ + url('/pub/docs/manual-ocaml/fonts/fira-sans-v8-latin-regular.svg\#FiraSans') format('svg'); /* Legacy iOS */ +\}} + +% Compact layout +\newstyle{body}{ + max-width:750px; + width: 85\%; + margin: auto; + background: \#f7f7f7; + margin-top: 80px; + font-size: 1rem; +} + +% selects the index's title 
+\newstyle{.maintitle}{ + font-family: "Fira Sans", sans-serif; + text-align: center; +} + +\newstyle{h1, h2, h3}{ + font-family: "Fira Sans", sans-serif; + font-weight: normal; + border-bottom: 1px solid black; +} + +\newstyle{pre}{ + font-size: 1rem; + background: beige; + border: 1px solid grey; + padding: 10px; + overflow-y:auto; + white-space: pre-wrap; +} + +% More spacing between lines and inside tables +\newstyle{p,ul}{line-height:1.3em} +\newstyle{.cellpadding1 tr td}{padding:1px 4px} + +%Styles for caml-example and friends +\newstyle{div.caml-output}{color:maroon;} +\newstyle{div.caml-example pre}{margin:2ex 0px;} +% Styles for toplevel mode only +\newstyle{div.caml-example.toplevel div.caml-input::before} + {content:"\#"; color:black;} +\newstyle{div.caml-example.toplevel div.caml-input}{color:\#006000;} +%%% +\newcommand{\input@color}{\htmlcolor{006000}} +\newcommand{\output@color}{\maroon} +\newcommand{\machine}{\tt} +\newenvironment{machineenv}{\begin{alltt}}{\end{alltt}} +\newcommand{\firstline}{\ } +\newcommand{\examplespace}{\ } +\newcommand{\nextline}{\examplespace\ } +\newcommand{\@zyva}{\firstline\renewcommand{\?}{\nextline}} +\let\?=\@zyva +\newenvironment{camlunder}{\@style{U}}{} +\newcommand{\caml}{\begin{alltt}\renewcommand{\;}{}\renewcommand{\\}{\char92}\def\<{\begin{camlunder}}\def\>{\end{camlunder}}\activebracefalse} +\newcommand{\endcaml}{\activebracetrue\end{alltt} +} +\renewcommand{\:}{\renewcommand{\?}{\@zyva}} +\newcommand{\var}[1]{\textit{#1}} + +% Caml-example environment +\newcommand{\camlexample}[1]{ + \ifthenelse{\equal{#1}{toplevel}} + {\renewcommand{\examplespace}{\ }} + {\renewcommand{\examplespace}{}} + \fi + \@open{div}{class="caml-example #1"} +} +\newcommand{\endcamlexample}{ + \@close{div} + \renewcommand{\examplespace}{\ } +} + +\newcommand{\camlinput}{\@open{div}{class="caml-input"}} +\newcommand{\endcamlinput}{\@close{div}} +\newcommand{\camloutput}{\@open{div}{class="caml-output ok"}} 
+\newcommand{\endcamloutput}{\@close{div}} +\newcommand{\camlerror}{\@open{div}{class="caml-output error"}} +\newcommand{\endcamlerror}{\@close{div}} +\newcommand{\camlwarn}{\@open{div}{class="caml-output warn"}} +\newcommand{\endcamlwarn}{\@close{div}} + +\newenvironment{library}{}{} +\newcounter{page} +\newenvironment{comment}{\begin{quote}}{\end{quote}} +\newcommand{\nth}[2]{\({#1}_{#2}\)} +\newenvironment{options}{\begin{description}}{\end{description}} + + +%%venant de macros.tex + +\def\versionspecific#1{\begin{quote}\textsf{#1:}\quad} +\def\unix{\versionspecific{Unix}} +\def\endunix{\end{quote}} +\def\macos{\versionspecific{MacOS~9}} +\def\endmacos{\end{quote}} +\def\windows{\versionspecific{Windows}} +\def\endwindows{\end{quote}} + +\def\requirements{\trivlist \item[\hskip\labelsep {\bf Requirements.}]} +\def\endrequirements{\endtrivlist} +\def\installation{\trivlist \item[\hskip\labelsep {\bf Installation.}]} +\def\endinstallation{\endtrivlist} +\def\troubleshooting{\trivlist \item[\hskip\labelsep {\bf Troubleshooting.}]} +\def\endtroubleshooting{\endtrivlist} + +\newtheorem{gcrule}{Rule} + +% Pour les tables de priorites et autres tableaux a deux colonnes, encadres + +\def\entree#1#2{#1 & #2 \\} +\def\tableau#1#2#3{% +\par +\@open{div}{class="tableau"} +\begin{center}% +\begin{tabular*}{.8\linewidth}{#1}% +\multicolumn{1}{c}{\textbf{#2}} & +\multicolumn{1}{c}{\textbf{#3}} \\ +%%#2 & #3 \\% +}% +\def\endtableau{\end{tabular*}\end{center}\@close{div}\par} + +\newstyle{.tableau, .syntax, .syntaxleft}{ + /* same width as body */ + max-width: 750px; + overflow-y: auto; +} + +% L'environnement library (pour composer les descriptions des modules +% de bibliotheque). 
+ + +\def\restoreindent{\begingroup\let\@listI=\@savedlistI} +\def\endrestoreindent{\endgroup} + + +% PDF stuff + +\def\pdfchapterfold#1#2{} +\def\pdfsection#1{} +\def\pdfchapter{\pdfchapterfold{0}} + +%%% Pour camlidl + +\def\transl#1{$[\![\mbox{#1}]\!]$} + +% Pour l'index +\usepackage{multind} +\let\indexentry=\index +\renewcommand{\index}[1]{\indexentry{\jobname}{#1}} +\def\ikwd#1{\indexentry{\jobname.kwd}{#1}} +% nth + +\def\th{^{\mbox{\scriptsize th}}} +\renewcommand{\hbox}[1]{\mbox{#1}} + +% Notations pour les metavariables +\def\nmth#1#2#3{\({#1}_{#2}^{#3}\)} +\def\optvar#1{[\var{#1}\/]} +\def\event{§§} +\def\fromoneto#1#2{$#1 = 1,\ldots{} , #2$} + +\newcommand{\vfill}{} +\def\number{} +\def\year{2013} + +% Pour alltt +\def\rminalltt#1{{\rm #1}} +\def\goodbreak{\ \\} +\def\@savedlistI{} + +%List of links with no space around items +\newstyle{.li-links}{margin:0ex 0ex;} +\newenvironment{links} +{\setenvclass{itemize}{ftoc2}\setenvclass{li-itemize}{li-links}\itemize} +{\enditemize} + +\newenvironment{maintitle}{\@open{div}{class="maintitle"}}{\@close{div}} diff --git a/manual/manual/macros.tex b/manual/manual/macros.tex new file mode 100644 index 00000000..454bff2a --- /dev/null +++ b/manual/manual/macros.tex @@ -0,0 +1,252 @@ +\makeatletter +% Pour hevea +\newif\ifouthtml\outhtmlfalse +\newcommand{\cutname}[1]{} +% Notations pour les metavariables +\def\var#1{{\it#1}} +\def\nth#1#2{${\it#1}_{#2}$} +\def\nmth#1#2#3{${\it#1}_{#2}^{#3}$} +\def\optvar#1{\textrm{[}\var{#1}\/\textrm{]}} +\def\event{$\bowtie$} +\def\fromoneto#1#2{$#1 = 1, \ldots, #2$} + +% Pour avoir les exposants sur la ligne au-dessus (???) 
+ +\ifplaintext +\fontdimen14\tensy=12pt +\fi + +% Numerotation +\setcounter{secnumdepth}{2} % Pour numeroter les \subsection +\setcounter{tocdepth}{1} % Pour ne pas mettre les \subsection + % dans la table des matieres + +% Pour avoir "_" qui marche en mode math et en mode normal +\catcode`\_=13 +\catcode`\=8 +\def\_{\hbox{\tt\char95}} +\def_{\ifmmode\else\_\fi} + +\ifplaintext +\def\ttstretch{\tt} +\else +\def\ttstretch{\tt\spaceskip=5.77pt plus 1.83pt minus 1.22pt} +% La fonte cmr10 a normalement des espaces de 5.25pt non extensibles. +% En 11 pt ca fait 5.77 pt. On lui ajoute la meme flexibilite que +% cmr10 agrandie a 11 pt. +\fi + +% Pour la traduction "xxxx" -> {\machine{xxxx}} faite par texquote2 +\def\machine#1{\mbox{\ttstretch{#1}}} + +% Pour la traduction "\begin{verbatim}...\end{verbatim}" +% -> "\begin{machineenv}...\end{machineenv}" +% faite aussi par texquote2. +\newenvironment{machineenv}{\alltt}{\endalltt} + +% Environnements + +\newlength{\versionwidth} +\setbox0=\hbox{\bf Windows:} \setlength{\versionwidth}{\wd0} + +\def\versionspecific#1{ + \begin{description}\item[#1:]~\\} + +\def\unix{\versionspecific{Unix}} +\def\endunix{\end{description}} +%\def\macos{\versionspecific{MacOS 9}} +%\def\endmacos{\end{description}} +\def\windows{\versionspecific{Windows}} +\def\endwindows{\end{description}} + +\def\requirements{\trivlist \item[\hskip\labelsep {\bf Requirements.}]} +\def\endrequirements{\endtrivlist} +\def\installation{\trivlist \item[\hskip\labelsep {\bf Installation.}]} +\def\endinstallation{\endtrivlist} +\def\troubleshooting{\trivlist \item[\hskip\labelsep {\bf Troubleshooting.}]} +\def\endtroubleshooting{\endtrivlist} + +\newtheorem{gcrule}{Rule} + +% Pour les tables de priorites et autres tableaux a deux colonnes, encadres + +\def\tableau#1#2#3{% +\begin{center} +\begin{tabular}{#1} +\hline +#2 & #3 \\ +\hline +} +\def\endtableau{\hline\end{tabular}\end{center}} +\def\entree#1#2{#1 & #2 \\} + +% L'environnement option + 
+\def\optionitem[#1]{\if@noparitem \@donoparitem + \else \if@inlabel \indent \par \fi + \ifhmode \unskip\unskip \par \fi + \if@newlist \if@nobreak \@nbitem \else + \addpenalty\@beginparpenalty + \addvspace\@topsep \addvspace{-\parskip}\fi + \else \addpenalty\@itempenalty \addvspace\itemsep + \fi + \global\@inlabeltrue +\fi +\everypar{\global\@minipagefalse\global\@newlistfalse + \if@inlabel\global\@inlabelfalse \hskip -\parindent \box\@labels + \penalty\z@ \fi + \everypar{}}\global\@nobreakfalse +\if@noitemarg \@noitemargfalse \if@nmbrlist \refstepcounter{\@listctr}\fi \fi +\setbox\@tempboxa\hbox{\makelabel{#1}}% +\global\setbox\@labels +\ifdim \wd\@tempboxa >\labelwidth + \hbox{\unhbox\@labels + \hskip -\leftmargin + \box\@tempboxa}\hfil\break + \else + \hbox{\unhbox\@labels + \hskip -\leftmargin + \hbox to\leftmargin {\makelabel{#1}\hfil}} + \fi + \ignorespaces} + +\def\optionlabel#1{\bf #1} +\def\options{\list{}{\let\makelabel\optionlabel\let\@item\optionitem}} +\def\endoptions{\endlist} + +% L'environnement library (pour composer les descriptions des modules +% de bibliotheque). 
+ +\def\comment{\penalty200\list{}{}\item[]} +\def\endcomment{\endlist\penalty-100} + +\def\library{ +\begingroup +\raggedright +\let\@savedlistI=\@listI% +\def\@listI{\leftmargin\leftmargini\parsep 0pt plus 1pt\topsep 0pt plus 2pt}% +\itemsep 0pt +\topsep 0pt plus 2pt +\partopsep 0pt +} + +\def\endlibrary{ +\endgroup +} + +\def\restoreindent{\begingroup\let\@listI=\@savedlistI} +\def\endrestoreindent{\endgroup} + +% ^^A...^^A: compose l'interieur en \tt, comme \verb + +\catcode`\^^A=\active +\def{% +\begingroup\catcode``=13\@noligs\ttstretch\let\do\@makeother\dospecials% +\def\@xobeysp{\leavevmode\penalty100\ }% +\@vobeyspaces\frenchspacing\catcode`\^^A=\active\def{\endgroup}} + +% Pour l'index + +\let\indexentry=\index +\def\index{\indexentry{\jobname}} +\def\ikwd{\indexentry{\jobname.kwd}} + +% Les en-tetes personnalises + +\pagestyle{myheadings} +\def\partmark#1{\markboth{Part \thepart. \ #1}{}} +\def\chaptermark#1{\markright{Chapter \thechapter. \ #1}} + +% nth + +\def\th{^{\hbox{\scriptsize th}}} + +% Pour annuler l'espacement vertical qui suit un "verbatim" +\def\cancelverbatim{\vspace{-\topsep}\vspace{-\parskip}}% exact. + +% Pour annuler l'espacement vertical entre deux \item consecutifs dans \options +\def\cancelitemspace{\vspace{-8mm}}% determine empiriquement + +% Pour faire la cesure apres _ dans les identificateurs +\def\={\discretionary{}{}{}} +\def\cuthere{\discretionary{}{}{}} + +% Pour la coupure en petits documents + +\let\mysection=\section + +%%% Augmenter l'espace entre numero de section +% et nom de section dans la table des matieres. 
+ +\ifplaintext\else +\def\l@section{\@dottedtocline{1}{1.5em}{2.8em}} % D'origine: 2.3 +\fi + +% Pour alltt + +\def\rminalltt#1{{\rm #1}} + +% redefinition de l'environnement alltt pour que les {} \ et % soient +% dans la bonne fonte + +\let\@oldalltt=\alltt +\let\@oldendalltt=\endalltt +\renewenvironment{alltt}{% +\begingroup% +\renewcommand{\{}{\char`\{}% +\renewcommand{\}}{\char`\}}% +\renewcommand{\\}{\char`\\}% +\renewcommand{\%}{\char`\%}% +\@oldalltt% +}{% +\@oldendalltt% +\endgroup% +} + +% Index stuff -- cf multind.sty + +\def\printindex#1#2{\@restonecoltrue\if@twocolumn\@restonecolfalse\fi + \columnseprule \z@ \columnsep 35pt + \newpage \phantomsection \twocolumn[{\Large\bf #2 \vskip4ex}] + \markright{\uppercase{#2}} + \addcontentsline{toc}{chapter}{#2} + \pdfsection{#2} + \@input{#1.ind}} + +% PDF stuff -- no longer needed, Hyperref does the job + +\def\pdfchapterfold#1#2{} +\def\pdfchapter#1{} +\def\pdfsection#1{} + +%\ifpdf +%\newcount\pdflabel +%\pdflabel=1 +%\def\pdfchapterfold#1#2{ +%\pdfdest num \pdflabel fit +%\pdfoutline goto num \pdflabel count #1 {\arabic{chapter}. 
#2} +%\global\advance\pdflabel by 1 +%} +%\def\pdfsection#1{ +%\pdfdest num \pdflabel fit +%\pdfoutline goto num \pdflabel {#1} +%\global\advance\pdflabel by 1 +%} +%\else +%\def\pdfchapterfold#1#2{} +%\def\pdfsection#1{} +%\fi +% +%\def\pdfchapter{\pdfchapterfold{0}} + +%%% Pour camlidl + +\def\transl#1{$[\![\mbox{#1}]\!]$} + +%%% Pour les references des modules +\newcommand{\moduleref}[1]{\ref{#1}} +%%% Fin des hacks + +\newenvironment{maintitle}{\begin{center}}{\end{center}} + +\makeatother diff --git a/manual/manual/manual.hva b/manual/manual/manual.hva new file mode 100644 index 00000000..942dde96 --- /dev/null +++ b/manual/manual/manual.hva @@ -0,0 +1,3 @@ +\input{book.hva} +\input{macros.hva} +\newif\ifouthtml\outhtmltrue diff --git a/manual/manual/manual.inf b/manual/manual/manual.inf new file mode 100644 index 00000000..65f64104 --- /dev/null +++ b/manual/manual/manual.inf @@ -0,0 +1,121 @@ +\input{book.hva} +\renewcommand{\@indexsection}[1]{\chapter{#1}} +\newcommand{\black}{\htmlcolor{#000000}} +\newcommand{\machine}{\tt} +\newenvironment{machineenv}{\begin{alltt}}{\end{alltt}} +\newenvironment{camlunder}{\@style{U}}{} +\newcommand{\caml}{\begin{alltt}\renewcommand{\\}{\char92}\def\<{\begin{camlunder}}\def\>{\end{camlunder}}\activebracefalse} +\newcommand{\endcaml}{\activebracetrue\end{alltt}} +\newcommand{\?}{\black\#\blue } +\renewcommand{\:}{\maroon} +\def\camlinput{} +\def\endcamlinput{} +\def\camloutput{} +\def\endcamloutput{} +\def\camlerror{} +\def\endcamlerror{} +\def\camlwarn{} +\def\endcamlwarn{} +\newcommand{\var}[1]{\textit{#1}} + +\newenvironment{library}{}{} +\newcounter{page} +\newenvironment{comment}{\begin{quote}}{\end{quote}} +\newcommand{\nth}[2]{\({#1}_{#2}\)} +\newenvironment{options}{\begin{description}}{\end{description}} + + +%%venant de macros.tex +\newif\ifouthtml\outhtmlfalse +\def\versionspecific#1{ +\quad\textsf{#1:} +\begin{quote}} + +\def\unix{\versionspecific{Unix}} +\def\endunix{\end{quote}} 
+\def\macos{\versionspecific{MacOS}} +\def\endmacos{\end{quote}} +\def\windows{\versionspecific{Windows}} +\def\endwindows{\end{quote}} + +\def\requirements{\trivlist \item[\hskip\labelsep {\bf Requirements.}]} +\def\endrequirements{\endtrivlist} +\def\installation{\trivlist \item[\hskip\labelsep {\bf Installation.}]} +\def\endinstallation{\endtrivlist} +\def\troubleshooting{\trivlist \item[\hskip\labelsep {\bf Troubleshooting.}]} +\def\endtroubleshooting{\endtrivlist} + +\newtheorem{gcrule}{Rule} + +% Pour les tables de priorites et autres tableaux a deux colonnes, encadres + +%\def\entree#1#2{#1 & #2 \\} +%\def\tableau#1#2#3{% +%\par\begin{center}% +%\begin{tabular}{#1}% +%\multicolumn{1}{c}{\textbf{#2}} & +%\multicolumn{1}{c}{\textbf{#3}} \\ +%%#2 & #3 \\% +%}% +%\def\endtableau{\end{tabular}\end{center}\par} + +% Pour les tables de priorites et autres tableaux a deux colonnes, encadres + +\def\tableau#1#2#3{% +\begin{center} +\begin{tabular}{#1} +\hline +\multicolumn{1}{|c|}{\textbf{#2}} & \multicolumn{1}{c|}{\textbf{#3}} \\ +\hline +} +\def\endtableau{\hline\end{tabular}\end{center}} +\def\entree#1#2{#1 & #2 \\} + + + +% L'environnement library (pour composer les descriptions des modules +% de bibliotheque). 
+ + +\def\restoreindent{\begingroup\let\@listI=\@savedlistI} +\def\endrestoreindent{\endgroup} + + +% PDF stuff + +\def\pdfchapterfold#1#2{} +\def\pdfsection#1{} +\def\pdfchapter{\pdfchapterfold{0}} + +%%% Pour camlidl + +\def\transl#1{$[\![\mbox{#1}]\!]$} + +% Pour l'index +\usepackage{multind} +\let\indexentry=\index +\renewcommand{\index}[1]{\indexentry{\jobname}{#1}} +\def\ikwd#1{\indexentry{\jobname.kwd}{#1}} + + +% nth +\def\th{^{\mbox{\scriptsize th}}} +\renewcommand{\hbox}[1]{\mbox{#1}} + +% Notations pour les metavariables +\def\nmth#1#2#3{\({#1}_{#2}^{#3}\)} +\def\optvar#1{[\var{#1}\/]} +\def\event{§§} +\def\fromoneto#1#2{$#1 = 1,\ldots{} , #2$} + +\newcommand{\vfill}{} +\def\number{} +\def\year{2013} + +% Pour alltt + +\def\rminalltt#1{{\rm #1}} + +\def\goodbreak{\ \\} + +\def\@savedlistI{} diff --git a/manual/manual/manual.info.header b/manual/manual/manual.info.header new file mode 100644 index 00000000..74665158 --- /dev/null +++ b/manual/manual/manual.info.header @@ -0,0 +1,4 @@ +INFO-DIR-SECTION OCaml Programming Language +START-INFO-DIR-ENTRY +* ocaml: (ocaml). 
OCaml Reference Manual +END-INFO-DIR-ENTRY diff --git a/manual/manual/manual.tex b/manual/manual/manual.tex new file mode 100644 index 00000000..c318ba76 --- /dev/null +++ b/manual/manual/manual.tex @@ -0,0 +1,42 @@ +\documentclass[11pt]{book} +\usepackage[latin1]{inputenc} +%HEVEA\@def@charset{US-ASCII}% +\usepackage{alltt} +\usepackage{fullpage} +\usepackage{syntaxdef} +\usepackage{multind} +\usepackage{html} +\usepackage{textcomp} +\usepackage{caml-sl} +\usepackage{ocamldoc} +\usepackage{xspace} +\newif\ifplaintext +\plaintextfalse +%\newif\ifpdf +%\pdffalse + +\input{macros.tex} + +% Add meta tag to the generated head tag +\ifouthtml +\let\oldmeta=\@meta +\renewcommand{\@meta}{ +\oldmeta +\begin{rawhtml} + +\end{rawhtml} +} +\fi + +\usepackage{hyperref} +%\makeatletter \def\@wrindex#1#2{\xdef \@indexfile{\csname #1@idxfile\endcsname}\@@wrindex#2||\\}\makeatother +\def\th{^{\hbox{\scriptsize th}}} + +\raggedbottom +\input{version.tex} +%HEVEA\tocnumber +%HEVEA\setcounter{cuttingdepth}{1} +%HEVEA\title{The OCaml system, release \ocamlversion} +\input{allfiles.tex} + + diff --git a/manual/manual/pdfmanual.tex b/manual/manual/pdfmanual.tex new file mode 100644 index 00000000..73264605 --- /dev/null +++ b/manual/manual/pdfmanual.tex @@ -0,0 +1,31 @@ +%\pdfoutput=1 +\pdfpagewidth=21cm +\pdfpageheight=11in +\pdfcompresslevel=7 + +\documentclass[11pt]{book} + +\usepackage[latin1]{inputenc} +\usepackage{alltt} +\usepackage{fullpage} +\usepackage{syntaxdef} +\usepackage{multind} +\usepackage{html} +\usepackage{textcomp} +\usepackage{caml-sl} +\usepackage{ocamldoc} +\usepackage{xspace} + +\newif\ifplaintext +\plaintextfalse +%\newif\ifpdf +%\pdftrue +\input macros.tex + +\usepackage[colorlinks,linkcolor=blue]{hyperref} +\def\th{^{\hbox{\scriptsize th}}} + +\raggedbottom +\input{version.tex} + +\input allfiles.tex diff --git a/manual/manual/plaintext.tex b/manual/manual/plaintext.tex new file mode 100644 index 00000000..86201b4b --- /dev/null +++ 
b/manual/manual/plaintext.tex @@ -0,0 +1,17 @@ +\documentclass[11pt]{report} + +\usepackage{plaintext} +\usepackage[latin1]{inputenc} +\usepackage{alltt} +\usepackage{fullpage} +\usepackage{syntaxdef} +\usepackage{multind} +\usepackage{html} +\usepackage{caml-sl} + +\newif\ifplaintext +\plaintexttrue +%\newif\ifpdf +%\pdffalse +\input macros.tex +\input allfiles.tex diff --git a/manual/manual/refman/.cvsignore b/manual/manual/refman/.cvsignore new file mode 100644 index 00000000..81ccbe71 --- /dev/null +++ b/manual/manual/refman/.cvsignore @@ -0,0 +1,2 @@ +*.tex +*.htex diff --git a/manual/manual/refman/.gitignore b/manual/manual/refman/.gitignore new file mode 100644 index 00000000..81ccbe71 --- /dev/null +++ b/manual/manual/refman/.gitignore @@ -0,0 +1,2 @@ +*.tex +*.htex diff --git a/manual/manual/refman/Makefile b/manual/manual/refman/Makefile new file mode 100644 index 00000000..61883ae5 --- /dev/null +++ b/manual/manual/refman/Makefile @@ -0,0 +1,43 @@ +FILES= refman.tex lex.tex names.tex values.tex const.tex types.tex \ + patterns.tex expr.tex typedecl.tex modtypes.tex modules.tex compunit.tex \ + exten.tex classes.tex + +TOPDIR=../../.. 
+ +include $(TOPDIR)/Makefile.tools + +LD_PATH="$(TOPDIR)/otherlibs/str:$(TOPDIR)/otherlibs/unix" + +CAMLLATEX=$(SET_LD_PATH) $(OCAMLRUN) ../../tools/caml-tex2 \ + -caml "TERM=norepeat $(OCAML)" -n 80 -v false +TRANSF=$(SET_LD_PATH) $(OCAMLRUN) ../../tools/transf +TEXQUOTE=../../tools/texquote2 + +ALLFILES=$(FILES) + +etex-files: $(ALLFILES) +all: $(ALLFILES) + +clean: + rm -f $(ALLFILES) + +.SUFFIXES: +.SUFFIXES: .etex .tex + +exten.tex:exten.etex + @$(CAMLLATEX) -o $*.caml_tex_error.tex $*.etex \ + && mv $*.caml_tex_error.tex $*.gen.tex \ + && $(TRANSF) < $*.gen.tex > $*.transf_error.tex \ + && mv $*.transf_error.tex $*.gen.tex\ + && $(TEXQUOTE) < $*.gen.tex > $*.texquote_error.tex\ + && mv $*.texquote_error.tex $*.tex\ + || printf "Failure when generating %s\n" $*.tex +.etex.tex: + @$(TRANSF) < $*.etex > $*.transf_error.tex \ + && mv $*.transf_error.tex $*.gen.tex\ + && $(TEXQUOTE) < $*.gen.tex > $*.texquote_error.tex\ + && mv $*.texquote_error.tex $*.tex\ + || printf "Failure when generating %s\n" $*.tex + + +$(ALLFILES): ../../tools/transf $(TEXQUOTE) diff --git a/manual/manual/refman/classes.etex b/manual/manual/refman/classes.etex new file mode 100644 index 00000000..2480357c --- /dev/null +++ b/manual/manual/refman/classes.etex @@ -0,0 +1,530 @@ +\section{Classes} +\pdfsection{Classes} +%HEVEA\cutname{classes.html} +Classes are defined using a small language, similar to the module +language. + +\subsection{Class types} + +Class types are the class-level equivalent of type expressions: they +specify the general shape and type properties of classes. 
+ +\ikwd{object\@\texttt{object}} +\ikwd{end\@\texttt{end}} +\ikwd{inherit\@\texttt{inherit}} +\ikwd{val\@\texttt{val}} +\ikwd{mutable\@\texttt{mutable}} +\ikwd{method\@\texttt{method}} +\ikwd{private\@\texttt{private}} +\ikwd{virtual\@\texttt{virtual}|see{\texttt{val}, \texttt{method}, \texttt{class}}} +\ikwd{constraint\@\texttt{constraint}} + +\begin{syntax} +class-type: + [['?']label-name':'] typexpr '->' class-type + | class-body-type +; +class-body-type: + 'object' ['(' typexpr ')'] {class-field-spec} 'end' + | ['[' typexpr {',' typexpr} ']'] classtype-path + | 'let' 'open' module-path 'in' class-body-type +; +%\end{syntax} \begin{syntax} +class-field-spec: + 'inherit' class-body-type + | 'val' ['mutable'] ['virtual'] inst-var-name ':' typexpr + | 'val' 'virtual' 'mutable' inst-var-name ':' typexpr + | 'method' ['private'] ['virtual'] method-name ':' poly-typexpr + | 'method' 'virtual' 'private' method-name ':' poly-typexpr + | 'constraint' typexpr '=' typexpr +\end{syntax} +See also the following language extensions: +\hyperref[s:attributes]{attributes} and +\hyperref[s:extension-nodes]{extension nodes}. + +\subsubsection*{Simple class expressions} + +The expression @classtype-path@ is equivalent to the class type bound to +the name @classtype-path@. Similarly, the expression +@'[' typexpr_1 ',' \ldots typexpr_n ']' classtype-path@ is equivalent to +the parametric class type bound to the name @classtype-path@, in which +type parameters have been instantiated to respectively @typexpr_1@, +\ldots @typexpr_n@. + +\subsubsection*{Class function type} + +The class type expression @typexpr '->' class-type@ is the type of +class functions (functions from values to classes) that take as +argument a value of type @typexpr@ and return as result a class of +type @class-type@. + +\subsubsection*{Class body type} + +The class type expression +@'object' ['(' typexpr ')'] {class-field-spec} 'end'@ +is the type of a class body. 
It specifies its instance variables and +methods. In this type, @typexpr@ is matched against the self type, therefore +providing a name for the self type. + +A class body will match a class body type if it provides definitions +for all the components specified in the class body type, and these +definitions meet the type requirements given in the class body type. +Furthermore, all methods either virtual or public present in the class +body must also be present in the class body type (on the other hand, some +instance variables and concrete private methods may be omitted). A +virtual method will match a concrete method, which makes it possible +to forget its implementation. An immutable instance variable will match a +mutable instance variable. + +\subsubsection*{Local opens} + +Local opens are supported in class types since OCaml 4.06. + +\subsubsection*{Inheritance} + +\ikwd{inherit\@\texttt{inherit}} + +The inheritance construct @'inherit' class-body-type@ provides for inclusion of +methods and instance variables from other class types. +The instance variable and method types from @class-body-type@ are added +into the current class type. + +\subsubsection*{Instance variable specification} + +\ikwd{val\@\texttt{val}} +\ikwd{mutable\@\texttt{mutable}} + +A specification of an instance variable is written +@'val' ['mutable'] ['virtual'] inst-var-name ':' typexpr@, where +@inst-var-name@ +is the name of the instance variable and @typexpr@ its expected type. +% +The flag @'mutable'@ indicates whether this instance variable can be +physically modified. +% +The flag @'virtual'@ indicates that this instance variable is not +initialized. It can be initialized later through inheritance. + +An instance variable specification will hide any previous +specification of an instance variable of the same name. 
+ +\subsubsection*{Method specification} +\label{sec-methspec} + +\ikwd{method\@\texttt{method}} +\ikwd{private\@\texttt{private}} + +The specification of a method is written +@'method' ['private'] method-name ':' poly-typexpr@, where +@method-name@ is the name of the method and @poly-typexpr@ its +expected type, possibly polymorphic. The flag @'private'@ indicates +that the method cannot be accessed from outside the object. + +The polymorphism may be left implicit in public method specifications: +any type variable which is not bound to a class parameter and does not +appear elsewhere inside the class specification will be assumed to be +universal, and made polymorphic in the resulting method type. +Writing an explicit polymorphic type will disable this behaviour. + +If several specifications are present for the same method, they +must have compatible types. +Any non-private specification of a method forces it to be public. + +\subsubsection*{Virtual method specification} + +\ikwd{method\@\texttt{method}} +\ikwd{private\@\texttt{private}} + +A virtual method specification is written @'method' ['private'] +'virtual' method-name ':' poly-typexpr@, where @method-name@ is the +name of the method and @poly-typexpr@ its expected type. + +\subsubsection*{Constraints on type parameters} + +\ikwd{constraint\@\texttt{constraint}} + +The construct @'constraint' typexpr_1 '=' typexpr_2@ forces the two +type expressions to be equal. This is typically used to specify type +parameters: in this way, they can be bound to specific type +expressions. + +\subsection{Class expressions} + +Class expressions are the class-level equivalent of value expressions: +they evaluate to classes, thus providing implementations for the +specifications expressed in class types. 
+ +\ikwd{object\@\texttt{object}} +\ikwd{end\@\texttt{end}} +\ikwd{fun\@\texttt{fun}} +\ikwd{let\@\texttt{let}} +\ikwd{and\@\texttt{and}} +\ikwd{inherit\@\texttt{inherit}} +\ikwd{as\@\texttt{as}} +\ikwd{val\@\texttt{val}} +\ikwd{mutable\@\texttt{mutable}} +\ikwd{method\@\texttt{method}} +\ikwd{private\@\texttt{private}} +\ikwd{constraint\@\texttt{constraint}} +\ikwd{initializer\@\texttt{initializer}} + +\begin{syntax} +class-expr: + class-path + | '[' typexpr {',' typexpr} ']' class-path + | '(' class-expr ')' + | '(' class-expr ':' class-type ')' + | class-expr {{argument}} + | 'fun' {{parameter}} '->' class-expr + | 'let' ['rec'] let-binding {'and' let-binding} 'in' class-expr + | 'object' class-body 'end' + | 'let' 'open' module-path 'in' class-expr +; +%BEGIN LATEX +\end{syntax} \begin{syntax} +%END LATEX +class-field: + 'inherit' class-expr ['as' lowercase-ident] + | 'inherit!' class-expr ['as' lowercase-ident] + | 'val' ['mutable'] inst-var-name [':' typexpr] '=' expr + | 'val!' ['mutable'] inst-var-name [':' typexpr] '=' expr + | 'val' ['mutable'] 'virtual' inst-var-name ':' typexpr + | 'val' 'virtual' 'mutable' inst-var-name ':' typexpr + | 'method' ['private'] method-name {parameter} [':' typexpr] '=' expr + | 'method!' ['private'] method-name {parameter} [':' typexpr] '=' expr + | 'method' ['private'] method-name ':' poly-typexpr '=' expr + | 'method!' ['private'] method-name ':' poly-typexpr '=' expr + | 'method' ['private'] 'virtual' method-name ':' poly-typexpr + | 'method' 'virtual' 'private' method-name ':' poly-typexpr + | 'constraint' typexpr '=' typexpr + | 'initializer' expr +\end{syntax} +See also the following language extensions: +\hyperref[s:locally-abstract]{locally abstract types}, +\hyperref[s:attributes]{attributes} and +\hyperref[s:extension-nodes]{extension nodes}. + +\subsubsection*{Simple class expressions} + +The expression @class-path@ evaluates to the class bound to the name +@class-path@. 
Similarly, the expression +@'[' typexpr_1 ',' \ldots typexpr_n ']' class-path@ +evaluates to the parametric class bound to the name @class-path@, +in which type parameters have been instantiated respectively to +@typexpr_1@, \ldots @typexpr_n@. + +The expression @'(' class-expr ')'@ evaluates to the same class as +@class-expr@. + +The expression @'(' class-expr ':' class-type ')'@ checks that +@class-type@ matches the type of @class-expr@ (that is, that the +implementation @class-expr@ meets the type specification +@class-type@). The whole expression evaluates to the same class as +@class-expr@, except that all components not specified in +@class-type@ are hidden and can no longer be accessed. + +\subsubsection*{Class application} + +Class application is denoted by juxtaposition of (possibly labeled) +expressions. It denotes the class whose constructor is the first +expression applied to the given arguments. The arguments are +evaluated as for expression application, but the constructor itself will +only be evaluated when objects are created. In particular, side-effects +caused by the application of the constructor will only occur at object +creation time. + +\subsubsection*{Class function} + +The expression @'fun' [['?']label-name':']pattern '->' class-expr@ evaluates +to a function from values to classes. +When this function is applied to a value \var{v}, this value is +matched against the pattern @pattern@ and the result is the result of +the evaluation of @class-expr@ in the extended environment. + +Conversion from functions with default values to functions with +patterns only works identically for class functions as for normal +functions.
+ +The expression +\begin{center} +@"fun" parameter_1 \ldots parameter_n "->" class-expr@ +\end{center} +is a short form for +\begin{center} +@"fun" parameter_1 "->" \ldots "fun" parameter_n "->" class-expr@ +\end{center} + +\subsubsection*{Local definitions} + +The {\tt let} and {\tt let rec} constructs bind value names locally, +as for the core language expressions. + +If a local definition occurs at the very beginning of a class +definition, it will be evaluated when the class is created (just as if +the definition was outside of the class). +Otherwise, it will be evaluated when the object constructor is called. + +\subsubsection*{Local opens} + +Local opens are supported in class expressions since OCaml 4.06. + +\subsubsection*{Class\label{ss:class-body} body} +\begin{syntax} +class-body: ['(' pattern [':' typexpr] ')'] { class-field } +\end{syntax} +The expression +@'object' class-body 'end'@ denotes +a class body. This is the prototype for an object: it lists the +instance variables and methods of an object of this class. + +A class body is a class value: it is not evaluated at once. Rather, +its components are evaluated each time an object is created. + +In a class body, the pattern @'(' pattern [':' typexpr] ')'@ is +matched against self, therefore providing a binding for self and self +type. Self can only be used in methods and initializers. + +Self type cannot be a closed object type, so that the class remains +extensible. + +Since OCaml 4.01, it is an error if the same method or instance +variable name is defined several times in the same class body. + +\subsubsection*{Inheritance} + +\ikwd{inherit\@\texttt{inherit}} + +The inheritance construct @'inherit' class-expr@ allows reusing +methods and instance variables from other classes. The class +expression @class-expr@ must evaluate to a class body. The instance +variables, methods and initializers from this class body are added +into the current class.
The addition of a method will override any +previously defined method of the same name. + +\ikwd{as\@\texttt{as}} +An ancestor can be bound by appending @'as' lowercase-ident@ +to the inheritance construct. @lowercase-ident@ is not a true +variable and can only be used to select a method, i.e. in an expression +@lowercase-ident '#' method-name@. This gives access to the +method @method-name@ as it was defined in the parent class even if it is +redefined in the current class. +The scope of this ancestor binding is limited to the current class. +The ancestor method may be called from a subclass but only indirectly. + +\subsubsection*{Instance variable definition} + +\ikwd{val\@\texttt{val}} +\ikwd{mutable\@\texttt{mutable}} + +The definition @'val' ['mutable'] inst-var-name '=' expr@ adds an +instance variable @inst-var-name@ whose initial value is the value of +expression @expr@. +% +The flag @'mutable'@ allows physical modification of this variable by +methods. + +An instance variable can only be used in the methods and +initializers that follow its definition. + +Since version 3.10, redefinitions of a visible instance variable with +the same name do not create a new variable, but are merged, using the +last value for initialization. They must have identical types and +mutability. +However, if an instance variable is hidden by +omitting it from an interface, it will be kept distinct from +other instance variables with the same name. + +\subsubsection*{Virtual instance variable definition} + +\ikwd{val\@\texttt{val}} +\ikwd{mutable\@\texttt{mutable}} + +A variable specification is written @'val' ['mutable'] 'virtual' +inst-var-name ':' typexpr@. It specifies whether the variable is +modifiable, and gives its type. + +Virtual instance variables were added in version 3.10. + +\subsubsection*{Method definition} + +\ikwd{method\@\texttt{method}} +\ikwd{private\@\texttt{private}} + +A method definition is written @'method' method-name '=' expr@. 
The +definition of a method overrides any previous definition of this +method. The method will be public (that is, not private) if any of +the definitions states so. + +A private method, @'method' 'private' method-name '=' expr@, is a +method that can only be invoked on self (from other methods of the +same object, defined in this class or one of its subclasses). This +invocation is performed using the expression +@value-name '#' method-name@, where @value-name@ is directly bound to +self at the beginning of the class definition. Private methods do +not appear in object types. A method may have both public and private +definitions, but as soon as there is a public one, all subsequent +definitions will be made public. + +Methods may have an explicitly polymorphic type, allowing them to be +used polymorphically in programs (even for the same object). The +explicit declaration may be done in one of three ways: (1) by giving an +explicit polymorphic type in the method definition, immediately after +the method name, {\em i.e.} +@'method' ['private'] method-name ':' {{"'" ident}} '.' typexpr '=' +expr@; (2) by a forward declaration of the explicit polymorphic type +through a virtual method definition; (3) by importing such a +declaration through inheritance and/or constraining the type of {\em +self}. + +Some special expressions are available in method bodies for +manipulating instance variables and duplicating self: +\begin{syntax} +expr: + \ldots + | inst-var-name '<-' expr + | '{<' [ inst-var-name '=' expr { ';' inst-var-name '=' expr } [';'] ] '>}' +\end{syntax} + +The expression @inst-var-name '<-' expr@ modifies in-place the current +object by replacing the value associated to @inst-var-name@ by the +value of @expr@. Of course, this instance variable must have been +declared mutable.
+ +The expression +@'{<' inst-var-name_1 '=' expr_1 ';' \ldots ';' inst-var-name_n '=' expr_n '>}'@ +evaluates to a copy of the current object in which the values of +instance variables @inst-var-name_1, \ldots, inst-var-name_n@ have +been replaced by the values of the corresponding expressions @expr_1, +\ldots, expr_n@. + +\subsubsection*{Virtual method definition} + +\ikwd{method\@\texttt{method}} +\ikwd{private\@\texttt{private}} + +A method specification is written @'method' ['private'] 'virtual' +method-name ':' poly-typexpr@. It specifies whether the method is +public or private, and gives its type. If the method is intended to be +polymorphic, the type must be explicitly polymorphic. + +\subsubsection*{Explicit overriding} + +Since OCaml 3.12, the keywords @"inherit!"@, @"val!"@ and @"method!"@ +have the same semantics as @"inherit"@, @"val"@ and @"method"@, but +they additionally require the definition they introduce to be +overriding. Namely, @"method!"@ requires @method-name@ to be already +defined in this class, @"val!"@ requires @inst-var-name@ to be already +defined in this class, and @"inherit!"@ requires @class-expr@ to +override some definitions. If no such overriding occurs, an error is +signaled. + +As a side-effect, these 3 keywords avoid the warnings~7 +(method override) and~13 (instance variable override). +Note that warning~7 is disabled by default. + +\subsubsection*{Constraints on type parameters} + +\ikwd{constraint\@\texttt{constraint}} +The construct @'constraint' typexpr_1 '=' typexpr_2@ forces the two +type expressions to be equal. This is typically used to specify type +parameters: in that way they can be bound to specific type +expressions. + +\subsubsection*{Initializers} + +\ikwd{initializer\@\texttt{initializer}} + +A class initializer @'initializer' expr@ specifies an expression that +will be evaluated whenever an object is created from the class, once +all its instance variables have been initialized.
+ +\subsection{Class definitions} +\label{s:classdef} + +\ikwd{class\@\texttt{class}} +\ikwd{and\@\texttt{and}} + +\begin{syntax} +class-definition: + 'class' class-binding { 'and' class-binding } +; +class-binding: + ['virtual'] ['[' type-parameters ']'] class-name + {parameter} [':' class-type] \\ '=' class-expr +; +type-parameters: + "'" ident { "," "'" ident } +\end{syntax} + +A class definition @'class' class-binding { 'and' class-binding }@ is +recursive. Each @class-binding@ defines a @class-name@ that can be +used in the whole expression except for inheritance. It can also be +used for inheritance, but only in the definitions that follow its own. + +A class binding binds the class name @class-name@ to the value of +expression @class-expr@. It also binds the class type @class-name@ to +the type of the class, and defines two type abbreviations : +@class-name@ and @'#' class-name@. The first one is the type of +objects of this class, while the second is more general as it unifies +with the type of any object belonging to a subclass (see +section~\ref{s:sharp-types}). + +\subsubsection*{Virtual class} + +A class must be flagged virtual if one of its methods is virtual (that +is, appears in the class type, but is not actually defined). +Objects cannot be created from a virtual class. + +\subsubsection*{Type parameters} + +The class type parameters correspond to the ones of the class type and +of the two type abbreviations defined by the class binding. They must +be bound to actual types in the class definition using type +constraints. So that the abbreviations are well-formed, type +variables of the inferred type of the class must either be type +parameters or be bound in the constraint clause. 
+ +\subsection{Class specifications} +\label{s:class-spec} + +\ikwd{class\@\texttt{class}} +\ikwd{and\@\texttt{and}} + +\begin{syntax} +class-specification: + 'class' class-spec { 'and' class-spec } +; +class-spec: + ['virtual'] ['[' type-parameters ']'] class-name ':' + class-type +\end{syntax} + +This is the counterpart in signatures of class definitions. +A class specification matches a class definition if they have the same +type parameters and their types match. + +\subsection{Class type definitions} +\label{s:classtype} + +\ikwd{class\@\texttt{class}} +\ikwd{type\@\texttt{type}} +\ikwd{and\@\texttt{and}} + +\begin{syntax} +classtype-definition: + 'class' 'type' classtype-def + { 'and' classtype-def } +; +classtype-def: + ['virtual'] ['[' type-parameters ']'] class-name '=' class-body-type +\end{syntax} + +A class type definition @'class' 'type' class-name '=' class-body-type@ +defines an abbreviation @class-name@ for the class body type +@class-body-type@. As for class definitions, two type abbreviations +@class-name@ and @'#' class-name@ are also defined. The definition can +be parameterized by some type parameters. If any method in the class +type body is virtual, the definition must be flagged @'virtual'@. + +Two class type definitions match if they have the same type parameters +and they expand to matching types. diff --git a/manual/manual/refman/compunit.etex b/manual/manual/refman/compunit.etex new file mode 100644 index 00000000..6c8c09f7 --- /dev/null +++ b/manual/manual/refman/compunit.etex @@ -0,0 +1,42 @@ +\section{Compilation units} +\pdfsection{Compilation units} +%HEVEA\cutname{compunit.html} + +\begin{syntax} +unit-interface: { specification [';;'] } +; +unit-implementation: [ module-items ] +\end{syntax} + +Compilation units bridge the module system and the separate +compilation system. A compilation unit is composed of two parts: an +interface and an implementation.
The interface contains a sequence of +specifications, just as the inside of a @'sig' \ldots 'end'@ +signature expression. The implementation contains a sequence of +definitions and expressions, just as the inside of a +@'struct' \ldots 'end'@ module +expression. A compilation unit also has a name @unit-name@, derived +from the names of the files containing the interface and the +implementation (see chapter~\ref{c:camlc} for more details). A +compilation unit behaves roughly as the module definition +\begin{center} +@'module' unit-name ':' 'sig' unit-interface 'end' '=' + 'struct' unit-implementation 'end'@ +\end{center} + +A compilation unit can refer to other compilation units by their +names, as if they were regular modules. For instance, if "U" is a +compilation unit that defines a type "t", other compilation units can +refer to that type under the name "U.t"; they can also refer to "U" as +a whole structure. Except for names of other compilation units, a unit +interface or unit implementation must not have any other free variables. +In other terms, the type-checking and compilation of an interface or +implementation proceeds in the initial environment +\begin{center} +@name_1 ':' 'sig' specification_1 'end' \ldots + name_n ':' 'sig' specification_n 'end'@ +\end{center} +where @name_1 \ldots name_n@ are the names of the other +compilation units available in the search path (see +chapter~\ref{c:camlc} for more details) and @specification_1 \ldots +specification_n@ are their respective interfaces. 
diff --git a/manual/manual/refman/const.etex b/manual/manual/refman/const.etex new file mode 100644 index 00000000..c503668d --- /dev/null +++ b/manual/manual/refman/const.etex @@ -0,0 +1,35 @@ +\section{Constants} +\pdfsection{Constants} +%HEVEA\cutname{const.html} + +\ikwd{false\@\texttt{false}} +\ikwd{true\@\texttt{true}} +\ikwd{begin\@\texttt{begin}} +\ikwd{end\@\texttt{end}} + +\begin{syntax} +constant: + integer-literal + | float-literal + | char-literal + | string-literal + | constr + | "false" + | "true" + | "("")" + | "begin" "end" + | "[""]" + | "[|""|]" + | "`"tag-name +\end{syntax} +See also the following language extensions: +\hyperref[s:ext-integer]{integer literals for types \texttt{int32}, \texttt{int64} +and \texttt{nativeint}}, \hyperref[s:quoted-strings]{quoted strings} +and \hyperref[s:extension-literals]{extension literals}. + +The syntactic class of constants comprises literals from the four +base types (integers, floating-point numbers, characters, character +strings), and constant constructors from both normal and polymorphic +variants, as well as the special constants @"false"@, @"true"@, @"("")"@, +@"[""]"@, and @"[|""|]"@, which behave like constant constructors, and +@"begin" "end"@, which is equivalent to @'('')'@. 
diff --git a/manual/manual/refman/expr.etex b/manual/manual/refman/expr.etex new file mode 100644 index 00000000..6e47c03a --- /dev/null +++ b/manual/manual/refman/expr.etex @@ -0,0 +1,998 @@ +\section{Expressions\label{s:value-expr}} +\pdfsection{Expressions} +%HEVEA\cutname{expr.html} +\ikwd{in\@\texttt{in}|see{\texttt{let}}} +\ikwd{and\@\texttt{and}} +\ikwd{rec\@\texttt{rec}|see{\texttt{let}, \texttt{module}}} +\ikwd{let\@\texttt{let}} +\ikwd{try\@\texttt{try}} +\ikwd{function\@\texttt{function}} +\ikwd{fun\@\texttt{fun}} +\ikwd{with\@\texttt{with}} +\ikwd{done\@\texttt{done}|see{\texttt{while}, \texttt{for}}} +\ikwd{do\@\texttt{do}|see{\texttt{while}, \texttt{for}}} +\ikwd{downto\@\texttt{downto}|see{\texttt{for}}} +\ikwd{to\@\texttt{to}|see{\texttt{for}}} +\ikwd{for\@\texttt{for}} +\ikwd{else\@\texttt{else}|see{\texttt{if}}} +\ikwd{then\@\texttt{then}|see{\texttt{if}}} +\ikwd{if\@\texttt{if}} +\ikwd{or\@\texttt{or}} +\ikwd{match\@\texttt{match}} +\ikwd{begin\@\texttt{begin}} +\ikwd{end\@\texttt{end}} +\ikwd{when\@\texttt{when}} +\ikwd{new\@\texttt{new}} +\ikwd{object\@\texttt{object}} +\ikwd{lazy\@\texttt{lazy}} + +\begin{syntax} +expr: + value-path + | constant + | '(' expr ')' + | 'begin' expr 'end' + | '(' expr ':' typexpr ')' + | expr {{',' expr}} + | constr expr + | "`"tag-name expr + | expr '::' expr + | '[' expr { ';' expr } [';'] ']' + | '[|' expr { ';' expr } [';'] '|]' + | '{' field [':' typexpr] ['=' expr]% + { ';' field [':' typexpr] ['=' expr] } [';'] '}' + | '{' expr 'with' field [':' typexpr] ['=' expr]% + { ';' field [':' typexpr] ['=' expr] } [';'] '}' + | expr {{ argument }} + | prefix-symbol expr + | '-' expr + | '-.' expr + | expr infix-op expr + | expr '.' field + | expr '.' 
field '<-' expr + | expr '.(' expr ')' + | expr '.(' expr ')' '<-' expr + | expr '.[' expr ']' + | expr '.[' expr ']' '<-' expr + | 'if' expr 'then' expr [ 'else' expr ] + | 'while' expr 'do' expr 'done' + | 'for' value-name '=' expr ( 'to' || 'downto' ) expr 'do' expr 'done' + | expr ';' expr + | 'match' expr 'with' pattern-matching + | 'function' pattern-matching + | 'fun' {{ parameter }} [ ':' typexpr ] '->' expr + | 'try' expr 'with' pattern-matching + | 'let' ['rec'] let-binding { 'and' let-binding } 'in' expr + | 'new' class-path + | 'object' class-body 'end' + | expr '#' method-name + | inst-var-name + | inst-var-name '<-' expr + | '(' expr ':>' typexpr ')' + | '(' expr ':' typexpr ':>' typexpr ')' + | '{<' [ inst-var-name '=' expr { ';' inst-var-name '=' expr } [';'] ] '>}' + | 'assert' expr + | 'lazy' expr + | 'let' 'module' module-name { '(' module-name ':' module-type ')' } + [ ':' module-type ] \\ '=' module-expr 'in' expr + | "let" "open" module-path "in" expr + | module-path '.(' expr ')' + | module-path '.[' expr ']' + | module-path '.[|' expr '|]' + | module-path '.{' expr '}' + | module-path '.{<' expr '>}' +; +%BEGIN LATEX +\end{syntax} \begin{syntax} +%END LATEX +argument: + expr + | '~' label-name + | '~' label-name ':' expr + | '?' label-name + | '?' label-name ':' expr +; +%\end{syntax} \begin{syntax} +pattern-matching: + [ '|' ] pattern ['when' expr] '->' expr + { '|' pattern ['when' expr] '->' expr } +; +let-binding: + pattern '=' expr + | value-name { parameter } [':' typexpr] [':>' typexpr] '=' expr + | value-name ':' poly-typexpr '=' expr %since 3.12 +; +parameter: + pattern + | '~' label-name + | '~' '(' label-name [':' typexpr] ')' + | '~' label-name ':' pattern + | '?' label-name + | '?' '(' label-name [':' typexpr] ['=' expr] ')' + | '?' label-name ':' pattern + | '?' 
label-name ':' '(' pattern [':' typexpr] ['=' expr] ')' +\end{syntax} +See also the following language extensions: +\hyperref[s:object-notations]{object notations}, +\hyperref[s-first-class-modules]{first-class modules}, +\hyperref[s:explicit-overriding-open]{overriding in open statements}, +\hyperref[s:bigarray-access]{syntax for Bigarray access}, +\hyperref[s:attributes]{attributes}, +\hyperref[s:extension-nodes]{extension nodes}, +\hyperref[s:local-exceptions]{local exceptions} +and \hyperref[s:index-operators]{extended indexing operators}. + +The table below shows the relative precedences and associativity of +operators and non-closed constructions. The constructions with higher +precedence come first. For infix and prefix symbols, we write +``"*"\ldots'' to mean ``any symbol starting with "*"''. +\ikwd{or\@\texttt{or}}% +\ikwd{if\@\texttt{if}}% +\ikwd{fun\@\texttt{fun}}% +\ikwd{function\@\texttt{function}}% +\ikwd{match\@\texttt{match}}% +\ikwd{try\@\texttt{try}}% +\ikwd{let\@\texttt{let}}% +\ikwd{mod\@\texttt{mod}} +\ikwd{land\@\texttt{land}} +\ikwd{lor\@\texttt{lor}} +\ikwd{lxor\@\texttt{lxor}} +\ikwd{lsl\@\texttt{lsl}} +\ikwd{lsr\@\texttt{lsr}} +\ikwd{asr\@\texttt{asr}} +\begin{tableau}{|l|l|}{Construction or operator}{Associativity} +\entree{prefix-symbol}{--} +\entree{". .( .[ .{" (see section~\ref{s:bigarray-access})}{--} +\entree{"#"\ldots}{--} +\entree{function application, constructor application, tag + application, "assert", + "lazy"}{left} +\entree{"- -."
(prefix)}{--} +\entree{"**"\ldots" lsl lsr asr"}{right} +\entree{"*"\ldots" /"\ldots" %"\ldots" mod land lor lxor"}{left} + %% "`"@ident@"`" +\entree{"+"\ldots" -"\ldots}{left} +\entree{"::"}{right} +\entree{{\tt \char64}\ldots " ^"\ldots}{right} +\entree{"="\ldots" <"\ldots" >"\ldots" |"\ldots" &"\ldots" $"\ldots" !="}{left} +\entree{"& &&"}{right} +\entree{"or ||"}{right} +\entree{","}{--} +\entree{"<- :="}{right} +\entree{"if"}{--} +\entree{";"}{right} +\entree{"let match fun function try"}{--} +\end{tableau} + +\subsection{Basic expressions} + +\subsubsection*{Constants} + +An expression consisting of a constant evaluates to this constant. + +\subsubsection*{Value paths} \label{expr:var} + +An expression consisting of an access path evaluates to the value bound to +this path in the current evaluation environment. The path can +be either a value name or an access path to a value component of a module. + +\subsubsection*{Parenthesized expressions} +\ikwd{begin\@\texttt{begin}} +\ikwd{end\@\texttt{end}} + +The expressions @'(' expr ')'@ and @'begin' expr 'end'@ have the same +value as @expr@. The two constructs are semantically equivalent, but it +is good style to use @'begin' \ldots 'end'@ inside control structures: +\begin{alltt} + if \ldots then begin \ldots ; \ldots end else begin \ldots ; \ldots end +\end{alltt} +and @'(' \ldots ')'@ for the other grouping situations. + +Parenthesized expressions can contain a type constraint, as in @'(' +expr ':' typexpr ')'@. This constraint forces the type of @expr@ to be +compatible with @typexpr@. + +Parenthesized expressions can also contain coercions +@'(' expr [':' typexpr] ':>' typexpr')'@ (see +subsection~\ref{s:coercions} below). + + +\subsubsection*{Function application} + +Function application is denoted by juxtaposition of (possibly labeled) +expressions. The expression @expr argument_1 \ldots argument_n@ +evaluates the expression @expr@ and those appearing in @argument_1@ +to @argument_n@.
The expression @expr@ must evaluate to a +functional value $f$, which is then applied to the values of +@argument_1, \ldots, argument_n@. + +The order in which the expressions @expr, argument_1, \ldots, +argument_n@ are evaluated is not specified. + +Arguments and parameters are matched according to their respective +labels. Argument order is irrelevant, except among arguments with the +same label, or no label. + +If a parameter is specified as optional (label prefixed by @"?"@) in the +type of @expr@, the corresponding argument will be automatically +wrapped with the constructor "Some", except if the argument itself is +also prefixed by @"?"@, in which case it is passed as is. +% +If a non-labeled argument is passed, and its corresponding parameter +is preceded by one or several optional parameters, then these +parameters are {\em defaulted}, {\em i.e.} the value "None" will be +passed for them. +% +All other missing parameters (without corresponding argument), both +optional and non-optional, will be kept, and the result of the +function will still be a function of these missing parameters to the +body of $f$. + +As a special case, if the function has a known arity, all the +arguments are unlabeled, and their number matches the number of +non-optional parameters, then labels are ignored and non-optional +parameters are matched in their definition order. Optional arguments +are defaulted. + +In all cases but exact match of order and labels, without optional +parameters, the function type should be known at the application +point. This can be ensured by adding a type constraint. Principality +of the derivation can be checked in the "-principal" mode. + +\subsubsection*{Function definition} + +Two syntactic forms are provided to define functions. 
The first form +is introduced by the keyword "function": +\ikwd{function\@\texttt{function}} + +$$\begin{array}{rlll} +\token{function} & \textsl{pattern}_1 & \token{->} & \textsl{expr}_1 \\ +\token{|} & \ldots \\ +\token{|} & \textsl{pattern}_n & \token{->} & \textsl{expr}_n +\end{array}$$ +This expression evaluates to a functional value with one argument. +When this function is applied to a value \var{v}, this value is +matched against each pattern @pattern_1@ to @pattern_n@. +If one of these matchings succeeds, that is, if the value \var{v} +matches the pattern @pattern_i@ for some \var{i}, +then the expression @expr_i@ associated to the selected pattern +is evaluated, and its value becomes the value of the function +application. The evaluation of @expr_i@ takes place in an +environment enriched by the bindings performed during the matching. + +If several patterns match the argument \var{v}, the one that occurs +first in the function definition is selected. If none of the patterns +matches the argument, the exception "Match_failure" is raised. 
+% +\index{Matchfailure\@\verb`Match_failure`} + +\medskip + +The other form of function definition is introduced by the keyword "fun": +\ikwd{fun\@\texttt{fun}} +\begin{center} +@"fun" parameter_1 \ldots parameter_n "->" expr@ +\end{center} +This expression is equivalent to: +\begin{center} +@"fun" parameter_1 "->" \ldots "fun" parameter_n "->" expr@ +\end{center} + +An optional type constraint @typexpr@ can be added before "->" to enforce +the type of the result to be compatible with the constraint @typexpr@: +\begin{center} +@"fun" parameter_1 \ldots parameter_n ":" typexpr "->" expr@ +\end{center} +is equivalent to +\begin{center} + @"fun" parameter_1 "->" \ldots "fun" parameter_n "->" % + (expr ":" typexpr )@ +\end{center} +Beware of the small syntactic difference between a type constraint on +the last parameter +\begin{center} + @"fun" parameter_1 \ldots (parameter_n":"typexpr)"->" expr @ +\end{center} +and one on the result +\begin{center} + @"fun" parameter_1 \ldots parameter_n":" typexpr "->" expr @ +\end{center} + +The parameter patterns @"~"lab@ and @"~("lab [":" typ]")"@ +are shorthands for respectively @"~"lab":"lab@ and +@"~"lab":("lab [":" typ]")"@, and similarly for their optional +counterparts. + +A function of the form @"fun" "?" lab ":(" pattern '=' expr_0 ')' '->' +expr@ is equivalent to +\begin{center} +@"fun" "?" lab ":" ident '->' + "let" pattern '=' + "match" ident "with" "Some" ident "->" ident '|' "None" '->' expr_0 + "in" expr@ +\end{center} +where @ident@ +is a fresh variable, except that it is unspecified when @expr_0@ is evaluated. 
+ +After these two transformations, expressions are of the form +\begin{center} +@"fun" [label_1] pattern_1 "->" \ldots "fun" [label_n] pattern_n "->" expr@ +\end{center} +If we ignore labels, which will only be meaningful at function +application, this is equivalent to +\begin{center} +@"function" pattern_1 "->" \ldots "function" pattern_n "->" expr@ +\end{center} +That is, the @"fun"@ expression above evaluates to a curried function +with \var{n} arguments: after applying this function $n$ times to the +values @@v@_1 \ldots @v@_n@, the values will be matched +in parallel against the patterns @pattern_1 \ldots pattern_n@. +If the matching succeeds, the function returns the value of @expr@ in +an environment enriched by the bindings performed during the matchings. +If the matching fails, the exception "Match_failure" is raised. + +\subsubsection*{Guards in pattern-matchings} + +\ikwd{when\@\texttt{when}} +The cases of a pattern matching (in the @"function"@, @"match"@ and +@"try"@ constructs) can include guard expressions, which are +arbitrary boolean expressions that must evaluate to "true" for the +match case to be selected. Guards occur just before the @"->"@ token and +are introduced by the @"when"@ keyword: + +$$\begin{array}{rlll} +\token{function} & \nt{pattern}_1 \; [\token{when} \; \nt{cond}_1] & \token{->} & \nt{expr}_1 \\ +\token{|} & \ldots \\ +\token{|} & \nt{pattern}_n \; [\token{when} \; \nt{cond}_n] & \token{->} & \nt{expr}_n +\end{array}$$ + + +Matching proceeds as described before, except that if the value +matches some pattern @pattern_i@ which has a guard @@cond@_i@, then the +expression @@cond@_i@ is evaluated (in an environment enriched by the +bindings performed during matching). If @@cond@_i@ evaluates to "true", +then @expr_i@ is evaluated and its value returned as the result of the +matching, as usual. But if @@cond@_i@ evaluates to "false", the matching +is resumed against the patterns following @pattern_i@. 
+ +\subsubsection*{Local definitions} \label{s:localdef} + +\ikwd{let\@\texttt{let}} + +The @"let"@ and @"let" "rec"@ constructs bind value names locally. +The construct +\begin{center} +@"let" pattern_1 "=" expr_1 "and" \ldots "and" pattern_n "=" expr_n "in" expr@ +\end{center} +evaluates @expr_1 \ldots expr_n@ in some unspecified order and matches +their values against the patterns @pattern_1 \ldots pattern_n@. If the +matchings succeed, @expr@ is evaluated in the environment enriched by +the bindings performed during matching, and the value of @expr@ is +returned as the value of the whole @"let"@ expression. If one of the +matchings fails, the exception "Match_failure" is raised. +% +\index{Matchfailure\@\verb`Match_failure`} + +An alternate syntax is provided to bind variables to functional +values: instead of writing +\begin{center} +@"let" ident "=" "fun" parameter_1 \ldots parameter_m "->" expr@ +\end{center} +in a @"let"@ expression, one may instead write +\begin{center} +@"let" ident parameter_1 \ldots parameter_m "=" expr@ +\end{center} + +\medskip +\noindent +Recursive definitions of names are introduced by @"let" "rec"@: +\begin{center} +@"let" "rec" pattern_1 "=" expr_1 "and" \ldots "and" pattern_n "=" expr_n + "in" expr@ +\end{center} +The only difference with the @"let"@ construct described above is +that the bindings of names to values performed by the +pattern-matching are considered already performed when the expressions +@expr_1@ to @expr_n@ are evaluated. That is, the expressions @expr_1@ +to @expr_n@ can reference identifiers that are bound by one of the +patterns @pattern_1, \ldots, pattern_n@, and expect them to have the +same value as in @expr@, the body of the @"let" "rec"@ construct. 
+ +The recursive definition is guaranteed to behave as described above if +the expressions @expr_1@ to @expr_n@ are function definitions +(@"fun" \ldots@ or @"function" \ldots@), and the patterns @pattern_1 +\ldots pattern_n@ are just value names, as in: +\begin{center} +@"let" "rec" name_1 "=" "fun" \ldots +"and" \ldots +"and" name_n "=" "fun" \ldots +"in" expr@ +\end{center} +This defines @name_1 \ldots name_n@ as mutually recursive functions +local to @expr@. + +The behavior of other forms of @"let" "rec"@ definitions is +implementation-dependent. The current implementation also supports +a certain class of recursive definitions of non-functional values, +as explained in section~\ref{s:letrecvalues}. +\subsubsection{Explicit polymorphic type annotations} +(Introduced in OCaml 3.12) + +Polymorphic type annotations in @"let"@-definitions behave in a way +similar to polymorphic methods: + +\begin{center} +@"let" pattern_1 ":" typ_1 \ldots typ_n "." typexpr "=" expr @ +\end{center} + +These annotations explicitly require the defined value to be polymorphic, +and allow one to use this polymorphism in recursive occurrences +(when using @"let" "rec"@). Note however that this is a normal polymorphic +type, unifiable with any instance of itself. + +\subsection{Control structures} + +\subsubsection*{Sequence} + +The expression @expr_1 ";" expr_2@ evaluates @expr_1@ first, then +@expr_2@, and returns the value of @expr_2@. + +\subsubsection*{Conditional} +\ikwd{if\@\texttt{if}} + +The expression @"if" expr_1 "then" expr_2 "else" expr_3@ evaluates to +the value of @expr_2@ if @expr_1@ evaluates to the boolean @"true"@, +and to the value of @expr_3@ if @expr_1@ evaluates to the boolean +@"false"@. + +The @"else" expr_3@ part can be omitted, in which case it defaults to +@"else" "()"@.
+ +\subsubsection*{Case expression}\ikwd{match\@\texttt{match}} + +The expression +$$\begin{array}{rlll} +\token{match} & \textsl{expr} \\ +\token{with} & \textsl{pattern}_1 & \token{->} & \textsl{expr}_1 \\ +\token{|} & \ldots \\ +\token{|} & \textsl{pattern}_n & \token{->} & \textsl{expr}_n +\end{array}$$ +matches the value of @expr@ against the patterns @pattern_1@ to +@pattern_n@. If the matching against @pattern_i@ succeeds, the +associated expression @expr_i@ is evaluated, and its value becomes the +value of the whole @'match'@ expression. The evaluation of +@expr_i@ takes place in an environment enriched by the bindings +performed during matching. If several patterns match the value of +@expr@, the one that occurs first in the @'match'@ expression is +selected. If none of the patterns match the value of @expr@, the +exception "Match_failure" is raised. +% +\index{Matchfailure\@\verb`Match_failure`} + +\subsubsection*{Boolean operators} + +The expression @expr_1 '&&' expr_2@ evaluates to @'true'@ if both +@expr_1@ and @expr_2@ evaluate to @'true'@; otherwise, it evaluates to +@'false'@. The first component, @expr_1@, is evaluated first. The +second component, @expr_2@, is not evaluated if the first component +evaluates to @'false'@. Hence, the expression @expr_1 '&&' expr_2@ behaves +exactly as +\begin{center} +@'if' expr_1 'then' expr_2 'else' 'false'@. +\end{center} + +The expression @expr_1 '||' expr_2@ evaluates to @'true'@ if one of +the expressions +@expr_1@ and @expr_2@ evaluates to @'true'@; otherwise, it evaluates to +@'false'@. The first component, @expr_1@, is evaluated first. The +second component, @expr_2@, is not evaluated if the first component +evaluates to @'true'@. Hence, the expression @expr_1 '||' expr_2@ behaves +exactly as +\begin{center} +@'if' expr_1 'then' 'true' 'else' expr_2@. +\end{center} + +\ikwd{or\@\texttt{or}} +The boolean operators @'&'@ and @'or'@ are deprecated synonyms for +(respectively) @'&&'@ and @'||'@. 
+ +\subsubsection*{Loops} + +\ikwd{while\@\texttt{while}} +The expression @'while' expr_1 'do' expr_2 'done'@ repeatedly +evaluates @expr_2@ while @expr_1@ evaluates to @'true'@. The loop +condition @expr_1@ is evaluated and tested at the beginning of each +iteration. The whole @'while' \ldots 'done'@ expression evaluates to +the unit value @'()'@. + +\ikwd{for\@\texttt{for}} +The expression @'for' name '=' expr_1 'to' expr_2 'do' expr_3 'done'@ +first evaluates the expressions @expr_1@ and @expr_2@ (the boundaries) +into integer values \var{n} and \var{p}. Then, the loop body @expr_3@ is +repeatedly evaluated in an environment where @name@ is successively +bound to the values + $n$, $n+1$, \ldots, $p-1$, $p$. + The loop body is never evaluated if $n > p$. + + +The expression @'for' name '=' expr_1 'downto' expr_2 'do' expr_3 'done'@ +evaluates similarly, except that @name@ is successively bound to the values + $n$, $n-1$, \ldots, $p+1$, $p$. + The loop body is never evaluated if $n < p$. + + +In both cases, the whole @'for'@ expression evaluates to the unit +value @'()'@. + +\subsubsection*{Exception handling} +\ikwd{try\@\texttt{try}} + +The expression +$$\begin{array}{rlll} +\token{try~} & \textsl{expr} \\ +\token{with} & \textsl{pattern}_1 & \token{->} & \textsl{expr}_1 \\ +\token{|} & \ldots \\ +\token{|} & \textsl{pattern}_n & \token{->} & \textsl{expr}_n +\end{array}$$ +evaluates the expression @expr@ and returns its value if the +evaluation of @expr@ does not raise any exception. If the evaluation +of @expr@ raises an exception, the exception value is matched against +the patterns @pattern_1@ to @pattern_n@. If the matching against +@pattern_i@ succeeds, the associated expression @expr_i@ is evaluated, +and its value becomes the value of the whole @'try'@ expression. The +evaluation of @expr_i@ takes place in an environment enriched by the +bindings performed during matching. 
If several patterns match the exception +value, the one that occurs first in the @'try'@ expression is +selected. If none of the patterns matches the exception value, the +exception is raised again, thereby transparently ``passing +through'' the @'try'@ construct. + +\subsection{Operations on data structures} + +\subsubsection*{Products} + +The expression @expr_1 ',' \ldots ',' expr_n@ evaluates to the +\var{n}-tuple of the values of expressions @expr_1@ to @expr_n@. The +evaluation order of the subexpressions is not specified. + +\subsubsection*{Variants} + +The expression @constr expr@ evaluates to the unary variant value +whose constructor is @constr@, and whose argument is the value of +@expr@. Similarly, the expression @constr '(' expr_1 ',' \ldots ',' +expr_n ')'@ evaluates to the n-ary variant value whose constructor is +@constr@ and whose arguments are the values of @expr_1, \ldots, +expr_n@. + +The expression @constr '('expr_1, \ldots, expr_n')'@ evaluates to the +variant value whose constructor is @constr@, and whose arguments are +the values of @expr_1 \ldots expr_n@. + +For lists, some syntactic sugar is provided. The expression +@expr_1 '::' expr_2@ stands for the constructor @'(' '::' ')' @ +applied to the arguments @'(' expr_1 ',' expr_2 ')'@, and therefore +evaluates to the list whose head is the value of @expr_1@ and whose tail +is the value of @expr_2@. The expression @'[' expr_1 ';' \ldots ';' +expr_n ']'@ is equivalent to @expr_1 '::' \ldots '::' expr_n '::' +'[]'@, and therefore evaluates to the list whose elements are the +values of @expr_1@ to @expr_n@. + +\subsubsection*{Polymorphic variants} + +The expression @"`"tag-name expr@ evaluates to the polymorphic variant +value whose tag is @tag-name@, and whose argument is the value of @expr@.
+ +\subsubsection*{Records} + +The expression @'{' field_1 ['=' expr_1] ';' \ldots ';' field_n ['=' +expr_n] '}'@ evaluates to the record value +$\{ field_1 = v_1; \ldots; field_n = v_n \}$ +where $v_i$ is the value of @expr_i@ for \fromoneto{i}{n}. +A single identifier @field_k@ stands for @field_k '=' field_k@, +and a qualified identifier @module-path '.' field_k@ stands for +@module-path '.' field_k '=' field_k@. +The fields @field_1@ to @field_n@ must all belong to the same record +type; each field of this record type must appear exactly +once in the record expression, though they can appear in any +order. The order in which @expr_1@ to @expr_n@ are evaluated is not +specified. Optional type constraints can be added after each field +@'{' field_1 ':' typexpr_1 '=' expr_1 ';'% + \ldots ';' field_n ':' typexpr_n '=' expr_n '}'@ +to force the type of @field_k@ to be compatible with @typexpr_k@. + +The expression +@"{" expr "with" field_1 ["=" expr_1] ";" \ldots ";" field_n ["=" expr_n] "}"@ +builds a fresh record with fields @field_1 \ldots field_n@ equal to +@expr_1 \ldots expr_n@, and all other fields having the same value as +in the record @expr@. In other terms, it returns a shallow copy of +the record @expr@, except for the fields @field_1 \ldots field_n@, +which are initialized to @expr_1 \ldots expr_n@. As previously, +a single identifier @field_k@ stands for @field_k '=' field_k@, +a qualified identifier @module-path '.' field_k@ stands for +@module-path '.' field_k '=' field_k@ and it is +possible to add an optional type constraint on each field being updated +with +@"{" expr "with" field_1 ':' typexpr_1 "=" expr_1 ";" % + \ldots ";" field_n ':' typexpr_n "=" expr_n "}"@. + +The expression @expr_1 '.' field@ evaluates @expr_1@ to a record +value, and returns the value associated to @field@ in this record +value. + +The expression @expr_1 '.'
field '<-' expr_2@ evaluates @expr_1@ to a record +value, which is then modified in-place by replacing the value +associated to @field@ in this record by the value of +@expr_2@. This operation is permitted only if @field@ has been +declared @'mutable'@ in the definition of the record type. The whole +expression @expr_1 '.' field '<-' expr_2@ evaluates to the unit value +@'()'@. + +\subsubsection*{Arrays} + +The expression @'[|' expr_1 ';' \ldots ';' expr_n '|]'@ evaluates to +a \var{n}-element array, whose elements are initialized with the values of +@expr_1@ to @expr_n@ respectively. The order in which these +expressions are evaluated is unspecified. + +The expression @expr_1 '.(' expr_2 ')'@ returns the value of element +number @expr_2@ in the array denoted by @expr_1@. The first element +has number 0; the last element has number $n-1$, where \var{n} is the +size of the array. The exception "Invalid_argument" is raised if the +access is out of bounds. + +The expression @expr_1 '.(' expr_2 ')' '<-' expr_3@ modifies in-place +the array denoted by @expr_1@, replacing element number @expr_2@ by +the value of @expr_3@. The exception "Invalid_argument" is raised if +the access is out of bounds. The value of the whole expression is @'()'@. + +\subsubsection*{Strings} + +The expression @expr_1 '.[' expr_2 ']'@ returns the value of character +number @expr_2@ in the string denoted by @expr_1@. The first character +has number 0; the last character has number $n-1$, where \var{n} is the +length of the string. The exception "Invalid_argument" is raised if the +access is out of bounds. + +The expression @expr_1 '.[' expr_2 ']' '<-' expr_3@ modifies in-place +the string denoted by @expr_1@, replacing character number @expr_2@ by +the value of @expr_3@. The exception "Invalid_argument" is raised if +the access is out of bounds. The value of the whole expression is @'()'@. 
+ +{\bf Note:} this possibility is offered only for backward +compatibility with older versions of OCaml and will be removed in a +future version. New code should use byte sequences and the "Bytes.set" +function. + +\subsection{Operators} +\ikwd{mod\@\texttt{mod}} +\ikwd{land\@\texttt{land}} +\ikwd{lor\@\texttt{lor}} +\ikwd{lxor\@\texttt{lxor}} +\ikwd{lsl\@\texttt{lsl}} +\ikwd{lsr\@\texttt{lsr}} +\ikwd{asr\@\texttt{asr}} + +Symbols from the class @infix-symbol@, as well as the keywords +@"*"@, @"+"@, @"-"@, @'-.'@, @"="@, @"!="@, @"<"@, @">"@, @"or"@, @"||"@, +@"&"@, @"&&"@, @":="@, @"mod"@, @"land"@, @"lor"@, @"lxor"@, @"lsl"@, @"lsr"@, +and @"asr"@ can appear in infix position (between two +expressions). Symbols from the class @prefix-symbol@, as well as +the keywords @"-"@ and @"-."@ +can appear in prefix position (in front of an expression). + +Infix and prefix symbols do not have a fixed meaning: they are simply +interpreted as applications of functions bound to the names +corresponding to the symbols. The expression @prefix-symbol expr@ is +interpreted as the application @'(' prefix-symbol ')' +expr@. Similarly, the expression @expr_1 infix-symbol expr_2@ is +interpreted as the application @'(' infix-symbol ')' expr_1 expr_2@. + +The table below lists the symbols defined in the initial environment +and their initial meaning. (See the description of the core +library module "Pervasives" in chapter~\ref{c:corelib} for more +details). Their meaning may be changed at any time using +@"let" "(" infix-op ")" name_1 name_2 "=" \ldots@ + +Note: the operators @'&&'@, @'||'@, and @'~-'@ are handled specially +and it is not advisable to change their meaning. + +The keywords @'-'@ and @'-.'@ can appear both as infix and +prefix operators. When they appear as prefix operators, they are +interpreted respectively as the functions @'(~-)'@ and @'(~-.)'@. 
+ +%% Conversely, a regular function identifier can also be used as an infix +%% operator by enclosing it in backquotes: @expr_1 '`' ident '`' expr_2@ +%% is interpreted as the application @ident expr_1 expr_2@. + +\ikwd{mod\@\texttt{mod}}% +\ikwd{land\@\texttt{land}}% +\ikwd{lor\@\texttt{lor}}% +\ikwd{lxor\@\texttt{lxor}}% +\ikwd{lsl\@\texttt{lsl}}% +\ikwd{lsr\@\texttt{lsr}}% +\ikwd{asr\@\texttt{asr}}% +\begin{tableau}{|l|p{12cm}|}{Operator}{Initial meaning} +\entree{"+"}{Integer addition.} +\entree{"-" (infix)}{Integer subtraction.} +\entree{"~- -" (prefix)}{Integer negation.} +\entree{"*"}{Integer multiplication.} +\entree{"/"}{Integer division. + Raise "Division_by_zero" if second argument is zero.} +\entree{"mod"}{Integer modulus. Raise + "Division_by_zero" if second argument is zero.} +\entree{"land"}{Bitwise logical ``and'' on integers.} +\entree{"lor"}{Bitwise logical ``or'' on integers.} +\entree{"lxor"}{Bitwise logical ``exclusive or'' on integers.} +\entree{"lsl"}{Bitwise logical shift left on integers.} +\entree{"lsr"}{Bitwise logical shift right on integers.} +\entree{"asr"}{Bitwise arithmetic shift right on integers.} +\entree{"+."}{Floating-point addition.} +\entree{"-." (infix)}{Floating-point subtraction.} +\entree{"~-. -." (prefix)}{Floating-point negation.} +\entree{"*."}{Floating-point multiplication.} +\entree{"/."}{Floating-point division.} +\entree{"**"}{Floating-point exponentiation.} +\entree{{\tt\char64} }{List concatenation.} +\entree{"^" }{String concatenation.} +\entree{"!" 
}{Dereferencing (return the current + contents of a reference).} +\entree{":="}{Reference assignment (update the + reference given as first argument with the value of the second + argument).} +\entree{"=" }{Structural equality test.} +\entree{"<>" }{Structural inequality test.} +\entree{"==" }{Physical equality test.} +\entree{"!=" }{Physical inequality test.} +\entree{"<" }{Test ``less than''.} +\entree{"<=" }{Test ``less than or equal''.} +\entree{">" }{Test ``greater than''.} +\entree{">=" }{Test ``greater than or equal''.} +\entree{"&& &"}{Boolean conjunction.} +\entree{"|| or"}{Boolean disjunction.} +\end{tableau} + +\subsection{Objects} \label{s:objects} + +\subsubsection*{Object creation} + +\ikwd{new\@\texttt{new}} + +When @class-path@ evaluates to a class body, @'new' class-path@ +evaluates to a new object containing the instance variables and +methods of this class. + +When @class-path@ evaluates to a class function, @'new' class-path@ +evaluates to a function expecting the same number of arguments and +returning a new object of this class. + +\subsubsection*{Immediate object creation} + +\ikwd{object\@\texttt{object}} + +Creating directly an object through the @'object' class-body 'end'@ +construct is operationally equivalent to defining locally a @'class' +class-name '=' 'object' class-body 'end'@ ---see sections +\ref{ss:class-body} and following for the syntax of @class-body@--- +and immediately creating a single object from it by @'new' class-name@. + +The typing of immediate objects is slightly different from explicitly +defining a class in two respects. First, the inferred object type may +contain free type variables. Second, since the class body of an +immediate object will never be extended, its self type can be unified +with a closed object type. + +\subsubsection*{Method invocation} + +The expression @expr '#' method-name@ invokes the method +@method-name@ of the object denoted by @expr@. 
+ +If @method-name@ is a polymorphic method, its type should be known at +the invocation site. This is true for instance if @expr@ is the name +of a fresh object (@'let' ident = 'new' class-path \dots @) or if +there is a type constraint. Principality of the derivation can be +checked in the "-principal" mode. + +\subsubsection*{Accessing and modifying instance variables} + +The instance variables of a class are visible only in the body of the +methods defined in the same class or a class that inherits from the +class defining the instance variables. The expression @inst-var-name@ +evaluates to the value of the given instance variable. The expression +@inst-var-name '<-' expr@ assigns the value of @expr@ to the instance +variable @inst-var-name@, which must be mutable. The whole expression +@inst-var-name '<-' expr@ evaluates to @"()"@. + + +\subsubsection*{Object duplication} + +An object can be duplicated using the library function "Oo.copy" +(see +\ifouthtml \ahref{libref/Oo.html}{Module \texttt{Oo}}\else +section~\ref{Oo}\fi). Inside a method, the expression +@ '{<' inst-var-name '=' expr { ';' inst-var-name '=' expr } '>}'@ +returns a copy of self with the given instance variables replaced by +the values of the associated expressions; other instance variables +have the same value in the returned object as in self. + +\subsection{Coercions} \label{s:coercions} + +Expressions whose type contains object or polymorphic variant types +can be explicitly coerced (weakened) to a supertype. +% +The expression @'('expr ':>' typexpr')'@ coerces the expression @expr@ +to type @typexpr@. +% +The expression @'('expr ':' typexpr_1 ':>' typexpr_2')'@ coerces the +expression @expr@ from type @typexpr_1@ to type @typexpr_2@. 
+ +The former operator will sometimes fail to coerce an expression @expr@ +from a type @typ_1@ to a type @typ_2@ +even if type @typ_1@ is a subtype of type +@typ_2@: in the current implementation it only expands two levels of +type abbreviations containing objects and/or polymorphic variants, +keeping only recursion when it is explicit in the class type (for objects). +As an exception to the above algorithm, if both the inferred type of @expr@ +and @typ@ are ground ({\em i.e.} do not contain type variables), the +former operator behaves as the latter one, taking the inferred type of +@expr@ as @typ_1@. In case of failure with the former operator, +the latter one should be used. + +It is only possible to coerce an expression @expr@ from type +@typ_1@ to type @typ_2@, if the type of @expr@ is an instance of +@typ_1@ (like for a type annotation), and @typ_1@ is a subtype +of @typ_2@. The type of the coerced expression is an +instance of @typ_2@. If the types contain variables, +they may be instantiated by the subtyping algorithm, but this is only +done after determining whether @typ_1@ is a potential subtype of +@typ_2@. This means that typing may fail during this latter +unification step, even if some instance of @typ_1@ is a subtype of +some instance of @typ_2@. +% +In the following paragraphs we describe the subtyping relation used. + +\subsubsection*{Object types} + +A fixed object type admits as subtype any object type that includes all +its methods. The types of the methods shall be subtypes of those in +the supertype. Namely, +\begin{center} +@ '<' met_1 ':' typ_1 ';' \dots ';' met_n ':' typ_n '>' @ +\end{center} +is a supertype of +\begin{center} +@ '<' met_1 ':' typ@$'_1$@ ';' \dots ';' met_n ':' typ@$'_n$@ ';' +met@$_{n+1}$@ ':' typ@$'_{n+1}$@ ';' \dots ';' met@$_{n+m}$@ ':' typ@$'_{n+m}$@ +~[';' '..'] '>' @ +\end{center} +which may contain an ellipsis ".." if every @typ_i@ is a supertype of +the corresponding @typ@$'_i$. 
+ +A monomorphic method type can be a supertype of a polymorphic method +type. Namely, if @typ@ is an instance of @typ@$'$, then @ "'"@a@_1 +\dots "'"@a@_n '.' typ@$'$ is a subtype of @typ@. + +Inside a class definition, newly defined types are not available for +subtyping, as the type abbreviations are not yet completely +defined. There is an exception for coercing @@self@@ to the (exact) +type of its class: this is allowed if the type of @@self@@ does not +appear in a contravariant position in the class type, {\em i.e.} if +there are no binary methods. + +\subsubsection*{Polymorphic variant types} + +A polymorphic variant type @typ@ is a subtype of another polymorphic +variant type @typ@$'$ if the upper bound of @typ@ ({\em i.e.} the +maximum set of constructors that may appear in an instance of @typ@) +is included in the lower bound of @typ@$'$, and the types of arguments +for the constructors of @typ@ are subtypes of those in +@typ@$'$. Namely, +\begin{center} +@ "["["<"] "`"C_1 "of" typ_1 "|" \dots "|" "`"C_n "of" typ_n "]" @ +\end{center} +which may be a shrinkable type, is a subtype of +\begin{center} +@ "["[">"] "`"C_1 "of" typ@$'_1$@ "|" \dots "|" "`"C_n "of" typ@$'_n$@ + "|" "`"C@$_{n+1}$@ "of" typ@$'_{n+1}$@ "|" \dots "|" "`"C@$_{n+m}$@ "of" + typ@$'_{n+m}$@ "]" @ +\end{center} +which may be an extensible type, if every @typ_i@ is a subtype of @typ@$'_i$. + +\subsubsection*{Variance} + +Other types do not introduce new subtyping, but they may propagate the +subtyping of their arguments. For instance, @typ_1 "*" typ_2@ is a +subtype of @typ@$'_1$@ "*" typ@$'_2$ when @typ_1@ and @typ_2@ are +respectively subtypes of @typ@$'_1$ and @typ@$'_2$. +For function types, the relation is more subtle: +@typ_1 "->" typ_2@ is a subtype of @typ@$'_1$@~"->" typ@$'_2$ +if @typ_1@ is a supertype of @typ@$'_1$ and @typ_2@ is a +subtype of @typ@$'_2$. 
For this reason, function types are covariant in +their second argument (like tuples), but contravariant in their first +argument. Mutable types, like "array" or "ref" are neither covariant +nor contravariant, they are nonvariant, that is they do not propagate +subtyping. + +For user-defined types, the variance is automatically inferred: a +parameter is covariant if it has only covariant occurrences, +contravariant if it has only contravariant occurrences, +variance-free if it has no occurrences, and nonvariant otherwise. +A variance-free parameter may change freely through subtyping, it does +not have to be a subtype or a supertype. +% +For abstract and private types, the variance must be given explicitly +(see section~\ref{s:type-defs}), +otherwise the default is nonvariant. This is also the case for +constrained arguments in type definitions. + + +\subsection{Other} + +\subsubsection*{Assertion checking} + + +\ikwd{assert\@\texttt{assert}} + +OCaml supports the @"assert"@ construct to check debugging assertions. +The expression @"assert" expr@ evaluates the expression @expr@ and +returns @"()"@ if @expr@ evaluates to @"true"@. If it evaluates to +@"false"@ the exception +"Assert_failure" is raised with the source file name and the +location of @expr@ as arguments. Assertion +checking can be turned off with the "-noassert" compiler option. In +this case, @expr@ is not evaluated at all. + +As a special case, @"assert false"@ is reduced to +@'raise' '('@"Assert_failure ..."@')'@, which gives it a polymorphic +type. This means that it can be used in place of any expression (for +example as a branch of any pattern-matching). It also means that +the @"assert false"@ ``assertions'' cannot be turned off by the +"-noassert" option. +% +\index{Assertfailure\@\verb`Assert_failure`} + +\subsubsection*{Lazy expressions} +\ikwd{lazy\@\texttt{lazy}} + +The expression @"lazy" expr@ returns a value \var{v} of type "Lazy.t" that +encapsulates the computation of @expr@. 
The argument @expr@ is not +evaluated at this point in the program. Instead, its evaluation will +be performed the first time the function "Lazy.force" is applied to the value +\var{v}, returning the actual value of @expr@. Subsequent applications +of "Lazy.force" to \var{v} do not evaluate @expr@ again. Applications +of "Lazy.force" may be implicit through pattern matching (see~\ref{s:lazypat}). + +\subsubsection*{Local modules} +\ikwd{let\@\texttt{let}} +\ikwd{module\@\texttt{module}} + +The expression +@"let" "module" module-name "=" module-expr "in" expr@ +locally binds the module expression @module-expr@ to the identifier +@module-name@ during the evaluation of the expression @expr@. +It then returns the value of @expr@. For example: +\begin{verbatim} + let remove_duplicates comparison_fun string_list = + let module StringSet = + Set.Make(struct type t = string + let compare = comparison_fun end) in + StringSet.elements + (List.fold_right StringSet.add string_list StringSet.empty) +\end{verbatim} + +\subsubsection*{Local opens} +\ikwd{let\@\texttt{let}} +\ikwd{open\@\texttt{open}} + +The expressions @"let" "open" module-path "in" expr@ and +@module-path'.('expr')'@ are strictly equivalent. These +constructions locally open the module referred to by the module path +@module-path@ in the respective scope of the expression @expr@. + +When the body of a local open expression is delimited by +@'[' ']'@, @'[|' '|]'@, or @'{' '}'@, the parentheses can be omitted. +For expressions, parentheses can also be omitted for @'{<' '>}'@. +For example, @module-path'.['expr']'@ is equivalent to +@module-path'.(['expr'])'@, and @module-path'.[|' expr '|]'@ is +equivalent to @module-path'.([|' expr '|])'@.
+ +%% \newpage diff --git a/manual/manual/refman/exten.etex b/manual/manual/refman/exten.etex new file mode 100644 index 00000000..080354d8 --- /dev/null +++ b/manual/manual/refman/exten.etex @@ -0,0 +1,2393 @@ +\chapter{Language extensions} \label{c:extensions} +\pdfchapter{Language extensions} +%HEVEA\cutname{extn.html} + +This chapter describes language extensions and convenience features +that are implemented in OCaml, but not described in the +OCaml reference manual. + +\section{Integer literals for types \texttt{int32}, \texttt{int64} + and \texttt{nativeint}} \label{s:ext-integer} + +(Introduced in Objective Caml 3.07) + +\begin{syntax} +constant: ... + | int32-literal + | int64-literal + | nativeint-literal +; +int32-literal: integer-literal 'l' +; +int64-literal: integer-literal 'L' +; +nativeint-literal: integer-literal 'n' +\end{syntax} + +An integer literal can be followed by one of the letters "l", "L" or "n" +to indicate that this integer has type "int32", "int64" or "nativeint" +respectively, instead of the default type "int" for integer literals. +\index{int32\@\verb`int32`} +\index{int64\@\verb`int64`} +\index{nativeint\@\verb`nativeint`} +The library modules "Int32"[\moduleref{Int32}], +"Int64"[\moduleref{Int64}] and "Nativeint"[\moduleref{Nativeint}] +provide operations on these integer types. 
+ +\section{Recursive definitions of values} \label{s:letrecvalues} + +(Introduced in Objective Caml 1.00) + +As mentioned in section~\ref{s:localdef}, the @'let' 'rec'@ binding +construct, in addition to the definition of recursive functions, +also supports a certain class of recursive definitions of +non-functional values, such as +\begin{center} +@"let" "rec" name_1 "=" "1" "::" name_2 +"and" name_2 "=" "2" "::" name_1 +"in" expr@ +\end{center} +which binds @name_1@ to the cyclic list "1::2::1::2::"\ldots, and +@name_2@ to the cyclic list "2::1::2::1::"\ldots +Informally, the class of accepted definitions consists of those +definitions where the defined names occur only inside function +bodies or as argument to a data constructor. + +More precisely, consider the expression: +\begin{center} +@"let" "rec" name_1 "=" expr_1 "and" \ldots "and" name_n "=" expr_n "in" expr@ +\end{center} +It will be accepted if each one of @expr_1 \ldots expr_n@ is +statically constructive with respect to @name_1 \ldots name_n@, +is not immediately linked to any of @name_1 \ldots name_n@, +and is not an array constructor whose arguments have abstract type. 
+ +An expression @@e@@ is said to be {\em statically constructive +with respect to} the variables @name_1 \ldots name_n@ if at least +one of the following conditions is true: +\begin{itemize} +\item @@e@@ has no free occurrence of any of @name_1 \ldots name_n@ +\item @@e@@ is a variable +\item @@e@@ has the form @"fun" \ldots "->" \ldots@ +\item @@e@@ has the form @"function" \ldots "->" \ldots@ +\item @@e@@ has the form @"lazy" "(" \ldots ")"@ +\item @@e@@ has one of the following forms, where each one of + @expr_1 \ldots expr_m@ is statically constructive with respect to + @name_1 \ldots name_n@, and @expr_0@ is statically constructive with + respect to @name_1 \ldots name_n, xname_1 \ldots xname_m@: + \begin{itemize} + \item @"let" ["rec"] xname_1 "=" expr_1 "and" \ldots + "and" xname_m "=" expr_m "in" expr_0@ + \item @"let" "module" \ldots "in" expr_1@ + \item @constr "("expr_1"," \ldots "," expr_m")"@ + \item @"`"tag-name "("expr_1"," \ldots "," expr_m")"@ + \item @"[|" expr_1";" \ldots ";" expr_m "|]"@ + \item @"{" field_1 "=" expr_1";" \ldots ";" field_m = expr_m "}"@ + \item @"{" expr_1 "with" field_2 "=" expr_2";" \ldots ";" + field_m = expr_m "}"@ where @expr_1@ is not immediately + linked to @name_1 \ldots name_n@ + \item @"(" expr_1"," \ldots "," expr_m ")"@ + \item @expr_1";" \ldots ";" expr_m@ + \end{itemize} +\end{itemize} + +An expression @@e@@ is said to be {\em immediately linked to} the variable +@name@ in the following cases: +\begin{itemize} +\item @@e@@ is @name@ +\item @@e@@ has the form @expr_1";" \ldots ";" expr_m@ where @expr_m@ + is immediately linked to @name@ +\item @@e@@ has the form @"let" ["rec"] xname_1 "=" expr_1 "and" \ldots + "and" xname_m "=" expr_m "in" expr_0@ where @expr_0@ is immediately + linked to @name@ or to one of the @xname_i@ such that @expr_i@ + is immediately linked to @name@. 
+\end{itemize} + +\section{Lazy patterns} \label{s:lazypat} + +\ikwd{lazy\@\texttt{lazy}} + +(Introduced in Objective Caml 3.11) + +\begin{syntax} +pattern: ... + | 'lazy' pattern +\end{syntax} + +The pattern @"lazy" pattern@ matches a value \var{v} of type "Lazy.t", +provided @pattern@ matches the result of forcing \var{v} with +"Lazy.force". A successful match of a pattern containing @"lazy"@ +sub-patterns forces the corresponding parts of the value being matched, even +those that imply no test such as @"lazy" value-name@ or @"lazy" "_"@. +Matching a value with a @pattern-matching@ where some patterns +contain @"lazy"@ sub-patterns may imply forcing parts of the value, +even when the pattern selected in the end has no @"lazy"@ sub-pattern. + +For more information, see the description of module "Lazy" in the +standard library ( +\ifouthtml +\ahref{libref/Lazy.html}{Module \texttt{Lazy}}\else section~\ref{Lazy}\fi). +% +\index{Lazy (module)\@\verb`Lazy` (module)}% +\index{force\@\verb`force`}% + +\section{Recursive modules} \label{s-recursive-modules} +\ikwd{module\@\texttt{module}} +\ikwd{and\@\texttt{and}} + +(Introduced in Objective Caml 3.07) + +% TODO: relaxed syntax + +\begin{syntax} +definition: + ... + | 'module' 'rec' module-name ':' module-type '=' module-expr \\ + { 'and' module-name ':' module-type '=' module-expr } +; +specification: + ... + | 'module' 'rec' module-name ':' module-type + { 'and' module-name':' module-type } +\end{syntax} + +Recursive module definitions, introduced by the @"module rec"@ \ldots +@"and"@ \ldots\ construction, generalize regular module definitions +@'module' module-name '=' module-expr@ and module specifications +@'module' module-name ':' module-type@ by allowing the defining +@module-expr@ and the @module-type@ to refer recursively to the module +identifiers being defined. 
A typical example of a recursive module +definition is: +\begin{verbatim} + module rec A : sig + type t = Leaf of string | Node of ASet.t + val compare: t -> t -> int + end + = struct + type t = Leaf of string | Node of ASet.t + let compare t1 t2 = + match (t1, t2) with + (Leaf s1, Leaf s2) -> Pervasives.compare s1 s2 + | (Leaf _, Node _) -> 1 + | (Node _, Leaf _) -> -1 + | (Node n1, Node n2) -> ASet.compare n1 n2 + end + and ASet : Set.S with type elt = A.t + = Set.Make(A) +\end{verbatim} +It can be given the following specification: +\begin{verbatim} + module rec A : sig + type t = Leaf of string | Node of ASet.t + val compare: t -> t -> int + end + and ASet : Set.S with type elt = A.t +\end{verbatim} + +This is an experimental extension of OCaml: the class of +recursive definitions accepted, as well as its dynamic semantics are +not final and subject to change in future releases. + +Currently, the compiler requires that all dependency cycles between +the recursively-defined module identifiers go through at least one +``safe'' module. A module is ``safe'' if all value definitions that +it contains have function types @typexpr_1 '->' typexpr_2@. Evaluation of a +recursive module definition proceeds by building initial values for +the safe modules involved, binding all (functional) values to +@'fun' '_' '->' 'raise' @"Undefined_recursive_module". The defining +module expressions are then evaluated, and the initial values +for the safe modules are replaced by the values thus computed. 
If a +function component of a safe module is applied during this computation +(which corresponds to an ill-founded recursive definition), the +"Undefined_recursive_module" exception is raised at runtime: + +\begin{caml_example}{verbatim} +module rec M: sig val f: unit -> int end = struct let f () = N.x end +and N:sig val x: int end = struct let x = M.f () end +\end{caml_example} + +If there are no safe modules along a dependency cycle, an error is raised + +\begin{caml_example}{verbatim}[error] +module rec M: sig val x: int end = struct let x = N.y end +and N:sig val x: int val y:int end = struct let x = M.x let y = 0 end +\end{caml_example} + +Note that, in the @specification@ case, the @module-type@s must be +parenthesized if they use the @'with' mod-constraint@ construct. + +\section{Private types}\label{s:private-types} +\ikwd{private\@\texttt{private}} + +Private type declarations in module signatures, of the form +"type t = private ...", enable libraries to +reveal some, but not all aspects of the implementation of a type to +clients of the library. In this respect, they strike a middle ground +between abstract type declarations, where no information is revealed +on the type implementation, and data type definitions and type +abbreviations, where all aspects of the type implementation are +publicized. Private type declarations come in three flavors: for +variant and record types (section~\ref{s-private-types-variant}), +for type abbreviations (section~\ref{s-private-types-abbrev}), +and for row types (section~\ref{s-private-rows}). + +\subsection{Private variant and record types} \label{s-private-types-variant} + +(Introduced in Objective Caml 3.07) + +\begin{syntax} +type-representation: + ... + | '=' 'private' [ '|' ] constr-decl { '|' constr-decl } + | '=' 'private' record-decl +\end{syntax} + +Values of a variant or record type declared @"private"@ +can be de-structured normally in pattern-matching or via +the @expr '.' 
field@ notation for record accesses. However, values of +these types cannot be constructed directly by constructor application +or record construction. Moreover, assignment on a mutable field of a +private record type is not allowed. + +The typical use of private types is in the export signature of a +module, to ensure that construction of values of the private type always +goes through the functions provided by the module, while still allowing +pattern-matching outside the defining module. For example: +\begin{verbatim} + module M : sig + type t = private A | B of int + val a : t + val b : int -> t + end + = struct + type t = A | B of int + let a = A + let b n = assert (n > 0); B n + end +\end{verbatim} +Here, the @"private"@ declaration ensures that in any value of type +"M.t", the argument to the "B" constructor is always a positive integer. + +With respect to the variance of their parameters, private types are +handled like abstract types. That is, if a private type has +parameters, their variance is the one explicitly given by prefixing +the parameter by a `"+"' or a `"-"', it is invariant otherwise. + +\subsection{Private type abbreviations} \label{s-private-types-abbrev} + +(Introduced in Objective Caml 3.11) + +\begin{syntax} +type-equation: + ... + | '=' 'private' typexpr +\end{syntax} + +Unlike a regular type abbreviation, a private type abbreviation +declares a type that is distinct from its implementation type @typexpr@. +However, coercions from the type to @typexpr@ are permitted. +Moreover, the compiler ``knows'' the implementation type and can take +advantage of this knowledge to perform type-directed optimizations.
+ +The following example uses a private type abbreviation to define a +module of nonnegative integers: +\begin{verbatim} + module N : sig + type t = private int + val of_int: int -> t + val to_int: t -> int + end + = struct + type t = int + let of_int n = assert (n >= 0); n + let to_int n = n + end +\end{verbatim} +The type "N.t" is incompatible with "int", ensuring that nonnegative +integers and regular integers are not confused. However, if "x" has +type "N.t", the coercion "(x :> int)" is legal and returns the +underlying integer, just like "N.to_int x". Deep coercions are also +supported: if "l" has type "N.t list", the coercion "(l :> int list)" +returns the list of underlying integers, like "List.map N.to_int l" +but without copying the list "l". + +Note that the coercion @"(" expr ":>" typexpr ")"@ is actually an abbreviated +form, +and will only work in the presence of private abbreviations if neither the +type of @expr@ nor @typexpr@ contains any type variables. If they do, +you must use the full form @"(" expr ":" typexpr_1 ":>" typexpr_2 ")"@ where +@typexpr_1@ is the expected type of @expr@. Concretely, this would be "(x : +N.t :> int)" and "(l : N.t list :> int list)" for the above examples. + +\subsection{Private row types} \label{s-private-rows} +\ikwd{private\@\texttt{private}} + +(Introduced in Objective Caml 3.09) + +\begin{syntax} +type-equation: + ... + | '=' 'private' typexpr +\end{syntax} + +Private row types are type abbreviations where part of the +structure of the type is left abstract. Concretely @typexpr@ in the +above should denote either an object type or a polymorphic variant +type, with some possibility of refinement left. If the private +declaration is used in an interface, the corresponding implementation +may either provide a ground instance, or a refined private type. +\begin{verbatim} + module M : sig type c = private < x : int; ..
> val o : c end = + struct + class c = object method x = 3 method y = 2 end + let o = new c + end +\end{verbatim} +This declaration does more than hiding the "y" method, it also makes +the type "c" incompatible with any other closed object type, meaning +that only "o" will be of type "c". In that respect it behaves +similarly to private record types. But private row types are +more flexible with respect to incremental refinement. This feature can +be used in combination with functors. +\begin{verbatim} + module F(X : sig type c = private < x : int; .. > end) = + struct + let get_x (o : X.c) = o#x + end + module G(X : sig type c = private < x : int; y : int; .. > end) = + struct + include F(X) + let get_y (o : X.c) = o#y + end +\end{verbatim} + +A polymorphic variant type [t], for example +\begin{verbatim} + type t = [ `A of int | `B of bool ] +\end{verbatim} +can be refined in two ways. A definition [u] may add new fields to [t], +and the declaration +\begin{verbatim} + type u = private [> t] +\end{verbatim} +will keep those new fields abstract. Construction of values of type +[u] is possible using the known variants of [t], but any +pattern-matching will require a default case to handle the potential +extra fields. Dually, a declaration [v] may restrict the fields of [t] +through abstraction: the declaration +\begin{verbatim} + type v = private [< t > `A] +\end{verbatim} +corresponds to private variant types. One cannot create a value of the +private type [v], except using the constructors that are explicitly +listed as present, "(`A n)" in this example; yet, when +pattern-matching on a [v], one should assume that any of the +constructors of [t] could be present. + +Similarly to abstract types, the variance of type parameters +is not inferred, and must be given explicitly. + + +\section{Local opens for patterns} +\ikwd{let\@\texttt{let}} +\ikwd{open\@\texttt{open}} \label{s:local-opens} + +(Introduced in OCaml 4.04) + +\begin{syntax} +pattern: + ...
+ | module-path '.(' pattern ')' + | module-path '.[' pattern ']' + | module-path '.[|' pattern '|]' + | module-path '.{' pattern '}' + +\end{syntax} + +For patterns, local opens are limited to the +@module-path'.('pattern')'@ construction. This +construction locally opens the module referred to by the module path +@module-path@ in the scope of the pattern @pattern@. + +When the body of a local open pattern is delimited by +@'[' ']'@, @'[|' '|]'@, or @'{' '}'@, the parentheses can be omitted. +For example, @module-path'.['pattern']'@ is equivalent to +@module-path'.(['pattern'])'@, and @module-path'.[|' pattern '|]'@ is +equivalent to @module-path'.([|' pattern '|])'@. + +\section{Object copy short notations} \label{s:object-notations} +\ikwd{with\@\texttt{with}} + +(Introduced in OCaml 4.03) + +\begin{syntax} +expr: + ... + | '{' '<' expr 'with' field ['=' expr] { ';' field ['=' expr] } [';'] '>' '}' +\end{syntax} + +In an object copy expression, +a single identifier @id@ stands for @id '=' id@, and a qualified identifier +@module-path '.' id@ stands for @module-path '.' id '=' id@. +For example, all the following methods are equivalent: +\begin{verbatim} + object + val x=0. val y=0. val z=0. + method f_0 x y = {< x; y >} + method f_1 x y = {< x = x; y >} + method f_2 x y = {< x=x ; y = y >} + end +\end{verbatim} + +\section{Locally abstract types} +\ikwd{type\@\texttt{type}} +\ikwd{fun\@\texttt{fun}} \label{s:locally-abstract} + +(Introduced in OCaml 3.12, short syntax added in 4.03) + +\begin{syntax} +parameter: + ... + | '(' "type" {{typeconstr-name}} ')' +\end{syntax} + +The expression @"fun" '(' "type" typeconstr-name ')' "->" expr@ introduces a +type constructor named @typeconstr-name@ which is considered abstract +in the scope of the sub-expression, but then replaced by a fresh type +variable.
Note that contrary to what the syntax could suggest, the +expression @"fun" '(' "type" typeconstr-name ')' "->" expr@ itself does not +suspend the evaluation of @expr@ as a regular abstraction would. The +syntax has been chosen to fit nicely in the context of function +declarations, where it is generally used. It is possible to freely mix +regular function parameters with pseudo type parameters, as in: +\begin{caml_example*}{verbatim} + let f = fun (type t) (foo : t list) -> assert false[@ellipsis] +\end{caml_example*} +and even use the alternative syntax for declaring functions: +\begin{caml_example*}{verbatim} + let f (type t) (foo : t list) = assert false[@ellipsis] +\end{caml_example*} +If several locally abstract types need to be introduced, it is possible to use +the syntax +@"fun" '(' "type" typeconstr-name_1 \ldots typeconstr-name_n ')' "->" expr@ +as syntactic sugar for @"fun" '(' "type" typeconstr-name_1 ')' "->" \ldots "->" +"fun" '(' "type" typeconstr-name_n ')' "->" expr@. For instance, +\begin{caml_example*}{verbatim} + let f = fun (type t u v) -> fun (foo : (t * u * v) list) -> assert false[@ellipsis] + let f' (type t u v) (foo : (t * u * v) list) = assert false[@ellipsis] +\end{caml_example*} + +This construction is useful because the type constructors it introduces +can be used in places where a type variable is not allowed. For +instance, one can use it to define an exception in a local module +within a polymorphic function.
+\begin{verbatim} + let f (type t) () = + let module M = struct exception E of t end in + (fun x -> M.E x), (function M.E x -> Some x | _ -> None) +\end{verbatim} + +Here is another example: +\begin{verbatim} + let sort_uniq (type s) (cmp : s -> s -> int) = + let module S = Set.Make(struct type t = s let compare = cmp end) in + fun l -> + S.elements (List.fold_right S.add l S.empty) +\end{verbatim} + +It is also extremely useful for first-class modules (see +section~\ref{s-first-class-modules}) and generalized algebraic datatypes +(GADTs: see section~\ref{s:gadts}). + +\paragraph{Polymorphic syntax} (Introduced in OCaml 4.00) + +\begin{syntax} +let-binding: + ... + | value-name ':' 'type' {{ typeconstr-name }} '.' typexpr '=' expr +; +class-field: + ... + | 'method' ['private'] method-name ':' 'type' + {{ typeconstr-name }} '.' typexpr '=' expr + | 'method!' ['private'] method-name ':' 'type' + {{ typeconstr-name }} '.' typexpr '=' expr +\end{syntax} + +The @"(type" typeconstr-name")"@ syntax construction by itself does not make +polymorphic the type variable it introduces, but it can be combined +with explicit polymorphic annotations where needed. +The above rule is provided as syntactic sugar to make this easier: +\begin{caml_example*}{verbatim} + let rec f : type t1 t2. t1 * t2 list -> t1 = assert false[@ellipsis] +\end{caml_example*} +\noindent +is automatically expanded into +\begin{caml_example*}{verbatim} + let rec f : 't1 't2. 't1 * 't2 list -> 't1 = + fun (type t1) (type t2) -> ( assert false[@ellipsis] : t1 * t2 list -> t1) +\end{caml_example*} +This syntax can be very useful when defining recursive functions involving +GADTs, see the section~\ref{s:gadts} for a more detailed explanation. + +The same feature is provided for method definitions. 
+ +\section{First-class modules}\label{s-first-class-modules} +\ikwd{module\@\texttt{module}} +\ikwd{val\@\texttt{val}} +\ikwd{with\@\texttt{with}} +\ikwd{and\@\texttt{and}} + +(Introduced in OCaml 3.12; pattern syntax and package type inference +introduced in 4.00; structural comparison of package types introduced in 4.02; +fewer parens required starting from 4.05) + +\begin{syntax} +typexpr: + ... + | '(''module' package-type')' +; +module-expr: + ... + | '(''val' expr [':' package-type]')' +; +expr: + ... + | '(''module' module-expr [':' package-type]')' +; +pattern: + ... + | '(''module' module-name [':' package-type]')' +; +package-type: + modtype-path + | modtype-path 'with' package-constraint { 'and' package-constraint } +; +package-constraint: + 'type' typeconstr '=' typexpr +; +\end{syntax} + +Modules are typically thought of as static components. This extension +makes it possible to pack a module as a first-class value, which can +later be dynamically unpacked into a module. + +The expression @'(' 'module' module-expr ':' package-type ')'@ converts the +module (structure or functor) denoted by module expression @module-expr@ +to a value of the core language that encapsulates this module. The +type of this core language value is @'(' 'module' package-type ')'@. +The @package-type@ annotation can be omitted if it can be inferred +from the context. + +Conversely, the module expression @'(' 'val' expr ':' package-type ')'@ +evaluates the core language expression @expr@ to a value, which must +have type @'module' package-type@, and extracts the module that was +encapsulated in this value. Again @package-type@ can be omitted if the +type of @expr@ is known. +If the module expression is already parenthesized, like the arguments +of functors are, no additional parens are needed: "Map.Make(val key)". + +The pattern @'(' 'module' module-name ':' package-type ')'@ matches a +package with type @package-type@ and binds it to @module-name@.
+It is not allowed in toplevel let bindings. +Again @package-type@ can be omitted if it can be inferred from the +enclosing pattern. + +The @package-type@ syntactic class appearing in the @'(' 'module' +package-type ')'@ type expression and in the annotated forms represents a +subset of module types. +This subset consists of named module types with optional constraints +of a limited form: only non-parametrized types can be specified. + +For type-checking purposes (and starting from OCaml 4.02), package types +are compared using the structural comparison of module types. + +In general, the module expression @'(' "val" expr ":" package-type +')'@ cannot be used in the body of a functor, because this could cause +unsoundness in conjunction with applicative functors. +Since OCaml 4.02, this is relaxed in two ways: +if @package-type@ does not contain nominal type declarations ({\em + i.e.} types that are created with a proper identity), then this +expression can be used anywhere, and even if it contains such types +it can be used inside the body of a generative +functor, described in section~\ref{s:generative-functors}. +It can also be used anywhere in the context of a local module binding +@'let' 'module' module-name '=' '(' "val" expr_1 ":" package-type ')' + "in" expr_2@. + +\paragraph{Basic example} A typical use of first-class modules is to +select at run-time among several implementations of a signature. 
+Each implementation is a structure that we can encapsulate as a +first-class module, then store in a data structure such as a hash +table: +\begin{caml_example*}{verbatim} + module type DEVICE = sig [@@@ellipsis] end + let devices : (string, (module DEVICE)) Hashtbl.t = Hashtbl.create 17 + + module SVG = struct [@@@ellipsis] end + let _ = Hashtbl.add devices "SVG" (module SVG : DEVICE) + + module PDF = struct [@@@ellipsis] end + let _ = Hashtbl.add devices "PDF" (module PDF: DEVICE) +\end{caml_example*} +We can then select one implementation based on command-line +arguments, for instance: +\begin{verbatim} + module Device = + (val (try Hashtbl.find devices (parse_cmdline()) + with Not_found -> eprintf "Unknown device %s\n"; exit 2) + : DEVICE) +\end{verbatim} +Alternatively, the selection can be performed within a function: +\begin{verbatim} + let draw_using_device device_name picture = + let module Device = + (val (Hashtbl.find devices device_name) : DEVICE) + in + Device.draw picture +\end{verbatim} + +\paragraph{Advanced examples} +With first-class modules, it is possible to parametrize some code over the +implementation of a module without using a functor. + +\begin{verbatim} + let sort (type s) (module Set : Set.S with type elt = s) l = + Set.elements (List.fold_right Set.add l Set.empty) + val sort : (module Set.S with type elt = 'a) -> 'a list -> 'a list +\end{verbatim} + +To use this function, one can wrap the "Set.Make" functor: + +\begin{verbatim} + let make_set (type s) cmp = + let module S = Set.Make(struct + type t = s + let compare = cmp + end) in + (module S : Set.S with type elt = s) + val make_set : ('a -> 'a -> int) -> (module Set.S with type elt = 'a) +\end{verbatim} + +\iffalse +Another advanced use of first-class module is to encode existential +types. In particular, they can be used to simulate generalized +algebraic data types (GADT). 
To demonstrate this, we first define a type +of witnesses for type equalities: + +\begin{verbatim} + module TypEq : sig + type ('a, 'b) t + val apply: ('a, 'b) t -> 'a -> 'b + val refl: ('a, 'a) t + val sym: ('a, 'b) t -> ('b, 'a) t + end = struct + type ('a, 'b) t = ('a -> 'b) * ('b -> 'a) + let refl = (fun x -> x), (fun x -> x) + let apply (f, _) x = f x + let sym (f, g) = (g, f) + end +\end{verbatim} + +We can then define a parametrized algebraic data type whose +constructors provide some information about the type parameter: + +\begin{verbatim} + module rec Typ : sig + module type PAIR = sig + type t and t1 and t2 + val eq: (t, t1 * t2) TypEq.t + val t1: t1 Typ.typ + val t2: t2 Typ.typ + end + + type 'a typ = + | Int of ('a, int) TypEq.t + | String of ('a, string) TypEq.t + | Pair of (module PAIR with type t = 'a) + end = Typ +\end{verbatim} + +Values of type "'a typ" are supposed to be runtime representations for +the type "'a". The constructors "Int" and "String" are easy: they +directly give a witness of type equality between the parameter "'a" +and the ground types "int" (resp. "string"). The constructor "Pair" is +more complex. One wants to give a witness of type equality between +"'a" and a type of the form "t1 * t2" together with the representations +for "t1" and "t2". However, these two types are unknown. The code above +shows how to use first-class modules to simulate existentials. 
+ +Here is how to construct values of type "'a typ": + +\begin{verbatim} + let int = Typ.Int TypEq.refl + + let str = Typ.String TypEq.refl + + let pair (type s1) (type s2) t1 t2 = + let module P = struct + type t = s1 * s2 + type t1 = s1 + type t2 = s2 + let eq = TypEq.refl + let t1 = t1 + let t2 = t2 + end in + let pair = (module P : Typ.PAIR with type t = s1 * s2) in + Typ.Pair pair +\end{verbatim} + +And finally, here is an example of a polymorphic function that takes the +runtime representation of some type "'a" and a value of the same type, +then pretty-prints the value into a string: + +\begin{verbatim} + open Typ + let rec to_string: 'a. 'a Typ.typ -> 'a -> string = + fun (type s) t x -> + match t with + | Int eq -> string_of_int (TypEq.apply eq x) + | String eq -> Printf.sprintf "%S" (TypEq.apply eq x) + | Pair p -> + let module P = (val p : PAIR with type t = s) in + let (x1, x2) = TypEq.apply P.eq x in + Printf.sprintf "(%s,%s)" (to_string P.t1 x1) (to_string P.t2 x2) +\end{verbatim} + +Note that this function uses an explicit polymorphic annotation to obtain +polymorphic recursion. +\fi + +\section{Recovering the type of a module} \label{s:module-type-of} + +\ikwd{module\@\texttt{module}} +\ikwd{type\@\texttt{type}} +\ikwd{of\@\texttt{of}} +\ikwd{include\@\texttt{include}} + +(Introduced in OCaml 3.12) + +\begin{syntax} +module-type: + ... + | 'module' 'type' 'of' module-expr +\end{syntax} + +The construction @'module' 'type' 'of' module-expr@ expands to the module type +(signature or functor type) inferred for the module expression @module-expr@. +To make this module type reusable in many situations, it is +intentionally not strengthened: abstract types and datatypes are not +explicitly related with the types of the original module. +For the same reason, module aliases in the inferred type are expanded. + +A typical use, in conjunction with the signature-level @'include'@ +construct, is to extend the signature of an existing structure. 
+In that case, one wants to keep the types equal to types in the +original module. This can be done using the following idiom. +\begin{verbatim} + module type MYHASH = sig + include module type of struct include Hashtbl end + val replace: ('a, 'b) t -> 'a -> 'b -> unit + end +\end{verbatim} +The signature "MYHASH" then contains all the fields of the signature +of the module "Hashtbl" (with strengthened type definitions), plus the +new field "replace". An implementation of this signature can be +obtained easily by using the @'include'@ construct again, but this +time at the structure level: +\begin{verbatim} + module MyHash : MYHASH = struct + include Hashtbl + let replace t k v = remove t k; add t k v + end +\end{verbatim} + +Another application where the absence of strengthening comes in handy is +to provide an alternative implementation for an existing module. +\begin{verbatim} + module MySet : module type of Set = struct + ... + end +\end{verbatim} +This idiom guarantees that "MySet" is compatible with Set, but allows +it to represent sets internally in a different way. + +\section{Substituting inside a signature} +\ikwd{with\@\texttt{with}} +\ikwd{module\@\texttt{module}} +\ikwd{type\@\texttt{type}} +\label{s:signature-substitution} + +(Introduced in OCaml 3.12, generalized in 4.06) + +\begin{syntax} +mod-constraint: + ... + | 'type' [type-params] typeconstr-name ':=' typexpr + | 'module' module-path ':=' extended-module-path +\end{syntax} + +A ``destructive'' substitution (@'with' ... ':=' ...@) behaves essentially like +normal signature constraints (@'with' ... '=' ...@), but it additionally removes +the redefined type or module from the signature. + +Prior to OCaml 4.06, there were a number of restrictions: one could only remove +types and modules at the outermost level (not inside submodules), and in the +case of @'with type'@ the definition had to be another type constructor with the +same type parameters. 
+ +A natural application of destructive substitution is merging two +signatures sharing a type name. +\begin{caml_example*}{verbatim} + module type Printable = sig + type t + val print : Format.formatter -> t -> unit + end + module type Comparable = sig + type t + val compare : t -> t -> int + end + module type PrintableComparable = sig + include Printable + include Comparable with type t := t + end +\end{caml_example*} + +One can also use this to completely remove a field: +\begin{caml_example}{verbatim} +module type S = Comparable with type t := int +\end{caml_example} +or to rename one: +\begin{caml_example}{verbatim} +module type S = sig + type u + include Comparable with type t := u +end +\end{caml_example} + +Note that you can also remove manifest types, by substituting with the +same type. +\begin{caml_example}{verbatim} +module type ComparableInt = Comparable with type t = int ;; +module type CompareInt = ComparableInt with type t := int +\end{caml_example} + +\section{Type-level module aliases} +\ikwd{module\@\texttt{module}} +\label{s:module-alias} + +(Introduced in OCaml 4.02) + +\begin{syntax} +specification: + ... + | 'module' module-name '=' module-path +\end{syntax} + +The above specification, inside a signature, only matches a module +definition equal to @module-path@. Conversely, a type-level module +alias can be matched by itself, or by any supertype of the type of the +module it references. + +There are several restrictions on @module-path@: +\begin{enumerate} +\item it should be of the form \(M_0.M_1...M_n\) ({\em i.e.} without + functor applications); +\item inside the body of a functor, \(M_0\) should not be one of the + functor parameters; +\item inside a recursive module definition, \(M_0\) should not be one of + the recursively defined modules. +\end{enumerate} + +Such specifications are also inferred. 
Namely, when @P@ is a path +satisfying the above constraints, +\begin{caml_eval} +module P = struct end +\end{caml_eval} +\begin{caml_example*}{verbatim} +module N = P +\end{caml_example*} +has type +\caml +\:module N = P +\endcaml + +Type-level module aliases are used when checking module path +equalities. That is, in a context where module name @N@ is known to be +an alias for @P@, not only these two module paths check as equal, but +@F(N)@ and @F(P)@ are also recognized as equal. In the default +compilation mode, this is the only difference with the previous +approach of module aliases having just the same module type as the +module they reference. + +When the compiler flag @'-no-alias-deps'@ is enabled, type-level +module aliases are also exploited to avoid introducing dependencies +between compilation units. Namely, a module alias referring to a +module inside another compilation unit does not introduce a link-time +dependency on that compilation unit, as long as it is not +dereferenced; it still introduces a compile-time dependency if the +interface needs to be read, {\em i.e.} if the module is a submodule +of the compilation unit, or if some type components are referred to. +Additionally, accessing a module alias introduces a link-time +dependency on the compilation unit containing the module referenced by +the alias, rather than the compilation unit containing the alias. +Note that these differences in link-time behavior may be incompatible +with the previous behavior, as some compilation units might not be +extracted from libraries, and their side-effects ignored. + +These weakened dependencies make it possible to use module aliases in +place of the @'-pack'@ mechanism. Suppose that you have a library +@'Mylib'@ composed of modules @'A'@ and @'B'@. 
Using @'-pack'@, one +would issue the command line +\begin{verbatim} + ocamlc -pack a.cmo b.cmo -o mylib.cmo +\end{verbatim} +and as a result obtain a @'Mylib'@ compilation unit, containing +physically @'A'@ and @'B'@ as submodules, and with no dependencies on +their respective compilation units. +Here is a concrete example of a possible alternative approach: +\begin{enumerate} +\item Rename the files containing @'A'@ and @'B'@ to @'Mylib__A'@ and + @'Mylib__B'@. +\item Create a packing interface @'Mylib.ml'@, containing the + following lines. +\begin{verbatim} + module A = Mylib__A + module B = Mylib__B +\end{verbatim} +\item Compile @'Mylib.ml'@ using @'-no-alias-deps'@, and the other + files using @'-no-alias-deps'@ and @'-open' 'Mylib'@ (the last one is + equivalent to adding the line @'open!' 'Mylib'@ at the top of each + file). +\begin{verbatim} + ocamlc -c -no-alias-deps Mylib.ml + ocamlc -c -no-alias-deps -open Mylib Mylib__*.mli Mylib__*.ml +\end{verbatim} +\item Finally, create a library containing all the compilation units, + and export all the compiled interfaces. +\begin{verbatim} + ocamlc -a Mylib*.cmo -o Mylib.cma +\end{verbatim} +\end{enumerate} +This approach lets you access @'A'@ and @'B'@ directly inside the +library, and as @'Mylib.A'@ and @'Mylib.B'@ from outside. +It also has the advantage that @'Mylib'@ is no longer monolithic: if +you use @'Mylib.A'@, only @'Mylib__A'@ will be linked in, not +@'Mylib__B'@. +%Note that in the above @'Mylib.cmo'@ is actually empty, and one could +%name the interface @'Mylib.mli'@, but this would require that all +%clients are compiled with the @'-no-alias-deps'@ flag. + +Note the use of double underscores in @'Mylib__A'@ and +@'Mylib__B'@. These were chosen on purpose; the compiler uses the +following heuristic when printing paths: given a path @'Lib__fooBar'@, +if @'Lib.FooBar'@ exists and is an alias for @'Lib__fooBar'@, then the +compiler will always display @'Lib.FooBar'@ instead of +@'Lib__fooBar'@. 
This way the long @'Mylib__'@ names stay hidden and +all the user sees is the nicer dot names. This is how the OCaml +standard library is compiled. + +\section{Overriding in open statements}\label{s:explicit-overriding-open} +\ikwd{open.\@\texttt{open\char33}} + +(Introduced in OCaml 4.01) + +\begin{syntax} +definition: + ... + | 'open!' module-path +; +specification: + ... + | 'open!' module-path +; +expr: + ... + | 'let' 'open!' module-path 'in' expr +; +class-body-type: + ... + | 'let' 'open!' module-path 'in' class-body-type +; +class-expr: + ... + | 'let' 'open!' module-path 'in' class-expr +; +\end{syntax} + +Since OCaml 4.01, @"open"@ statements shadowing an existing identifier +(which is later used) trigger the warning 44. Adding a @"!"@ +character after the @"open"@ keyword indicates that such a shadowing is +intentional and should not trigger the warning. + +This is also available (since OCaml 4.06) for local opens in class +expressions and class type expressions. + +\section{Generalized algebraic datatypes} \ikwd{type\@\texttt{type}} +\ikwd{match\@\texttt{match}} \label{s:gadts} + +(Introduced in OCaml 4.00) + +\begin{syntax} +constr-decl: + ... + | constr-name ':' [ constr-args '->' ] typexpr +; +type-param: + ... + | [variance] '_' +\end{syntax} + +Generalized algebraic datatypes, or GADTs, extend usual sum types in +two ways: constraints on type parameters may change depending on the +value constructor, and some type variables may be existentially +quantified. +Adding constraints is done by giving an explicit return type +(the rightmost @typexpr@ in the above syntax), where type parameters +are instantiated. +This return type must use the same type constructor as the type being +defined, and have the same number of parameters. +Variables are made existential when they appear inside a constructor's +argument, but not in its return type. 
+ +Since the use of a return type often eliminates the need to name type +parameters in the left-hand side of a type definition, one can replace +them with anonymous types @"_"@ in that case. + +The constraints associated to each constructor can be recovered +through pattern-matching. +Namely, if the type of the scrutinee of a pattern-matching contains +a locally abstract type, this type can be refined according to the +constructor used. +These extra constraints are only valid inside the corresponding branch +of the pattern-matching. +If a constructor has some existential variables, fresh locally +abstract types are generated, and they must not escape the +scope of this branch. + +\paragraph{Recursive functions} + +Here is a concrete example: +\begin{verbatim} + type _ term = + | Int : int -> int term + | Add : (int -> int -> int) term + | App : ('b -> 'a) term * 'b term -> 'a term + + let rec eval : type a. a term -> a = function + | Int n -> n (* a = int *) + | Add -> (fun x y -> x+y) (* a = int -> int -> int *) + | App(f,x) -> (eval f) (eval x) + (* eval called at types (b->a) and b for fresh b *) + + let two = eval (App (App (Add, Int 1), Int 1)) + val two : int = 2 +\end{verbatim} +It is important to remark that the function "eval" is using the +polymorphic syntax for locally abstract types. When defining a recursive +function that manipulates a GADT, explicit polymorphic recursion should +generally be used. For instance, the following definition fails with a +type error: +\begin{verbatim} + let rec eval (type a) : a term -> a = function + | Int n -> n + | Add -> (fun x y -> x+y) + | App(f,x) -> (eval f) (eval x) +(* ^ + Error: This expression has type ($App_'b -> a) term but an expression was + expected of type 'a + The type constructor $App_'b would escape its scope +*) +\end{verbatim} +In absence of an explicit polymorphic annotation, a monomorphic type +is inferred for the recursive function. 
If a recursive call occurs +inside the function definition at a type that involves an existential +GADT type variable, this variable flows to the type of the recursive +function, and thus escapes its scope. In the above example, this happens +in the branch "App(f,x)" when "eval" is called with "f" as an argument. +In this branch, the type of "f" is "($App_ 'b-> a)". The prefix "$" in +"$App_ 'b" denotes an existential type named by the compiler +(see~\ref{p:existential-names}). Since the type of "eval" is +"'a term -> 'a", the call "eval f" makes the existential type "$App_'b" +flow to the type variable "'a" and escape its scope. This triggers the +above error. + +\paragraph{Type inference} + +Type inference for GADTs is notoriously hard. +This is due to the fact some types may become ambiguous when escaping +from a branch. +For instance, in the "Int" case above, "n" could have either type "int" +or "a", and they are not equivalent outside of that branch. +As a first approximation, type inference will always work if a +pattern-matching is annotated with types containing no free type +variables (both on the scrutinee and the return type). +This is the case in the above example, thanks to the type annotation +containing only locally abstract types. + +In practice, type inference is a bit more clever than that: type +annotations do not need to be immediately on the pattern-matching, and +the types do not have to be always closed. +As a result, it is usually enough to only annotate functions, as in +the example above. Type annotations are +propagated in two ways: for the scrutinee, they follow the flow of +type inference, in a way similar to polymorphic methods; for the +return type, they follow the structure of the program, they are split +on functions, propagated to all branches of a pattern matching, +and go through tuples, records, and sum types. 
+Moreover, the notion of ambiguity used is stronger: a type is only +seen as ambiguous if it was mixed with incompatible types (equated by +constraints), without type annotations between them. +For instance, the following program types correctly. +\begin{verbatim} + let rec sum : type a. a term -> _ = fun x -> + let y = + match x with + | Int n -> n + | Add -> 0 + | App(f,x) -> sum f + sum x + in y + 1 + val sum : 'a term -> int = <fun> +\end{verbatim} +Here the return type "int" is never mixed with "a", so it is seen as +non-ambiguous, and can be inferred. +When using such partial type annotations we strongly suggest +specifying the "-principal" mode, to check that inference is +principal. + +The exhaustiveness check is aware of GADT constraints, and can +automatically infer that some cases cannot happen. +For instance, the following pattern matching is correctly seen as +exhaustive (the "Add" case cannot happen). +\begin{verbatim} + let get_int : int term -> int = function + | Int n -> n + | App(_,_) -> 0 +\end{verbatim} + + +\paragraph{Refutation cases} (Introduced in OCaml 4.03) + +Usually, the exhaustiveness check only tries to check whether the +cases omitted from the pattern matching are typable or not. +However, you can force it to try harder by adding {\em refutation cases}: +\begin{syntax} +matching-case: + pattern ['when' expr] '->' expr + | pattern '->' '.' +\end{syntax} +In presence of a refutation case, the exhaustiveness check will first +compute the intersection of the pattern with the complement of the +cases preceding it. It then checks whether the resulting patterns can +really match any concrete values by trying to type-check them. 
+Wild cards in the generated patterns are handled in a special way: if +their type is a variant type with only GADT constructors, then the +pattern is split into the different constructors, in order to check whether +any of them is possible (this splitting is not done for arguments of these +constructors, to avoid non-termination). We also split tuples and +variant types with only one case, since they may contain GADTs inside. +For instance, the following code is deemed exhaustive: + +\begin{verbatim} + type _ t = + | Int : int t + | Bool : bool t + + let deep : (char t * int) option -> char = function + | None -> 'c' + | _ -> . +\end{verbatim} + +Namely, the inferred remaining case is "Some _", which is split into +"Some (Int, _)" and "Some (Bool, _)", which are both untypable because +"deep" expects a non-existing "char t" as the first element of the tuple. +Note that the refutation case could be omitted here, because it is +automatically added when there is only one case in the pattern +matching. + +Another addition is that the redundancy check is now aware of GADTs: a +case will be detected as redundant if it could be replaced by a +refutation case using the same pattern. + +\paragraph{Advanced examples} +The "term" type we have defined above is an {\em indexed} type, where +a type parameter reflects a property of the value contents. +Another use of GADTs is {\em singleton} types, where a GADT value +represents exactly one type. This value can be used as runtime +representation for this type, and a function receiving it can have a +polytypic behavior. + +Here is an example of a polymorphic function that takes the +runtime representation of some type "t" and a value of the same type, +then pretty-prints the value as a string: +\begin{verbatim} + type _ typ = + | Int : int typ + | String : string typ + | Pair : 'a typ * 'b typ -> ('a * 'b) typ + + let rec to_string: type t. 
t typ -> t -> string = + fun t x -> + match t with + | Int -> string_of_int x + | String -> Printf.sprintf "%S" x + | Pair(t1,t2) -> + let (x1, x2) = x in + Printf.sprintf "(%s,%s)" (to_string t1 x1) (to_string t2 x2) +\end{verbatim} + +Another frequent application of GADTs is equality witnesses. +\begin{verbatim} + type (_,_) eq = Eq : ('a,'a) eq + + let cast : type a b. (a,b) eq -> a -> b = fun Eq x -> x +\end{verbatim} +Here type "eq" has only one constructor, and by matching on it one +adds a local constraint allowing the conversion between "a" and "b". +By building such equality witnesses, one can make equal types which +are syntactically different. + +Here is an example using both singleton types and equality witnesses +to implement dynamic types. +\begin{verbatim} + let rec eq_type : type a b. a typ -> b typ -> (a,b) eq option = + fun a b -> + match a, b with + | Int, Int -> Some Eq + | String, String -> Some Eq + | Pair(a1,a2), Pair(b1,b2) -> + begin match eq_type a1 b1, eq_type a2 b2 with + | Some Eq, Some Eq -> Some Eq + | _ -> None + end + | _ -> None + + type dyn = Dyn : 'a typ * 'a -> dyn + + let get_dyn : type a. a typ -> dyn -> a option = + fun a (Dyn(b,x)) -> + match eq_type a b with + | None -> None + | Some Eq -> Some x +\end{verbatim} + +\paragraph{Existential type names in error messages}% +\label{p:existential-names} +(Updated in OCaml 4.03.0) + +The typing of pattern matching in presence of GADT can generate many +existential types. When necessary, error messages refer to these +existential types using compiler-generated names. Currently, the +compiler generates these names according to the following nomenclature: +\begin{itemize} +\item First, types whose name starts with a "$" are existentials. 
+\item "$Constr_'a" denotes an existential type introduced for the type +variable "'a" of the GADT constructor "Constr": +\begin{caml_example}{verbatim}[error] +type any = Any : 'name -> any +let escape (Any x) = x +\end{caml_example} +\item "$Constr" denotes an existential type introduced for an anonymous %$ +type variable in the GADT constructor "Constr": +\begin{caml_example}{verbatim}[error] +type any = Any : _ -> any +let escape (Any x) = x +\end{caml_example} +\item "$'a" if the existential variable was unified with the type %$ +variable "'a" during typing: +\begin{caml_example}{verbatim}[error] +type ('arg,'result,'aux) fn = + | Fun: ('a ->'b) -> ('a,'b,unit) fn + | Mem1: ('a ->'b) * 'a * 'b -> ('a, 'b, 'a * 'b) fn + let apply: ('arg,'result, _ ) fn -> 'arg -> 'result = fun f x -> + match f with + | Fun f -> f x + | Mem1 (f,y,fy) -> if x = y then fy else f x +\end{caml_example} +\item "$n" (n a number) is an internally generated existential %$ +which could not be named using one of the previous schemes. +\end{itemize} + +As shown by the last item, the current behavior is imperfect +and may be improved in future versions. + +\paragraph{Equations on non-local abstract types} (Introduced in OCaml +4.04) + +GADT pattern-matching may also add type equations to non-local +abstract types. The behaviour is the same as with local abstract +types. Reusing the above "eq" type, one can write: +\begin{verbatim} + module M : sig type t val x : t val e : (t,int) eq end = struct + type t = int + let x = 33 + let e = Eq + end + + let x : int = let Eq = M.e in M.x +\end{verbatim} + +Of course, not all abstract types can be refined, as this would +contradict the exhaustiveness check. Namely, builtin types (those +defined by the compiler itself, such as "int" or "array"), and +abstract types defined by the local module, are non-instantiable, and +as such cause a type error rather than introduce an equation. 
+ +\section{Syntax for Bigarray access}\label{s:bigarray-access} + +(Introduced in Objective Caml 3.00) + +\begin{syntax} +expr: + ... + | expr '.{' expr { ',' expr } '}' + | expr '.{' expr { ',' expr } '}' '<-' expr +\end{syntax} + +This extension provides syntactic sugar for getting and setting +elements in the arrays provided by the +"Bigarray"[\moduleref{Bigarray}] library. + +The short expressions are translated into calls to functions of the +"Bigarray" module as described in the following table. + +\begin{tableau}{|l|l|}{expression}{translation} +\entree{@expr_0'.{'expr_1'}'@} + {"Bigarray.Array1.get "@expr_0 expr_1@} +\entree{@expr_0'.{'expr_1'}' '<-'expr@} + {"Bigarray.Array1.set "@expr_0 expr_1 expr@} +\entree{@expr_0'.{'expr_1',' expr_2'}'@} + {"Bigarray.Array2.get "@expr_0 expr_1 expr_2@} +\entree{@expr_0'.{'expr_1',' expr_2'}' '<-'expr@} + {"Bigarray.Array2.set "@expr_0 expr_1 expr_2 expr@} +\entree{@expr_0'.{'expr_1',' expr_2',' expr_3'}'@} + {"Bigarray.Array3.get "@expr_0 expr_1 expr_2 expr_3@} +\entree{@expr_0'.{'expr_1',' expr_2',' expr_3'}' '<-'expr@} + {"Bigarray.Array3.set "@expr_0 expr_1 expr_2 expr_3 expr@} +\entree{@expr_0'.{'expr_1',' \ldots',' expr_n'}'@} + {"Bigarray.Genarray.get "@ expr_0 '[|' expr_1',' \ldots ',' + expr_n '|]'@} +\entree{@expr_0'.{'expr_1',' \ldots',' expr_n'}' '<-'expr@} + {"Bigarray.Genarray.set "@ expr_0 '[|' expr_1',' \ldots ',' + expr_n '|]' expr@} +\end{tableau} + +The last two entries are valid for any $n > 3$. + +\section{Attributes}\label{s:attributes} + +\ikwd{when\@\texttt{when}} + +(Introduced in OCaml 4.02, +infix notations for constructs other than expressions added in 4.03) + +Attributes are ``decorations'' of the syntax tree which are mostly +ignored by the type-checker but can be used by external tools. 
An +attribute is made of an identifier and a payload, which can be a +structure, a type expression (prefixed with ":"), a signature +(prefixed with ":") or a pattern (prefixed with "?") optionally +followed by a "when" clause: + + +\begin{syntax} +attr-id: + lowercase-ident + | capitalized-ident + | attr-id '.' attr-id +; +attr-payload: + [ module-items ] + | ':' typexpr + | ':' [ specification ] + | '?' pattern ['when' expr] +; +\end{syntax} + +The first form of attributes is attached with a postfix notation on +``algebraic'' categories: + +\begin{syntax} +attribute: + '[@' attr-id attr-payload ']' +; +expr: ... + | expr attribute +; +typexpr: ... + | typexpr attribute +; +pattern: ... + | pattern attribute +; +module-expr: ... + | module-expr attribute +; +module-type: ... + | module-type attribute +; +class-expr: ... + | class-expr attribute +; +class-type: ... + | class-type attribute +; +\end{syntax} + +This form of attributes can also be inserted after the @'`'tag-name@ +in polymorphic variant type expressions (@tag-spec-first@, @tag-spec@, +@tag-spec-full@) or after the @method-name@ in @method-type@. + +The same syntactic form is also used to attach attributes to labels and +constructors in type declarations: + +\begin{syntax} +field-decl: + ['mutable'] field-name ':' poly-typexpr {attribute} +; +constr-decl: + (constr-name || '()') [ 'of' constr-args ] {attribute} +; +\end{syntax} + +Note: when a label declaration is followed by a semi-colon, attributes +can also be put after the semi-colon (in which case they are merged to +those specified before). + + +The second form of attributes are attached to ``blocks'' such as type +declarations, class fields, etc: + +\begin{syntax} +item-attribute: + '[@@' attr-id attr-payload ']' +; +typedef: ... 
+ | typedef item-attribute +; +exception-definition: + 'exception' constr-decl + | 'exception' constr-name '=' constr +; +module-items: + [';;'] ( definition || expr { item-attribute } ) { [';;'] definition || ';;' expr { item-attribute } } [';;'] +; +class-binding: ... + | class-binding item-attribute +; +class-spec: ... + | class-spec item-attribute +; +classtype-def: ... + | classtype-def item-attribute +; +definition: + 'let' ['rec'] let-binding { 'and' let-binding } + | 'external' value-name ':' typexpr '=' external-declaration { item-attribute } + | type-definition + | exception-definition { item-attribute } + | class-definition + | classtype-definition + | 'module' module-name { '(' module-name ':' module-type ')' } + [ ':' module-type ] \\ '=' module-expr { item-attribute } + | 'module' 'type' modtype-name '=' module-type { item-attribute } + | 'open' module-path { item-attribute } + | 'include' module-expr { item-attribute } + | 'module' 'rec' module-name ':' module-type '=' \\ + module-expr { item-attribute } \\ + { 'and' module-name ':' module-type '=' module-expr \\ + { item-attribute } } +; +specification: + 'val' value-name ':' typexpr { item-attribute } + | 'external' value-name ':' typexpr '=' external-declaration { item-attribute } + | type-definition + | 'exception' constr-decl { item-attribute } + | class-specification + | classtype-definition + | 'module' module-name ':' module-type { item-attribute } + | 'module' module-name { '(' module-name ':' module-type ')' } + ':' module-type { item-attribute } + | 'module' 'type' modtype-name { item-attribute } + | 'module' 'type' modtype-name '=' module-type { item-attribute } + | 'open' module-path { item-attribute } + | 'include' module-type { item-attribute } +; +class-field-spec: ... + | class-field-spec item-attribute +; +class-field: ... 
+ | class-field item-attribute +; +\end{syntax} + +A third form of attributes appears as stand-alone structure or +signature items in the module or class sub-languages. They are not +attached to any specific node in the syntax tree: + +\begin{syntax} +floating-attribute: + '[@@@' attr-id attr-payload ']' +; +definition: ... + | floating-attribute +; +specification: ... + | floating-attribute +; +class-field-spec: ... + | floating-attribute +; +class-field: ... + | floating-attribute +; +\end{syntax} + +(Note: contrary to what the grammar above describes, @item-attributes@ +cannot be attached to these floating attributes in @class-field-spec@ +and @class-field@.) + + +It is also possible to specify attributes using an infix syntax. For instance: + +\begin{verbatim} +let[@foo] x = 2 in x + 1 === (let x = 2 [@@foo] in x + 1) +begin[@foo][@bar x] ... end === (begin ... end)[@foo][@@bar x] +module[@foo] M = ... === module M = ... [@@foo] +type[@foo] t = T === type t = T [@@foo] +method[@foo] m = ... === method m = ... [@@foo] +\end{verbatim} + +For "let", the attributes are applied to each bindings: + +\begin{verbatim} +let[@foo] x = 2 and y = 3 in x + y === (let x = 2 [@@foo] and y = 3 in x + y) +let[@foo] x = 2 +and[@bar] y = 3 in x + y === (let x = 2 [@@foo] and y = 3 [@bar] in x + y) +\end{verbatim} + + +\subsection{Built-in attributes} +\label{ss:builtin-attributes} + +Some attributes are understood by the type-checker: +\begin{itemize} +\item + ``ocaml.warning'' or ``warning'', with a string literal payload. + This can be used as floating attributes in a + signature/structure/object/object type. The string is parsed and has + the same effect as the "-w" command-line option, in the scope between + the attribute and the end of the current + signature/structure/object/object type. 
The attribute can also be + attached to any kind of syntactic item which supports attributes + (such as an expression, or a type expression) + in which case its scope is limited to that item. + Note that it is not well-defined which scope is used for a specific + warning. This is implementation-dependent and can change between versions. + Some warnings are even completely outside the control of ``ocaml.warning'' + (for instance, warnings 1, 2, 14, 29 and 50). + +\item + ``ocaml.warnerror'' or ``warnerror'', with a string literal payload. + Same as ``ocaml.warning'', for the "-warn-error" command-line option. +\item + ``ocaml.deprecated'' or ``deprecated''. + Can be applied to most kinds of items in signatures or + structures. When the element is later referenced, a warning (3) is + triggered. If the payload of the attribute is a string literal, + the warning message includes this text. It is also possible + to use this ``ocaml.deprecated'' as a floating attribute + on top of an ``.mli'' file (i.e. before any other non-attribute + item) or on top of an ``.ml'' file without a corresponding + interface; this marks the unit itself as being deprecated. +\item + ``ocaml.deprecated_mutable'' or ``deprecated_mutable''. + Can be applied to a mutable record label. If the label is later + used to modify the field (with ``expr.l <- expr''), a warning (3) + will be triggered. If the payload of the attribute is a string literal, + the warning message includes this text. +\item + ``ocaml.ppwarning'' or ``ppwarning'', in any context, with + a string literal payload. The text is reported as warning (22) + by the compiler (currently, the warning location is the location + of the string payload). This is mostly useful for preprocessors which + need to communicate warnings to the user. This could also be used + to mark explicitly some code location for further inspection. 
+\item + ``ocaml.warn_on_literal_pattern'' or ``warn_on_literal_pattern'' annotate + constructors in type definition. A warning (52) is then emitted when this + constructor is pattern matched with a constant literal as argument. This + attribute denotes constructors whose argument is purely informative and + may change in the future. Therefore, pattern matching on this argument + with a constant literal is unreliable. For instance, all built-in exception + constructors are marked as ``warn_on_literal_pattern''. + Note that, due to an implementation limitation, this warning (52) is only + triggered for single-argument constructors. +\item + ``ocaml.tailcall'' or ``tailcall'' can be applied to function + application in order to check that the call is tailcall optimized. + If it is not the case, a warning (51) is emitted. +\item + ``ocaml.inline'' or ``inline'' take either ``never'', ``always'' + or nothing as payload on a function or functor definition. If no payload + is provided, the default value is ``always''. This payload controls when + applications of the annotated functions should be inlined. +\item + ``ocaml.inlined'' or ``inlined'' can be applied to any function or functor + application to check that the call is inlined by the compiler. If the call + is not inlined, a warning (55) is emitted. +\item + ``ocaml.noalloc'', ``ocaml.unboxed'' and ``ocaml.untagged'' or + ``noalloc'', ``unboxed'' and ``untagged'' can be used on external + definitions to obtain finer control over the C-to-OCaml interface. See + \ref{s:C-cheaper-call} for more details. +\item + ``ocaml.immediate'' or ``immediate'' applied on an abstract type marks the type as + having a non-pointer implementation (e.g. ``int'', ``bool'', ``char'' or + enumerated types). Mutation of these immediate types does not activate the + garbage collector's write barrier, which can significantly boost performance in + programs relying heavily on mutable state.
+\item + "ocaml.unboxed" or "unboxed" can be used on a type definition if the + type is a single-field record or a concrete type with a single + constructor that has a single argument. It tells the compiler to + optimize the representation of the type by removing the block that + represents the record or the constructor (i.e. a value of this type + is physically equal to its argument). In the case of GADTs, an + additional restriction applies: the argument must not be an + existential variable, represented by an existential type variable, + or an abstract type constructor applied to an existential type + variable. +\item + "ocaml.boxed" or "boxed" can be used on type definitions to mean + the opposite of "ocaml.unboxed": keep the unoptimized + representation of the type. When there is no annotation, the + default is currently "boxed" but it may change in the future. +\end{itemize} + +\begin{verbatim} +module X = struct + [@@@warning "+9"] (* locally enable warning 9 in this structure *) + ... +end + [@@deprecated "Please use module 'Y' instead."] + +let x = begin[@warning "+9"] ... end in .... + +type t = A | B + [@@deprecated "Please use type 's' instead."] + +let f x = + assert (x >= 0) [@ppwarning "TODO: remove this later"]; + +let rec no_op = function + | [] -> () + | _ :: q -> (no_op[@tailcall]) q;; + +let f x = x [@@inline] + +let () = (f[@inlined]) () + +type fragile = + | Int of int [@warn_on_literal_pattern] + | String of string [@warn_on_literal_pattern] + +let f = function +| Int 0 | String "constant" -> () (* trigger warning 52 *) +| _ -> () + +module Immediate: sig + type t [@@immediate] + val x: t ref +end = struct + type t = A | B + let x = ref 0 +end + .... + +\end{verbatim} + + +\section{Extension nodes}\label{s:extension-nodes} + +(Introduced in OCaml 4.02, +infix notations for constructs other than expressions added in 4.03, +infix notation (e1 ;\%ext e2) added in 4.04. +) + +Extension nodes are generic placeholders in the syntax tree. 
They are +rejected by the type-checker and are intended to be ``expanded'' by external +tools such as "-ppx" rewriters. + +Extension nodes share the same notion of identifier and payload as +attributes~\ref{s:attributes}. + +The first form of extension node is used for ``algebraic'' categories: + +\begin{syntax} +extension: + '[%' attr-id attr-payload ']' +; +expr: ... + | extension +; +typexpr: ... + | extension +; +pattern: ... + | extension +; +module-expr: ... + | extension +; +module-type: ... + | extension +; +class-expr: ... + | extension +; +class-type: ... + | extension +; +\end{syntax} + +A second form of extension node can be used in structures and +signatures, both in the module and object languages: + +\begin{syntax} +item-extension: + '[%%' attr-id attr-payload ']' +; +definition: ... + | item-extension +; +specification: ... + | item-extension +; +class-field-spec: ... + | item-extension +; +class-field: ... + | item-extension +; +\end{syntax} + +An infix form is available for extension nodes when +the payload is of the same kind +(expression with expression, pattern with pattern ...). + +Examples: + +\begin{verbatim} +let%foo x = 2 in x + 1 === [%foo let x = 2 in x + 1] +begin%foo ... end === [%foo begin ... end] +x ;%foo 2 === [%foo x; 2] +module%foo M = .. === [%%foo module M = ... ] +val%foo x : t === [%%foo: val x : t] +\end{verbatim} + +When this form is used together with the infix syntax for attributes, +the attributes are considered to apply to the payload: + +\begin{verbatim} +fun%foo[@bar] x -> x + 1 === [%foo (fun x -> x + 1)[@bar ] ]; +\end{verbatim} + +\subsection{Built-in extension nodes} + +(Introduced in OCaml 4.03) + +Some extension nodes are understood by the compiler itself: +\begin{itemize} + \item + ``ocaml.extension_constructor'' or ``extension_constructor'' + take as payload a constructor from an extensible variant type + (see \ref{s:extensible-variants}) and return its extension + constructor slot. 
+\end{itemize} + +\begin{caml_example*}{verbatim} +type t = .. +type t += X of int | Y of string +let x = [%extension_constructor X] +let y = [%extension_constructor Y] +\end{caml_example*} +\begin{caml_example}{toplevel} + x <> y;; +\end{caml_example} + +\section{Quoted strings}\label{s:quoted-strings} + +(Introduced in OCaml 4.02) + +Quoted strings "{foo|...|foo}" provide a different lexical syntax to +write string literals in OCaml code. They are useful to represent +strings of arbitrary content without escaping -- as long as the +delimiter you chose (here "|foo}") does not occur in the string +itself. + +\begin{syntax} +string-literal: ... + | '{' quoted-string-id '|' ........ '|' quoted-string-id '}' +; +quoted-string-id: + { 'a'...'z' || '_' } +; +\end{syntax} + +The opening delimiter has the form "{id|" where "id" is a (possibly +empty) sequence of lowercase letters and underscores. The +corresponding closing delimiter is "|id}" (with the same +identifier). Unlike regular OCaml string literals, quoted +strings do not interpret any character in a special way. + +Example: + +\begin{verbatim} +String.length {|\"|} (* returns 2 *) +String.length {foo|\"|foo} (* returns 2 *) +\end{verbatim} + +Quoted strings are interesting in particular in conjunction with +extension nodes "[%foo ...]" (see \ref{s:extension-nodes}) to embed +foreign syntax fragments to be interpreted by a preprocessor and +turned into OCaml code: you can use "[%sql {|...|}]" for example to +represent arbitrary SQL statements -- assuming you have a ppx-rewriter +that recognizes the "%sql" extension -- without requiring escaping +quotes. + +Note that the non-extension form, for example "{sql|...|sql}", should +not be used for this purpose, as the user cannot see in the code that +this string literal has a different semantics than they expect, and +giving a semantics to a specific delimiter limits the freedom to +change the delimiter to avoid escaping issues.
+ +\section{Exception cases in pattern matching}\label{s:exception-match} + +(Introduced in OCaml 4.02) + +A new form of exception patterns is allowed, only as a toplevel +pattern under a "match"..."with" pattern-matching (other occurrences +are rejected by the type-checker). + +\begin{syntax} +pattern: ... + | 'exception' pattern +; +\end{syntax} + +Cases with such a toplevel pattern are called ``exception cases'', +as opposed to regular ``value cases''. Exception cases are applied +when the evaluation of the matched expression raises an exception. +The exception value is then matched against all the exception cases +and re-raised if none of them accept the exception (as for a +"try"..."with" block). Since the bodies of all exception and value +cases are outside the scope of the exception handler, they are all +considered to be in tail-position: if the "match"..."with" block +itself is in tail position in the current function, any function call +in tail position in one of the case bodies results in an actual tail +call. + +It is an error if all cases are exception cases in a given pattern +matching. + +\section{Extensible variant types}\label{s:extensible-variants} + +(Introduced in OCaml 4.02) + +\begin{syntax} +type-representation: + ... + | '=' '..' +; +specification: + ... + | 'type' [type-params] typeconstr type-extension-spec +; +definition: + ... + | 'type' [type-params] typeconstr type-extension-def +; +type-extension-spec: '+=' ['private'] ['|'] constr-decl { '|' constr-decl } +; +type-extension-def: '+=' ['private'] ['|'] constr-def { '|' constr-def } +; +constr-def: + constr-decl + | constr-name '=' constr +; +\end{syntax} + +Extensible variant types are variant types which can be extended with +new variant constructors. Extensible variant types are defined using +"..". New variant constructors are added using "+=". +\begin{verbatim} + type attr = ..
+ + type attr += Str of string + + type attr += + | Int of int + | Float of float +\end{verbatim} + +Pattern matching on an extensible variant type requires a default case +to handle unknown variant constructors: +\begin{verbatim} + let to_string = function + | Str s -> s + | Int i -> string_of_int i + | Float f -> string_of_float f + | _ -> "?" +\end{verbatim} + +A preexisting example of an extensible variant type is the built-in +"exn" type used for exceptions. Indeed, exception constructors can be +declared using the type extension syntax: +\begin{verbatim} + type exn += Exc of int +\end{verbatim} + +Extensible variant constructors can be rebound to a different name. This +allows exporting variants from another module. +\begin{verbatim} + type Expr.attr += Str = Expr.Str +\end{verbatim} + +Extensible variant constructors can be declared "private". As with +regular variants, this prevents them from being constructed directly by +constructor application while still allowing them to be de-structured in +pattern-matching. +\begin{verbatim} + module Bool : sig + type attr += private Bool of int + val bool : bool -> attr + end = struct + type attr += Bool of int + let bool p = if p then Bool 1 else Bool 0 + end +\end{verbatim} + +\subsection{Private extensible variant types} + +(Introduced in OCaml 4.06) + +\begin{syntax} +type-representation: + ... + | '=' 'private' '..' +; +\end{syntax} + +Extensible variant types can be declared "private". This prevents new +constructors from being declared directly, but allows extension +constructors to be referred to in interfaces. +\begin{verbatim} + module Msg : sig + type t = private .. + module MkConstr (X : sig type t end) : sig + type t += C of X.t + end + end = struct + type t = .. + module MkConstr (X : sig type t end) = struct + type t += C of X.t + end + end +\end{verbatim} + +\section{Generative functors}\label{s:generative-functors} + +(Introduced in OCaml 4.02) + +\begin{syntax} +module-expr: + ... 
+ | 'functor' '()' '->' module-expr + | module-expr '()' +; +definition: + ... + | 'module' module-name { '(' module-name ':' module-type ')' || '()' } + [ ':' module-type ] \\ '=' module-expr +; +module-type: + ... + | 'functor' '()' '->' module-type +; +specification: + ... + | 'module' module-name { '(' module-name ':' module-type ')' || '()' } + ':' module-type +; +\end{syntax} + +A generative functor takes a unit "()" argument. +In order to use it, one must necessarily apply it to this unit argument, +ensuring that all type components in the result of the functor behave +in a generative way, {\em i.e.} they are different from types obtained +by other applications of the same functor. +This is equivalent to taking an argument of signature "sig end", and always +applying to "struct end", but not to some defined module (in the +latter case, applying twice to the same module would return identical +types). + +As a side-effect of this generativity, one is allowed to unpack +first-class modules in the body of generative functors. + +\section{Extension-only syntax} +(Introduced in OCaml 4.02.2, extended in 4.03) + +Some syntactic constructions are accepted during parsing and rejected +during type checking. These syntactic constructions can therefore not +be used directly in vanilla OCaml. However, "-ppx" rewriters and other +external tools can exploit this parser leniency to extend the language +with these new syntactic constructions by rewriting them to +vanilla constructions. +\subsection{Extension operators} \label{s:ext-ops} +(Introduced in OCaml 4.02.2) +\begin{syntax} +infix-symbol: + ... + | "#" {operator-chars} "#" {operator-char '|' "#"} +; +\end{syntax} + +Operator names starting with a "#" character and containing more than +one "#" character are reserved for extensions. + +\subsection{Extension literals} \label{s:extension-literals} +(Introduced in OCaml 4.03) +\begin{syntax} +float-literal: + ... + | ["-"] ("0"\ldots"9") { "0"\ldots"9"||"_" } ["." 
{ "0"\ldots"9"||"_" }] + [("e"||"E") ["+"||"-"] ("0"\ldots"9") { "0"\ldots"9"||"_" }] + ["g"\ldots"z"||"G"\ldots"Z"] + | ["-"] ("0x"||"0X") + ("0"\ldots"9"||"A"\ldots"F"||"a"\ldots"f") + { "0"\ldots"9"||"A"\ldots"F"||"a"\ldots"f"||"_" }\\ + ["." { "0"\ldots"9"||"A"\ldots"F"||"a"\ldots"f"||"_" }] + [("p"||"P") ["+"||"-"] ("0"\ldots"9") { "0"\ldots"9"||"_" }] + ["g"\ldots"z"||"G"\ldots"Z"] +; +int-literal: + ... + | ["-"] ("0"\ldots"9") { "0"\ldots"9" || "_" }["g"\ldots"z"||"G"\ldots"Z"] + | ["-"] ("0x"||"0X") ("0"\ldots"9"||"A"\ldots"F"||"a"\ldots"f") + { "0"\ldots"9"||"A"\ldots"F"||"a"\ldots"f"||"_" } + ["g"\ldots"z"||"G"\ldots"Z"] + | ["-"] ("0o"||"0O") ("0"\ldots"7") { "0"\ldots"7"||"_" } + ["g"\ldots"z"||"G"\ldots"Z"] + | ["-"] ("0b"||"0B") ("0"\ldots"1") { "0"\ldots"1"||"_" } + ["g"\ldots"z"||"G"\ldots"Z"] +; +\end{syntax} +Int and float literals followed by a one-letter identifier in the +range @["g".."z"||"G".."Z"]@ are extension-only literals. + +\section{Inline records} \label{s:inline-records} +(Introduced in OCaml 4.03) +\begin{syntax} + constr-args: + ... + | record-decl +; +\end{syntax} + +The arguments of a sum-type constructor can now be defined using the +same syntax as records. Mutable and polymorphic fields are allowed. +GADT syntax is supported. Attributes can be specified on individual +fields. + +Syntactically, building or matching constructors with such an inline +record argument is similar to working with a unary constructor whose +unique argument is a declared record type. A pattern can bind +the inline record as a pseudo-value, but the record cannot escape the +scope of the binding and can only be used with the dot-notation to +extract or modify fields or to build new constructor values. + +\begin{verbatim} +type t = + | Point of {width: int; mutable x: float; mutable y: float} + | ... + +let v = Point {width = 10; x = 0.; y = 0.} + +let scale l = function + | Point p -> Point {p with x = l *. p.x; y = l *. p.y} + | ....
+ +let print = function + | Point {x; y; _} -> Printf.printf "%f/%f" x y + | .... + +let reset = function + | Point p -> p.x <- 0.; p.y <- 0. + | ... + +let invalid = function + | Point p -> p (* INVALID *) + | ... +\end{verbatim} + + +\section{Local exceptions} +\ikwd{let\@\texttt{let}} +\ikwd{exception\@\texttt{exception}} \label{s:local-exceptions} + +(Introduced in OCaml 4.04) + +It is possible to define local exceptions in expressions: + +\begin{syntax} +expr: + ... + | "let" "exception" constr-decl "in" expr +\end{syntax} + + +The syntactic scope of the exception constructor is the inner +expression, but nothing prevents exception values created with this +constructor from escaping this scope. Two executions of the definition +above result in two incompatible exception constructors (as for any +exception definition). + + +\section{Documentation comments} +(Introduced in OCaml 4.03) + +Comments which start with "**" are treated specially by the +compiler. They are automatically converted during parsing into +attributes (see \ref{s:attributes}) to allow tools to process them as +documentation. + +Such comments can take three forms: {\em floating comments}, {\em item +comments} and {\em label comments}. Any comment starting with "**" which +does not match one of these forms will cause the compiler to emit +warning 50. + +Comments which start with "**" are also used by the ocamldoc +documentation generator (see \ref{c:ocamldoc}). The three comment forms +recognised by the compiler are a subset of the forms accepted by +ocamldoc (see \ref{s:ocamldoc-comments}). + +\subsection{Floating comments} + +Comments surrounded by blank lines that appear within structures, +signatures, classes or class types are converted into +@floating-attribute@s. 
For example: + +\begin{verbatim} +type t = T + +(** Now some definitions for [t] *) + +let mkT = T +\end{verbatim} + +will be converted to: + +\begin{verbatim} +type t = T + +[@@@ocaml.text " Now some definitions for [t] "] + +let mkT = T +\end{verbatim} + +\subsection{Item comments} + +Comments which appear {\em immediately before} or {\em immediately +after} a structure item, signature item, class item or class type item +are converted into @item-attribute@s. Immediately before or immediately +after means that there must be no blank lines, ";;", or other +documentation comments between them. For example: + +\begin{verbatim} +type t = T +(** A description of [t] *) + +\end{verbatim} + +or + +\begin{verbatim} + +(** A description of [t] *) +type t = T +\end{verbatim} + +will be converted to: + +\begin{verbatim} +type t = T +[@@ocaml.doc " A description of [t] "] +\end{verbatim} + +Note that, if a comment appears immediately next to multiple items, +as in: + +\begin{verbatim} +type t = T +(** An ambiguous comment *) +type s = S +\end{verbatim} + +then it will be attached to both items: + +\begin{verbatim} +type t = T +[@@ocaml.doc " An ambiguous comment "] +type s = S +[@@ocaml.doc " An ambiguous comment "] +\end{verbatim} + +and the compiler will emit warning 50. + +\subsection{Label comments} + +Comments which appear {\em immediately after} a labelled argument, +record field, variant constructor, object method or polymorphic variant +constructor are converted into @attribute@s. Immediately +after means that there must be no blank lines or other documentation +comments between them.
For example: + +\begin{verbatim} +type t1 = lbl:int (** Labelled argument *) -> unit + +type t2 = { + fld: int; (** Record field *) + fld2: float; +} + +type t3 = + | Cstr of string (** Variant constructor *) + | Cstr2 of string + +type t4 = < meth: int * int; (** Object method *) > + +type t5 = [ + `PCstr (** Polymorphic variant constructor *) +] +\end{verbatim} + +will be converted to: + +\begin{verbatim} +type t1 = lbl:(int [@ocaml.doc " Labelled argument "]) -> unit + +type t2 = { + fld: int [@ocaml.doc " Record field "]; + fld2: float; +} + +type t3 = + | Cstr of string [@ocaml.doc " Variant constructor "] + | Cstr2 of string + +type t4 = < meth : int * int [@ocaml.doc " Object method "] > + +type t5 = [ + `PCstr [@ocaml.doc " Polymorphic variant constructor "] +] +\end{verbatim} + +Note that label comments take precedence over item comments, so: + +\begin{verbatim} +type t = T of string +(** Attaches to T not t *) +\end{verbatim} + +will be converted to: + +\begin{verbatim} +type t = T of string [@ocaml.doc " Attaches to T not t "] +\end{verbatim} + +whilst: + +\begin{verbatim} +type t = T of string +(** Attaches to T not t *) +(** Attaches to t *) +\end{verbatim} + +will be converted to: + +\begin{verbatim} +type t = T of string [@ocaml.doc " Attaches to T not t "] +[@@ocaml.doc " Attaches to t "] +\end{verbatim} + +In the absence of meaningful comment on the last constructor of +a type, an empty comment~"(**)" can be used instead: + +\begin{verbatim} +type t = T of string +(**) +(** Attaches to t *) +\end{verbatim} + +will be converted directly to + +\begin{verbatim} +type t = T of string +[@@ocaml.doc " Attaches to t "] +\end{verbatim} + +\section{Extended indexing operators \label{s:index-operators} } +(Introduced in 4.06) + +\begin{syntax} + +dot-ext: + | ('!'||'$'||'%'||'&'||'*'||'+'||'-'||'/'||':'||'='||'>'||'?'||'@'||'^'||'|'||'~') { operator-char } +; +expr: + ... + | expr '.' 
[module-path '.'] dot-ext ( '(' expr ')' || '[' expr ']' || '{' expr '}' ) [ '<-' expr ] +; +operator-name: + ... + | '.' dot-ext ('()' || '[]' || '{}') ['<-'] +; +\end{syntax} + + +This extension provides syntactic sugar for getting and setting elements +for user-defined indexed types. For instance, we can define python-like +dictionaries with +\begin{caml_example*}{verbatim} +module Dict = struct +include Hashtbl +let ( .%{} ) tabl index = find tabl index +let ( .%{}<- ) tabl index value = add tabl index value +end +let dict = + let dict = Dict.create 10 in + let () = + dict.Dict.%{"one"} <- 1; + let open Dict in + dict.%{"two"} <- 2 in + dict +\end{caml_example*} +\begin{caml_example}{toplevel} +dict.Dict.%{"one"};; +let open Dict in dict.%{"two"};; +\end{caml_example} + +\section{Empty variant types\label{s:empty-variants} } +(Introduced in 4.07.0) + +\begin{syntax} +type-representation: + ... + | '=' '|' +\end{syntax} +This extension allows the user to define empty variants. +An empty variant type can be eliminated by a refutation case of pattern matching. +\begin{caml_example*}{verbatim} +type t = | +let f (x: t) = match x with _ -> . +\end{caml_example*} diff --git a/manual/manual/refman/lex.etex b/manual/manual/refman/lex.etex new file mode 100644 index 00000000..f1ac3ea1 --- /dev/null +++ b/manual/manual/refman/lex.etex @@ -0,0 +1,300 @@ +\section{Lexical conventions} +\pdfsection{Lexical conventions} +%HEVEA\cutname{lex.html} +\subsubsection*{Blanks} + +The following characters are considered as blanks: space, +horizontal tabulation, carriage return, line feed and form feed. Blanks are +ignored, but they separate adjacent identifiers, literals and +keywords that would otherwise be confused as one single identifier, +literal or keyword. + +\subsubsection*{Comments} + +Comments are introduced by the two characters @"(*"@, with no +intervening blanks, and terminated by the characters @"*)"@, with +no intervening blanks. Comments are treated as blank characters.
+Comments do not occur inside string or character literals. Nested +comments are handled correctly. + +\subsubsection*{Identifiers} + +\begin{syntax} +ident: ( letter || "_" ) { letter || "0" \ldots "9" || "_" || "'" } ; +capitalized-ident: ("A" \ldots "Z") { letter || "0" \ldots "9" || "_" || "'" } ; +lowercase-ident: + ("a" \ldots "z" || "_") { letter || "0" \ldots "9" || "_" || "'" } ; +letter: "A" \ldots "Z" || "a" \ldots "z" +\end{syntax} + +Identifiers are sequences of letters, digits, "_" (the underscore +character), and "'" (the single quote), starting with a +letter or an underscore. +Letters contain at least the 52 lowercase and uppercase +letters from the ASCII set. The current implementation +also recognizes as letters some characters from the ISO +8859-1 set (characters 192--214 and 216--222 as uppercase letters; +characters 223--246 and 248--255 as lowercase letters). This +feature is deprecated and should be avoided for future compatibility. + +All characters in an identifier are +meaningful. The current implementation accepts identifiers up to +16000000 characters in length. + +In many places, OCaml makes a distinction between capitalized +identifiers and identifiers that begin with a lowercase letter. The +underscore character is considered a lowercase letter for this +purpose. + +\subsubsection*{Integer literals} + +\begin{syntax} +integer-literal: + ["-"] ("0"\ldots"9") { "0"\ldots"9" || "_" } + | ["-"] ("0x"||"0X") ("0"\ldots"9"||"A"\ldots"F"||"a"\ldots"f") + { "0"\ldots"9"||"A"\ldots"F"||"a"\ldots"f"||"_" } + | ["-"] ("0o"||"0O") ("0"\ldots"7") { "0"\ldots"7"||"_" } + | ["-"] ("0b"||"0B") ("0"\ldots"1") { "0"\ldots"1"||"_" } +\end{syntax} + +An integer literal is a sequence of one or more digits, optionally +preceded by a minus sign. By default, integer literals are in decimal +(radix 10). 
The following prefixes select a different radix: +\begin{tableau}{|l|l|}{Prefix}{Radix} +\entree{"0x", "0X"}{hexadecimal (radix 16)} +\entree{"0o", "0O"}{octal (radix 8)} +\entree{"0b", "0B"}{binary (radix 2)} +\end{tableau} +(The initial @"0"@ is the digit zero; the @"O"@ for octal is the letter O.) +The interpretation of integer literals that fall outside the range of +representable integer values is undefined. + +For convenience and readability, underscore characters (@"_"@) are accepted +(and ignored) within integer literals. + +\subsubsection*{Floating-point literals} + +\begin{syntax} +float-literal: + ["-"] ("0"\ldots"9") { "0"\ldots"9"||"_" } ["." { "0"\ldots"9"||"_" }] + [("e"||"E") ["+"||"-"] ("0"\ldots"9") { "0"\ldots"9"||"_" }] + | ["-"] ("0x"||"0X") + ("0"\ldots"9"||"A"\ldots"F"||"a"\ldots"f") + { "0"\ldots"9"||"A"\ldots"F"||"a"\ldots"f"||"_" } \\ + ["." { "0"\ldots"9"||"A"\ldots"F"||"a"\ldots"f"||"_" }] + [("p"||"P") ["+"||"-"] ("0"\ldots"9") { "0"\ldots"9"||"_" }] +\end{syntax} + +Floating-point decimal literals consist in an integer part, a +fractional part and +an exponent part. The integer part is a sequence of one or more +digits, optionally preceded by a minus sign. The fractional part is a +decimal point followed by zero, one or more digits. +The exponent part is the character @"e"@ or @"E"@ followed by an +optional @"+"@ or @"-"@ sign, followed by one or more digits. It is +interpreted as a power of 10. +The fractional part or the exponent part can be omitted but not both, to +avoid ambiguity with integer literals. +The interpretation of floating-point literals that fall outside the +range of representable floating-point values is undefined. + +Floating-point hexadecimal literals are denoted with the @"0x"@ or @"0X"@ +prefix. The syntax is similar to that of floating-point decimal +literals, with the following differences. +The integer part and the fractional part use hexadecimal +digits. 
The exponent part starts with the character @"p"@ or @"P"@. +It is written in decimal and interpreted as a power of 2. + +For convenience and readability, underscore characters (@"_"@) are accepted +(and ignored) within floating-point literals. + +\subsubsection*{Character literals} +\label{s:characterliteral} + +\begin{syntax} +char-literal: + "'" regular-char "'" + | "'" escape-sequence "'" +; +escape-sequence: + "\" ( "\" || '"' || "'" || "n" || "t" || "b" || "r" || space ) + | "\" ("0"\ldots"9") ("0"\ldots"9") ("0"\ldots"9") + | "\x" ("0"\ldots"9"||"A"\ldots"F"||"a"\ldots"f") + ("0"\ldots"9"||"A"\ldots"F"||"a"\ldots"f") + | "\o" ("0"\ldots"3") ("0"\ldots"7") ("0"\ldots"7") +\end{syntax} + +Character literals are delimited by @"'"@ (single quote) characters. +The two single quotes enclose either one character different from +@"'"@ and @'\'@, or one of the escape sequences below: +\begin{tableau}{|l|l|}{Sequence}{Character denoted} +\entree{"\\\\"}{backslash ("\\")} +\entree{"\\\""}{double quote ("\"")} +\entree{"\\'"}{single quote ("'")} +\entree{"\\n"}{linefeed (LF)} +\entree{"\\r"}{carriage return (CR)} +\entree{"\\t"}{horizontal tabulation (TAB)} +\entree{"\\b"}{backspace (BS)} +\entree{"\\"\var{space}}{space (SPC)} +\entree{"\\"\var{ddd}}{the character with ASCII code \var{ddd} in decimal} +\entree{"\\x"\var{hh}}{the character with ASCII code \var{hh} in hexadecimal} +\entree{"\\o"\var{ooo}}{the character with ASCII code \var{ooo} in octal} +\end{tableau} + +\subsubsection*{String literals} +\label{s:stringliteral} + +\begin{syntax} +string-literal: + '"' { string-character } '"' +; +string-character: + regular-string-char + | escape-sequence + | "\u{" {{ "0"\ldots"9"||"A"\ldots"F"||"a"\ldots"f" }} "}" + | '\' newline { space || tab } +\end{syntax} + +String literals are delimited by @'"'@ (double quote) characters. 
+The two double quotes enclose a sequence of either characters +different from @'"'@ and @'\'@, or escape sequences from the +table given above for character literals, or a Unicode character +escape sequence. + +A Unicode character escape sequence is substituted by the UTF-8 +encoding of the specified Unicode scalar value. The Unicode scalar +value, an integer in the ranges 0x0000...0xD7FF or 0xE000...0x10FFFF, +is defined using 1 to 6 hexadecimal digits; leading zeros are allowed. + +To allow splitting long string literals across lines, the sequence +"\\"\var{newline}~\var{spaces-or-tabs} (a backslash at the end of a line +followed by any number of spaces and horizontal tabulations at the +beginning of the next line) is ignored inside string literals. + +The current implementation places practically no restrictions on the +length of string literals. + +\subsubsection*{Naming labels} +\label{s:labelname} + +To avoid ambiguities, naming labels in expressions cannot just be defined +syntactically as the sequence of the three tokens "~", @ident@ and +":", and have to be defined at the lexical level. + +\begin{syntax} +label-name: lowercase-ident +; +label: "~" label-name ":" +; +optlabel: "?" label-name ":" +\end{syntax} + +Naming labels come in two flavours: @label@ for normal arguments and +@optlabel@ for optional ones. They are simply distinguished by their +first character, either "~" or "?". + +Despite @label@ and @optlabel@ being lexical entities in expressions, +their expansions @'~' label-name ':'@ and @'?' label-name ':'@ will be +used in grammars, for the sake of readability. Note also that inside +type expressions, this expansion can be taken literally, {\em i.e.} +there are really 3 tokens, with optional blanks between them. 
+ +\subsubsection*{Prefix and infix symbols} + +%% || '`' lowercase-ident '`' + +\begin{syntax} +infix-symbol: + ('=' || '<' || '>' || '@' || '^' || '|' || '&' || + '+' || '-' || '*' || '/' || '$' || '%') { operator-char } + | "#" {{ operator-char }} +; +prefix-symbol: + '!' { operator-char } + | ('?' || '~') {{ operator-char }} +; +operator-char: + '!' || '$' || '%' || '&' || '*' || '+' || '-' || '.' || + '/' || ':' || '<' || '=' || '>' || '?' || '@' || + '^' || '|' || '~' +\end{syntax} +See also the following language extensions: +\hyperref[s:ext-ops]{extension operators} and +\hyperref[s:index-operators]{extended indexing operators}. + +Sequences of ``operator characters'', such as "<=>" or "!!", +are read as a single token from the @infix-symbol@ or @prefix-symbol@ +class. These symbols are parsed as prefix and infix operators inside +expressions, but otherwise behave like normal identifiers. +%% Identifiers starting with a lowercase letter and enclosed +%% between backquote characters @'`' lowercase-ident '`'@ are also parsed +%% as infix operators. + +\subsubsection*{Keywords} + +The identifiers below are reserved as keywords, and cannot be employed +otherwise: +\begin{verbatim} + and as assert asr begin class + constraint do done downto else end + exception external false for fun function + functor if in include inherit initializer + land lazy let lor lsl lsr + lxor match method mod module mutable + new nonrec object of open or + private rec sig struct then to + true try type val virtual when + while with +\end{verbatim} +% +\goodbreak% +% +The following character sequences are also keywords: +% +%% FIXME the token >] is not used anywhere in the syntax +% +\begin{alltt} +" != # & && ' ( ) * + , -" +" -. -> . .. : :: := :> ; ;; <" +" <- = > >] >} ? [ [< [> [| ]" +" _ ` { {< | |] || } ~" +\end{alltt} +% +Note that the following identifiers are keywords of the Camlp4 +extensions and should be avoided for compatibility reasons. 
+% +\begin{verbatim} + parser value $ $$ $: <: << >> ?? +\end{verbatim} + +\subsubsection*{Ambiguities} + +Lexical ambiguities are resolved according to the ``longest match'' +rule: when a character sequence can be decomposed into two tokens in +several different ways, the decomposition retained is the one with the +longest first token. + +\subsubsection*{Line number directives} + +\begin{syntax} +linenum-directive: + '#' {{"0" \ldots "9"}} + | '#' {{"0" \ldots "9"}} '"' { string-character } '"' +\end{syntax} + +Preprocessors that generate OCaml source code can insert line number +directives in their output so that error messages produced by the +compiler contain line numbers and file names referring to the source +file before preprocessing, instead of after preprocessing. +A line number directive is composed of a @"#"@ (sharp sign), followed by +a positive integer (the source line number), optionally followed by a +character string (the source file name). +Line number directives are treated as blanks during lexical +analysis. + +% FIXME spaces and tabs are allowed before and after the number +% FIXME ``string-character'' is inaccurate: everything is allowed except +% CR, LF, and doublequote; moreover, backslash escapes are not +% interpreted (especially backslash-doublequote) +% FIXME any number of random characters are allowed (and ignored) at the +% end of the line, except CR and LF. diff --git a/manual/manual/refman/modtypes.etex b/manual/manual/refman/modtypes.etex new file mode 100644 index 00000000..cbcbaab5 --- /dev/null +++ b/manual/manual/refman/modtypes.etex @@ -0,0 +1,303 @@ +\section{Module types (module specifications)} +\pdfsection{Module types (module specifications)} +%HEVEA\cutname{modtypes.html} + +Module types are the module-level equivalent of type expressions: they +specify the general shape and type properties of modules. 
+ +\ikwd{sig\@\texttt{sig}} +\ikwd{end\@\texttt{end}} +\ikwd{functor\@\texttt{functor}} +\ikwd{with\@\texttt{with}} +\ikwd{and\@\texttt{and}} +\ikwd{val\@\texttt{val}} +\ikwd{external\@\texttt{external}} +\ikwd{type\@\texttt{type}} +\ikwd{exception\@\texttt{exception}} +\ikwd{class\@\texttt{class}} +\ikwd{module\@\texttt{module}} +\ikwd{open\@\texttt{open}} +\ikwd{include\@\texttt{include}} + +\begin{syntax} +module-type: + modtype-path + | 'sig' { specification [';;'] } 'end' + | 'functor' '(' module-name ':' module-type ')' '->' module-type + | module-type '->' module-type + | module-type 'with' mod-constraint { 'and' mod-constraint } + | '(' module-type ')' +; +mod-constraint: + 'type' [type-params] typeconstr type-equation { type-constraint } + | 'module' module-path '=' extended-module-path +; +%BEGIN LATEX +\end{syntax} +\begin{syntax} +%END LATEX +specification: + 'val' value-name ':' typexpr + | 'external' value-name ':' typexpr '=' external-declaration + | type-definition + | 'exception' constr-decl + | class-specification + | classtype-definition + | 'module' module-name ':' module-type + | 'module' module-name { '(' module-name ':' module-type ')' } + ':' module-type + | 'module' 'type' modtype-name + | 'module' 'type' modtype-name '=' module-type + | 'open' module-path + | 'include' module-type +\end{syntax} +See also the following language extensions: +\hyperref[s:module-type-of]{recovering the type of a module}, +\hyperref[s:signature-substitution]{substitution inside a signature}, +\hyperref[s:module-alias]{type-level module aliases}, +\hyperref[s:attributes]{attributes}, +\hyperref[s:extension-nodes]{extension nodes} and +\hyperref[s:generative-functors]{generative functors}. + +\subsection{Simple module types} + +The expression @modtype-path@ is equivalent to the module type bound +to the name @modtype-path@. +The expression @'(' module-type ')'@ denotes the same type as +@module-type@. 
+ +\subsection{Signatures} + +\ikwd{sig\@\texttt{sig}} +\ikwd{end\@\texttt{end}} + +Signatures are type specifications for structures. Signatures +@'sig' \ldots 'end'@ are collections of type specifications for value +names, type names, exceptions, module names and module type names. A +structure will match a signature if the structure provides definitions +(implementations) for all the names specified in the signature (and +possibly more), and these definitions meet the type requirements given +in the signature. + +An optional @";;"@ is allowed after each specification in a +signature. It serves as a syntactic separator with no semantic +meaning. + +\subsubsection*{Value specifications} + +\ikwd{val\@\texttt{val}} + +A specification of a value component in a signature is written +@'val' value-name ':' typexpr@, where @value-name@ is the name of the +value and @typexpr@ its expected type. + +\ikwd{external\@\texttt{external}} + +The form @'external' value-name ':' typexpr '=' external-declaration@ +is similar, except that it requires in addition the name to be +implemented as the external function specified in @external-declaration@ +(see chapter~\ref{c:intf-c}). + +\subsubsection*{Type specifications} + +\ikwd{type\@\texttt{type}} + +A specification of one or several type components in a signature is +written @'type' typedef { 'and' typedef }@ and consists of a sequence +of mutually recursive definitions of type names. + +Each type definition in the signature specifies an optional type +equation @'=' typexpr@ and an optional type representation +@'=' constr-decl \ldots@ or @'=' '{' field-decl \ldots '}'@. +The implementation of the type name in a matching structure must +be compatible with the type expression specified in the equation (if +given), and have the specified representation (if given). Conversely, +users of that signature will be able to rely on the type equation +or type representation, if given. 
More precisely, we have the +following four situations: + +\begin{description} +\item[Abstract type: no equation, no representation.] ~ \\ +Names that are defined as abstract types in a signature can be +implemented in a matching structure by any kind of type definition +(provided it has the same number of type parameters). The exact +implementation of the type will be hidden to the users of the +structure. In particular, if the type is implemented as a variant type +or record type, the associated constructors and fields will not be +accessible to the users; if the type is implemented as an +abbreviation, the type equality between the type name and the +right-hand side of the abbreviation will be hidden from the users of the +structure. Users of the structure consider that type as incompatible +with any other type: a fresh type has been generated. + +\item[Type abbreviation: an equation @'=' typexpr@, no representation.] ~ \\ +The type name must be implemented by a type compatible with @typexpr@. +All users of the structure know that the type name is +compatible with @typexpr@. + +\item[New variant type or record type: no equation, a representation.] ~ \\ +The type name must be implemented by a variant type or record type +with exactly the constructors or fields specified. All users of the +structure have access to the constructors or fields, and can use them +to create or inspect values of that type. However, users of the +structure consider that type as incompatible with any other type: a +fresh type has been generated. + +\item[Re-exported variant type or record type: an equation, +a representation.] ~ \\ +This case combines the previous two: the representation of the type is +made visible to all users, and no fresh type is generated. 
+\end{description} + +\subsubsection*{Exception specification} + +\ikwd{exception\@\texttt{exception}} + +The specification @'exception' constr-decl@ in a signature requires the +matching structure to provide an exception with the name and arguments +specified in the definition, and makes the exception available to all +users of the structure. + +\subsubsection*{Class specifications} + +\ikwd{class\@\texttt{class}} + +A specification of one or several classes in a signature is written +@'class' class-spec { 'and' class-spec }@ and consists of a sequence +of mutually recursive definitions of class names. + +Class specifications are described more precisely in +section~\ref{s:class-spec}. + +\subsubsection*{Class type specifications} + +\ikwd{class\@\texttt{class}} +\ikwd{type\@\texttt{type}} + +A specification of one or several class types in a signature is +written @'class' 'type' classtype-def@ @{ 'and' classtype-def }@ and +consists of a sequence of mutually recursive definitions of class type +names. Class type specifications are described more precisely in +section~\ref{s:classtype}. + +\subsubsection*{Module specifications} + +\ikwd{module\@\texttt{module}} + +A specification of a module component in a signature is written +@'module' module-name ':' module-type@, where @module-name@ is the +name of the module component and @module-type@ its expected type. +Modules can be nested arbitrarily; in particular, functors can appear +as components of structures and functor types as components of +signatures. 
+ +For specifying a module component that is a functor, one may write +\begin{center} +@'module' module-name '(' name_1 ':' module-type_1 ')' + \ldots '(' name_n ':' module-type_n ')' + ':' module-type@ +\end{center} +instead of +\begin{center} +@'module' module-name ':' + 'functor' '(' name_1 ':' module-type_1 ')' '->' \ldots + '->' module-type@ +\end{center} + +\subsubsection*{Module type specifications} + +\ikwd{type\@\texttt{type}} +\ikwd{module\@\texttt{module}} + +A module type component of a signature can be specified either as a +manifest module type or as an abstract module type. + +An abstract module type specification +@'module' 'type' modtype-name@ allows the name @modtype-name@ to be +implemented by any module type in a matching signature, but hides the +implementation of the module type to all users of the signature. + +A manifest module type specification +@'module' 'type' modtype-name '=' module-type@ +requires the name @modtype-name@ to be implemented by the module type +@module-type@ in a matching signature, but makes the equality between +@modtype-name@ and @module-type@ apparent to all users of the signature. + +\subsubsection{Opening a module path} + +\ikwd{open\@\texttt{open}} + +The expression @'open' module-path@ in a signature does not specify +any components. It simply affects the parsing of the following items +of the signature, allowing components of the module denoted by +@module-path@ to be referred to by their simple names @name@ instead of +path accesses @module-path '.' name@. The scope of the @"open"@ +stops at the end of the signature expression. + +\subsubsection{Including a signature} + +\ikwd{include\@\texttt{include}} + +The expression @'include' module-type@ in a signature performs textual +inclusion of the components of the signature denoted by @module-type@. +It behaves as if the components of the included signature were copied +at the location of the @'include'@. 
The @module-type@ argument must +refer to a module type that is a signature, not a functor type. + +\subsection{Functor types} + +\ikwd{functor\@\texttt{functor}} + +The module type expression +@'functor' '(' module-name ':' module-type_1 ')' '->' module-type_2@ +is the type of functors (functions from modules to modules) that take +as argument a module of type @module-type_1@ and return as result a +module of type @module-type_2@. The module type @module-type_2@ can +use the name @module-name@ to refer to type components of the actual +argument of the functor. If the type @module-type_2@ does not +depend on type components of @module-name@, the module type expression +can be simplified with the alternative short syntax +@ module-type_1 '->' module-type_2 @. +No restrictions are placed on the type of the functor argument; in +particular, a functor may take another functor as argument +(``higher-order'' functor). + +\subsection{The "with" operator} + +\ikwd{with\@\texttt{with}} + +Assuming @module-type@ denotes a signature, the expression +@module-type 'with' mod-constraint@ @{ 'and' mod-constraint }@ denotes +the same signature where type equations have been added to some of the +type specifications, as described by the constraints following the +"with" keyword. The constraint @'type' [type-parameters] typeconstr +'=' typexpr@ adds the type equation @'=' typexpr@ to the specification +of the type component named @typeconstr@ of the constrained signature. +The constraint @'module' module-path '=' extended-module-path@ adds +type equations to all type components of the sub-structure denoted by +@module-path@, making them equivalent to the corresponding type +components of the structure denoted by @extended-module-path@. 
+ +For instance, if the module type name "S" is bound to the signature +\begin{verbatim} + sig type t module M: (sig type u end) end +\end{verbatim} +then "S with type t=int" denotes the signature +\begin{verbatim} + sig type t=int module M: (sig type u end) end +\end{verbatim} +and "S with module M = N" denotes the signature +\begin{verbatim} + sig type t module M: (sig type u=N.u end) end +\end{verbatim} +A functor taking two arguments of type "S" that share their "t" component +is written +\begin{verbatim} + functor (A: S) (B: S with type t = A.t) ... +\end{verbatim} + +Constraints are added left to right. After each constraint has been +applied, the resulting signature must be a subtype of the signature +before the constraint was applied. Thus, the @'with'@ operator can +only add information on the type components of a signature, but never +remove information. diff --git a/manual/manual/refman/modules.etex b/manual/manual/refman/modules.etex new file mode 100644 index 00000000..431ad83d --- /dev/null +++ b/manual/manual/refman/modules.etex @@ -0,0 +1,239 @@ +\section{Module\label{s:module-expr} expressions (module implementations)} +\pdfsection{Module expressions (module implementations)} +%HEVEA\cutname{modules.html} + +Module expressions are the module-level equivalent of value +expressions: they evaluate to modules, thus providing implementations +for the specifications expressed in module types. 
+ +\ikwd{struct\@\texttt{struct}} +\ikwd{end\@\texttt{end}} +\ikwd{functor\@\texttt{functor}} +\ikwd{let\@\texttt{let}} +\ikwd{and\@\texttt{and}} +\ikwd{external\@\texttt{external}} +\ikwd{type\@\texttt{type}} +\ikwd{exception\@\texttt{exception}} +\ikwd{class\@\texttt{class}} +\ikwd{module\@\texttt{module}} +\ikwd{open\@\texttt{open}} +\ikwd{include\@\texttt{include}} + +\begin{syntax} +module-expr: + module-path + | 'struct' [ module-items ] 'end' + | 'functor' '(' module-name ':' module-type ')' '->' module-expr + | module-expr '(' module-expr ')' + | '(' module-expr ')' + | '(' module-expr ':' module-type ')' +; +module-items: + {';;'} ( definition || expr ) { {';;'} ( definition || ';;' expr) } {';;'} +; +%\end{syntax} \begin{syntax} +definition: + 'let' ['rec'] let-binding { 'and' let-binding } + | 'external' value-name ':' typexpr '=' external-declaration + | type-definition + | exception-definition + | class-definition + | classtype-definition + | 'module' module-name { '(' module-name ':' module-type ')' } + [ ':' module-type ] \\ '=' module-expr + | 'module' 'type' modtype-name '=' module-type + | 'open' module-path + | 'include' module-expr +\end{syntax} +See also the following language extensions: +\hyperref[s-recursive-modules]{recursive modules}, +\hyperref[s-first-class-modules]{first-class modules}, +\hyperref[s:explicit-overriding-open]{overriding in open statements}, +\hyperref[s:attributes]{attributes}, +\hyperref[s:extension-nodes]{extension nodes} and +\hyperref[s:generative-functors]{generative functors}. + +\subsection{Simple module expressions} + +The expression @module-path@ evaluates to the module bound to the name +@module-path@. + +The expression @'(' module-expr ')'@ evaluates to the same module as +@module-expr@. 
+ +The expression @'(' module-expr ':' module-type ')'@ checks that the +type of @module-expr@ is a subtype of @module-type@, that is, that all +components specified in @module-type@ are implemented in +@module-expr@, and their implementation meets the requirements given +in @module-type@. In other terms, it checks that the implementation +@module-expr@ meets the type specification @module-type@. The whole +expression evaluates to the same module as @module-expr@, except that +all components not specified in @module-type@ are hidden and can no +longer be accessed. + +\subsection{Structures} + +\ikwd{struct\@\texttt{struct}} +\ikwd{end\@\texttt{end}} + +Structures @'struct' \ldots 'end'@ are collections of definitions for +value names, type names, exceptions, module names and module type +names. The definitions are evaluated in the order in which they appear +in the structure. The scopes of the bindings performed by the +definitions extend to the end of the structure. As a consequence, a +definition may refer to names bound by earlier definitions in the same +structure. + +For compatibility with toplevel phrases (chapter~\ref{c:camllight}), +optional @";;"@ are allowed after and before each definition in a structure. These +@";;"@ have no semantic meaning. Similarly, an @expr@ preceded by ";;" is allowed as +a component of a structure. It is equivalent to @'let' '_' '=' expr@, i.e. @expr@ is +evaluated for its side-effects but is not bound to any identifier. If @expr@ is +the first component of a structure, the preceding ";;" can be omitted. + +\subsubsection*{Value definitions} + +\ikwd{let\@\texttt{let}} + +A value definition @'let' ['rec'] let-binding { 'and' let-binding }@ +binds value names in the same way as a @'let' \ldots 'in' \ldots@ expression +(see section~\ref{s:localdef}). The value names appearing in the +left-hand sides of the bindings are bound to the corresponding values +in the right-hand sides. 
+ +\ikwd{external\@\texttt{external}} + +A value definition @'external' value-name ':' typexpr '=' external-declaration@ +implements @value-name@ as the external function specified in +@external-declaration@ (see chapter~\ref{c:intf-c}). + +\subsubsection*{Type definitions} + +\ikwd{type\@\texttt{type}} + +A definition of one or several type components is written +@'type' typedef { 'and' typedef }@ and consists of a sequence +of mutually recursive definitions of type names. + +\subsubsection*{Exception definitions} + +\ikwd{exception\@\texttt{exception}} + +Exceptions are defined with the syntax @'exception' constr-decl@ +or @'exception' constr-name '=' constr@. + +\subsubsection*{Class definitions} + +\ikwd{class\@\texttt{class}} + +A definition of one or several classes is written @'class' +class-binding { 'and' class-binding }@ and consists of a sequence of +mutually recursive definitions of class names. Class definitions are +described more precisely in section~\ref{s:classdef}. + +\subsubsection*{Class type definitions} + +\ikwd{class\@\texttt{class}} +\ikwd{type\@\texttt{type}} + +A definition of one or several class types is written +@'class' 'type' classtype-def { 'and' classtype-def }@ and consists of +a sequence of mutually recursive definitions of class type names. +Class type definitions are described more precisely in +section~\ref{s:classtype}. + +\subsubsection*{Module definitions} + +\ikwd{module\@\texttt{module}} + +The basic form for defining a module component is +@'module' module-name '=' module-expr@, which evaluates @module-expr@ and binds +the result to the name @module-name@. + +One can write +\begin{center} +@'module' module-name ':' module-type '=' module-expr@ +\end{center} +instead of +\begin{center} +@'module' module-name '=' '(' module-expr ':' module-type ')'@. 
+\end{center} +Another derived form is +\begin{center} +@'module' module-name '(' name_1 ':' module-type_1 ')' \ldots + '(' name_n ':' module-type_n ')' '=' module-expr@ +\end{center} +which is equivalent to +\begin{center} +@'module' module-name '=' + 'functor' '(' name_1 ':' module-type_1 ')' '->' \ldots + '->' module-expr@ +\end{center} + +\subsubsection*{Module type definitions} + +\ikwd{type\@\texttt{type}} +\ikwd{module\@\texttt{module}} + +A definition for a module type is written +@'module' 'type' modtype-name '=' module-type@. +It binds the name @modtype-name@ to the module type denoted by the +expression @module-type@. + +\subsubsection*{Opening a module path} + +\ikwd{open\@\texttt{open}} + +The expression @'open' module-path@ in a structure does not define any +components nor perform any bindings. It simply affects the parsing of +the following items of the structure, allowing components of the +module denoted by @module-path@ to be referred to by their simple names +@name@ instead of path accesses @module-path '.' name@. The scope of +the @"open"@ stops at the end of the structure expression. + +\subsubsection*{Including the components of another structure} + +\ikwd{include\@\texttt{include}} + +The expression @'include' module-expr@ in a structure re-exports in +the current structure all definitions of the structure denoted by +@module-expr@. 
For instance, if the identifier "S" is bound to the +module +\begin{verbatim} + struct type t = int let x = 2 end +\end{verbatim} +the module expression +\begin{verbatim} + struct include S let y = (x + 1 : t) end +\end{verbatim} +is equivalent to the module expression +\begin{verbatim} + struct type t = S.t let x = S.x let y = (x + 1 : t) end +\end{verbatim} +The difference between @'open'@ and @'include'@ is that @'open'@ +simply provides short names for the components of the opened +structure, without defining any components of the current structure, +while @'include'@ also adds definitions for the components of the +included structure. + +\subsection{Functors} + +\subsubsection*{Functor definition} + +\ikwd{functor\@\texttt{functor}} + +The expression @'functor' '(' module-name ':' module-type ')' '->' +module-expr@ evaluates to a functor that takes as argument modules of +the type @module-type@, binds @module-name@ to these modules, +evaluates @module-expr@ in the extended environment, and returns the +resulting modules as results. No restrictions are placed on the type of the +functor argument; in particular, a functor may take another functor as +argument (``higher-order'' functor). + +\subsubsection*{Functor application} + +The expression @module-expr_1 '(' module-expr_2 ')'@ evaluates +@module-expr_1@ to a functor and @module-expr_2@ to a module, and +applies the former to the latter. The type of @module-expr_2@ must +match the type expected for the arguments of the functor @module-expr_1@. 
+ diff --git a/manual/manual/refman/names.etex b/manual/manual/refman/names.etex new file mode 100644 index 00000000..4f85eed4 --- /dev/null +++ b/manual/manual/refman/names.etex @@ -0,0 +1,151 @@ +\section{Names} \label{s:names} +\pdfsection{Names} +%HEVEA\cutname{names.html} + +Identifiers are used to give names to several classes of language +objects and refer to these objects by name later: +\begin{itemize} +\item value names (syntactic class @value-name@), +\item value constructors and exception constructors (class @constr-name@), +\item labels (@label-name@, defined in section~\ref{s:labelname}), +\item polymorphic variant tags (@tag-name@), +\item type constructors (@typeconstr-name@), +\item record fields (@field-name@), +\item class names (@class-name@), +\item method names (@method-name@), +\item instance variable names (@inst-var-name@), +\item module names (@module-name@), +\item module type names (@modtype-name@). +\end{itemize} +These eleven name spaces are distinguished both by the context and by the +capitalization of the identifier: whether the first letter of the +identifier is in lowercase (written @lowercase-ident@ below) or in +uppercase (written @capitalized-ident@). Underscore is considered a +lowercase letter for this purpose. + +\subsubsection*{Naming objects} +\ikwd{mod\@\texttt{mod}} +\ikwd{land\@\texttt{land}} +\ikwd{lor\@\texttt{lor}} +\ikwd{lxor\@\texttt{lxor}} +\ikwd{lsl\@\texttt{lsl}} +\ikwd{lsr\@\texttt{lsr}} +\ikwd{asr\@\texttt{asr}} + +\begin{syntax} +value-name: + lowercase-ident + | '(' operator-name ')' +; +operator-name: + prefix-symbol || infix-op +; +infix-op: + infix-symbol + | '*' || '+' || '-' || '-.' 
|| '=' || '!=' || '<' || '>' || 'or' || '||' + || '&' || '&&' || ':=' + | 'mod' || 'land' || 'lor' || 'lxor' || 'lsl' || 'lsr' || 'asr' +; +constr-name: + capitalized-ident +; +tag-name: + capitalized-ident +; +typeconstr-name: + lowercase-ident +; +field-name: + lowercase-ident +; +module-name: + capitalized-ident +; +modtype-name: + ident +; +class-name: + lowercase-ident +; +inst-var-name: + lowercase-ident +; +method-name: + lowercase-ident +\end{syntax} +See also the following language extension: +\hyperref[s:index-operators]{extended indexing operators}. + +As shown above, prefix and infix symbols as well as some keywords can +be used as value names, provided they are written between parentheses. +The capitalization rules are summarized in the table below. + +\begin{tableau}{|l|l|}{Name space}{Case of first letter} +\entree{Values}{lowercase} +\entree{Constructors}{uppercase} +\entree{Labels}{lowercase} +\entree{Polymorphic variant tags}{uppercase} +\entree{Exceptions}{uppercase} +\entree{Type constructors}{lowercase} +\entree{Record fields}{lowercase} +\entree{Classes}{lowercase} +\entree{Instance variables}{lowercase} +\entree{Methods}{lowercase} +\entree{Modules}{uppercase} +\entree{Module types}{any} +\end{tableau} + +{\it Note on polymorphic variant tags:\/} the current implementation accepts +lowercase variant tags in addition to capitalized variant tags, but we +suggest you avoid lowercase variant tags for portability and +compatibility with future OCaml versions. + +\subsubsection*{Referring to named objects} + +\begin{syntax} +value-path: + [ module-path '.' ] value-name +; +constr: + [ module-path '.' ] constr-name +; +typeconstr: + [ extended-module-path '.' ] typeconstr-name +; +field: + [ module-path '.' ] field-name +; +modtype-path: + [ extended-module-path '.' ] modtype-name +; +class-path: + [ module-path '.' ] class-name +; +classtype-path: + [ extended-module-path '.' ] class-name +; +module-path: + module-name { '.' 
module-name } +; +extended-module-path: + extended-module-name { '.' extended-module-name } +; +extended-module-name: + module-name { '(' extended-module-path ')' } +\end{syntax} + +A named object can be referred to either by its name (following the +usual static scoping rules for names) or by an access path @prefix '.' name@, +where @prefix@ designates a module and @name@ is the name of an object +defined in that module. The first component of the path, @prefix@, is +either a simple module name or an access path @name_1 '.' name_2 \ldots@, +in case the defining module is itself nested inside other modules. +For referring to type constructors, module types, or class types, +the @prefix@ can +also contain simple functor applications (as in the syntactic class +@extended-module-path@ above) in case the defining module is the +result of a functor application. + +Label names, tag names, method names and instance variable names need +not be qualified: the former three are global labels, while the latter +are local to a class. diff --git a/manual/manual/refman/patterns.etex b/manual/manual/refman/patterns.etex new file mode 100644 index 00000000..8abdce5e --- /dev/null +++ b/manual/manual/refman/patterns.etex @@ -0,0 +1,178 @@ +\section{Patterns} +\pdfsection{Patterns} +\ikwd{as\@\texttt{as}} +%HEVEA\cutname{patterns.html} +\begin{syntax} +pattern: + value-name + | '_' + | constant + | pattern 'as' value-name + | '(' pattern ')' + | '(' pattern ':' typexpr ')' + | pattern '|' pattern + | constr pattern + | "`"tag-name pattern + | "#"typeconstr + | pattern {{ ',' pattern }} + | '{' field [':' typexpr] ['=' pattern]% + { ';' field [':' typexpr] ['=' pattern] } [';' '_' ] [ ';' ] '}' + | '[' pattern { ';' pattern } [ ';' ] ']' + | pattern '::' pattern + | '[|' pattern { ';' pattern } [ ';' ] '|]' + | char-literal '..' 
char-literal +\end{syntax} +See also the following language extensions: \hyperref[s:lazypat]{lazy patterns}, +\hyperref[s:local-opens]{local opens}, +\hyperref[s-first-class-modules]{first-class modules}, +\hyperref[s:attributes]{attributes}, +\hyperref[s:extension-nodes]{extension nodes} and +\hyperref[s:exception-match]{exception cases in pattern matching}. + +The table below shows the relative precedences and associativity of +operators and non-closed pattern constructions. The constructions with +higher precedences come first. +\ikwd{as\@\texttt{as}} +\begin{tableau}{|l|l|}{Operator}{Associativity} +\entree{".."}{--} +\entree{"lazy" (see section~\ref{s:lazypat})}{--} +\entree{Constructor application, Tag application}{right} +\entree{"::"}{right} +\entree{","}{--} +\entree{"|"}{left} +\entree{"as"}{--} +\end{tableau} + +Patterns are templates that allow selecting data structures of a +given shape, and binding identifiers to components of the data +structure. This selection operation is called pattern matching; its +outcome is either ``this value does not match this pattern'', or +``this value matches this pattern, resulting in the following bindings +of names to values''. + +\subsubsection*{Variable patterns} + +A pattern that consists in a value name matches any value, +binding the name to the value. The pattern @"_"@ also matches +any value, but does not bind any name. + +Patterns are {\em linear\/}: a variable cannot be bound several times by +a given pattern. In particular, there is no way to test for equality +between two parts of a data structure using only a pattern (but +@"when"@ guards can be used for this purpose). + +\subsubsection*{Constant patterns} + +A pattern consisting in a constant matches the values that +are equal to this constant. + +%% FIXME for negative numbers, blanks are allowed between the minus +%% sign and the first digit. 
+ +\subsubsection*{Alias patterns} +\ikwd{as\@\texttt{as}} + +The pattern @pattern_1 "as" value-name@ matches the same values as +@pattern_1@. If the matching against @pattern_1@ is successful, +the name @value-name@ is bound to the matched value, in addition to the +bindings performed by the matching against @pattern_1@. + +\subsubsection*{Parenthesized patterns} + +The pattern @"(" pattern_1 ")"@ matches the same values as +@pattern_1@. A type constraint can appear in a +parenthesized pattern, as in @"(" pattern_1 ":" typexpr ")"@. This +constraint forces the type of @pattern_1@ to be compatible with +@typexpr@. + +\subsubsection*{``Or'' patterns} + +The pattern @pattern_1 "|" pattern_2@ represents the logical ``or'' of +the two patterns @pattern_1@ and @pattern_2@. A value matches +@pattern_1 "|" pattern_2@ if it matches @pattern_1@ or +@pattern_2@. The two sub-patterns @pattern_1@ and @pattern_2@ +must bind exactly the same identifiers to values having the same types. +Matching is performed from left to right. +More precisely, +in case some value~$v$ matches @pattern_1 "|" pattern_2@, the bindings +performed are those of @pattern_1@ when $v$ matches @pattern_1@. +Otherwise, value~$v$ matches @pattern_2@ whose bindings are performed. + + +\subsubsection*{Variant patterns} + +The pattern @constr '(' pattern_1 ',' \ldots ',' pattern_n ')'@ matches +all variants whose +constructor is equal to @constr@, and whose arguments match +@pattern_1 \ldots pattern_n@. It is a type error if $n$ is not the +number of arguments expected by the constructor. + +The pattern @constr '_'@ matches all variants whose constructor is +@constr@. + +The pattern @pattern_1 "::" pattern_2@ matches non-empty lists whose +heads match @pattern_1@, and whose tails match @pattern_2@. + +The pattern @"[" pattern_1 ";" \ldots ";" pattern_n "]"@ matches lists +of length $n$ whose elements match @pattern_1@ \ldots @pattern_n@, +respectively. 
This pattern behaves like +@pattern_1 "::" \ldots "::" pattern_n "::" "[]"@. + +\subsubsection*{Polymorphic variant patterns} + +The pattern @"`"tag-name pattern_1@ matches all polymorphic variants +whose tag is equal to @tag-name@, and whose argument matches +@pattern_1@. + +\subsubsection*{Polymorphic variant abbreviation patterns} + +If the type @["('a,'b,"\ldots")"] typeconstr = "[" "`"tag-name_1 typexpr_1 "|" +\ldots "|" "`"tag-name_n typexpr_n"]"@ is defined, then the pattern @"#"typeconstr@ +is a shorthand for the following or-pattern: +@"(" "`"tag-name_1"(_" ":" typexpr_1")" "|" \ldots "|" "`"tag-name_n"(_" +":" typexpr_n"))"@. It matches all values of type @"[<" typeconstr "]"@. + +\subsubsection*{Tuple patterns} + +The pattern @pattern_1 "," \ldots "," pattern_n@ matches $n$-tuples +whose components match the patterns @pattern_1@ through @pattern_n@. That +is, the pattern matches the tuple values $(v_1, \ldots, v_n)$ such that +@pattern_i@ matches $v_i$ for \fromoneto{i}{n}. + +\subsubsection*{Record patterns} + +The pattern @"{" field_1 ["=" pattern_1] ";" \ldots ";" field_n ["=" +pattern_n] "}"@ matches records that define at least the fields +@field_1@ through @field_n@, and such that the value associated to +@field_i@ matches the pattern @pattern_i@, for \fromoneto{i}{n}. +A single identifier @field_k@ stands for @field_k '=' field_k @, +and a single qualified identifier @module-path '.' field_k@ stands +for @module-path '.' field_k '=' field_k @. +The record value can define more fields than @field_1@ \ldots +@field_n@; the values associated to these extra fields are not taken +into account for matching. Optionally, a record pattern can be terminated +by @';' '_'@ to convey the fact that not all fields of the record type are +listed in the record pattern and that it is intentional. 
+Optional type constraints can be added field by field with +@"{" field_1 ":" typexpr_1 "=" pattern_1 ";"% +\ldots ";"field_n ":" typexpr_n "=" pattern_n "}"@ to force the type +of @field_k@ to be compatible with @typexpr_k@. + + +\subsubsection*{Array patterns} + +The pattern @"[|" pattern_1 ";" \ldots ";" pattern_n "|]"@ +matches arrays of length $n$ such that the $i$-th array element +matches the pattern @pattern_i@, for \fromoneto{i}{n}. + +\subsubsection*{Range patterns} + +The pattern +@"'" @c@ "'" ".." "'" @d@ "'"@ is a shorthand for the pattern +\begin{center} +@"'" @c@ "'" "|" "'" @c@_1 "'" "|" "'" @c@_2 "'" "|" \ldots + "|" "'" @c@_n "'" "|" "'" @d@ "'"@ +\end{center} +where \nth{c}{1}, \nth{c}{2}, \ldots, \nth{c}{n} are the characters +that occur between \var{c} and \var{d} in the ASCII character set. For +instance, the pattern "'0'"@'..'@"'9'" matches all characters that are digits. diff --git a/manual/manual/refman/refman.etex b/manual/manual/refman/refman.etex new file mode 100644 index 00000000..a7daea02 --- /dev/null +++ b/manual/manual/refman/refman.etex @@ -0,0 +1,48 @@ +\chapter{The OCaml language} \label{c:refman} +\pdfchapterfold{-12}{Reference manual for the OCaml language} +%HEVEA\cutname{language.html} + +%better html output that way, sniff. +%HEVEA\subsection*{Foreword} +%BEGIN LATEX +\section*{Foreword} +%END LATEX + +This document is intended as a reference manual for the OCaml +language. It lists the language constructs, and gives their precise +syntax and informal semantics. It is by no means a tutorial +introduction to the language: there is not a single example. A good +working knowledge of OCaml is assumed. + +No attempt has been made at mathematical rigor: words are employed +with their intuitive meaning, without further definition. As a +consequence, the typing rules have been left out, by lack of the +mathematical framework required to express them, while they are +definitely part of a full formal definition of the language. 
+ + +\subsection*{Notations} + +The syntax of the language is given in BNF-like notation. Terminal +symbols are set in typewriter font (@'like' 'this'@). +Non-terminal symbols are set in italic font (@like that@). +Square brackets @[\ldots]@ denote optional components. Curly brackets +@{\ldots}@ denote zero, one or several repetitions of the enclosed +components. Curly brackets with a trailing plus sign @{{\ldots}}@ +denote one or several repetitions of the enclosed components. +Parentheses @(\ldots)@ denote grouping. + +%HEVEA\cutdef{section} +\input{lex} +\input{values} +\input{names} +\input{types} +\input{const} +\input{patterns} +\input{expr} +\input{typedecl} +\input{classes} +\input{modtypes} +\input{modules} +\input{compunit} +%HEVEA\cutend diff --git a/manual/manual/refman/typedecl.etex b/manual/manual/refman/typedecl.etex new file mode 100644 index 00000000..9961f182 --- /dev/null +++ b/manual/manual/refman/typedecl.etex @@ -0,0 +1,226 @@ +\section{Type and exception definitions} +%HEVEA\cutname{typedecl.html}% +\pdfsection{Type and exception definitions} + +\subsection{Type definitions} +\label{s:type-defs} + +Type definitions bind type constructors to data types: either +variant types, record types, type abbreviations, or abstract data +types. They also bind the value constructors and record fields +associated with the definition.
+ +\ikwd{type\@\texttt{type}} +\ikwd{and\@\texttt{and}} +\ikwd{nonrec\@\texttt{nonrec}} +\ikwd{of\@\texttt{of}} + +\begin{syntax} +type-definition: + 'type' ['nonrec'] typedef { 'and' typedef } +; +typedef: + [type-params] typeconstr-name type-information +; +type-information: + [type-equation] [type-representation] { type-constraint } +; +type-equation: + '=' typexpr +; +type-representation: + '=' ['|'] constr-decl { '|' constr-decl } + | '=' record-decl + | '=' '|' +; +type-params: + type-param + | '(' type-param { "," type-param } ')' +; +type-param: + [variance] "'" ident +; +variance: + '+' + | '-' +; +record-decl: + '{' field-decl { ';' field-decl } [';'] '}' +; +constr-decl: + (constr-name || '[]' || '(::)') [ 'of' constr-args ] +; +constr-args: + typexpr { '*' typexpr } +; +field-decl: + ['mutable'] field-name ':' poly-typexpr +; +type-constraint: + 'constraint' "'" ident '=' typexpr +\end{syntax} +\ikwd{mutable\@\texttt{mutable}} +\ikwd{constraint\@\texttt{constraint}} +See also the following language extensions: +\hyperref[s:private-types]{private types}, +\hyperref[s:gadts]{generalized algebraic datatypes}, +\hyperref[s:attributes]{attributes}, +\hyperref[s:extension-nodes]{extension nodes}, +\hyperref[s:extensible-variants]{extensible variant types} and +\hyperref[s:inline-records]{inline records}. + +Type definitions are introduced by the "type" keyword, and +consist in one or several simple definitions, possibly mutually +recursive, separated by the "and" keyword. Each simple definition +defines one type constructor. + +A simple definition consists in a lowercase identifier, possibly +preceded by one or several type parameters, and followed by an +optional type equation, then an optional type representation, and then +a constraint clause. The identifier is the name of the type +constructor being defined. 
+ +In the right-hand side of type definitions, references to one of the +type constructor names being defined are considered as recursive, +unless "type" is followed by "nonrec". The "nonrec" keyword was +introduced in OCaml 4.02.2. + +The optional type parameters are either one type variable @"'" ident@, +for type constructors with one parameter, or a list of type variables +@"('"ident_1,\ldots,"'"ident_n")"@, for type constructors with several +parameters. Each type parameter may be prefixed by a variance +constraint @"+"@ (resp. @"-"@) indicating that the parameter is +covariant (resp. contravariant). These type parameters can appear in +the type expressions of the right-hand side of the definition, +optionally restricted by a variance constraint ; {\em i.e.\/} a +covariant parameter may only appear on the right side of a functional +arrow (more precisely, follow the left branch of an even number of +arrows), and a contravariant parameter only the left side (left branch of +an odd number of arrows). If the type has a representation or +an equation, and the parameter is free ({\em i.e.\/} not bound via a +type constraint to a constructed type), its variance constraint is +checked but subtyping {\em etc.\/} will use the inferred variance of the +parameter, which may be less restrictive; otherwise ({\em i.e.\/} for abstract +types or non-free parameters), the variance must be given explicitly, +and the parameter is invariant if no variance is given. + +The optional type equation @'=' typexpr@ makes the defined type +equivalent to the type expression @typexpr@: +one can be substituted for the other during typing. +If no type equation is given, a new type is generated: the defined type +is incompatible with any other type. + +The optional type representation describes the data structure +representing the defined type, by giving the list of associated +constructors (if it is a variant type) or associated fields (if it is +a record type).
If no type representation is given, nothing is +assumed on the structure of the type besides what is stated in the +optional type equation. + +The type representation @'=' ['|'] constr-decl { '|' constr-decl }@ +describes a variant type. The constructor declarations +@constr-decl_1, \ldots, constr-decl_n@ describe the constructors +associated to this variant type. The constructor +declaration @constr-name 'of' typexpr_1 '*' \ldots '*' typexpr_n@ +declares the name @constr-name@ as a non-constant constructor, whose +arguments have types @typexpr_1@ \ldots @typexpr_n@. +The constructor declaration @constr-name@ +declares the name @constr-name@ as a constant +constructor. Constructor names must be capitalized. + +The type representation @'=' '{' field-decl { ';' field-decl } [';'] '}'@ +describes a record type. The field declarations @field-decl_1, \ldots, +field-decl_n@ describe the fields associated to this record type. +The field declaration @field-name ':' poly-typexpr@ declares +@field-name@ as a field whose argument has type @poly-typexpr@. +The field declaration @'mutable' field-name ':' poly-typexpr@ +\ikwd{mutable\@\texttt{mutable}} +behaves similarly; in addition, it allows physical modification of +this field. +Immutable fields are covariant, mutable fields are non-variant. +Both mutable and immutable fields may have explicitly polymorphic +types. The polymorphism of the contents is statically checked whenever +a record value is created or modified. Extracted values may have their +types instantiated. + +The two components of a type definition, the optional equation and the +optional representation, can be combined independently, giving +rise to four typical situations: + +\begin{description} +\item[Abstract type: no equation, no representation.] 
~\\ +When appearing in a module signature, this definition specifies +nothing on the type constructor, besides its number of parameters: +its representation is hidden and it is assumed incompatible with any +other type. + +\item[Type abbreviation: an equation, no representation.] ~\\ +This defines the type constructor as an abbreviation for the type +expression on the right of the @'='@ sign. + +\item[New variant type or record type: no equation, a representation.] ~\\ +This generates a new type constructor and defines associated +constructors or fields, through which values of that type can be +directly built or inspected. + +\item[Re-exported variant type or record type: an equation, +a representation.] ~\\ +In this case, the type constructor is defined as an abbreviation for +the type expression given in the equation, but in addition the +constructors or fields given in the representation remain attached to +the defined type constructor. The type expression in the equation part +must agree with the representation: it must be of the same kind +(record or variant) and have exactly the same constructors or fields, +in the same order, with the same arguments. +\end{description} + +The type variables appearing as type parameters can optionally be +prefixed by "+" or "-" to indicate that the type constructor is +covariant or contravariant with respect to this parameter. This +variance information is used to decide subtyping relations when +checking the validity of @":>"@ coercions (see section \ref{s:coercions}). + +For instance, "type +'a t" declares "t" as an abstract type that is +covariant in its parameter; this means that if the type $\tau$ is a +subtype of the type $\sigma$, then $\tau " t"$ is a subtype of $\sigma +" t"$. Similarly, "type -'a t" declares that the abstract type "t" is +contravariant in its parameter: if $\tau$ is a subtype of $\sigma$, then +$\sigma " t"$ is a subtype of $\tau " t"$. 
If no "+" or "-" variance +annotation is given, the type constructor is assumed non-variant in the +corresponding parameter. For instance, the abstract type declaration +"type 'a t" means that $\tau " t"$ is neither a subtype nor a +supertype of $\sigma " t"$ if $\tau$ is a subtype of $\sigma$. + +The variance indicated by the "+" and "-" annotations on parameters +is enforced only for abstract and private types, or when there are +type constraints. +Otherwise, for abbreviations, variant and record types without type +constraints, the variance properties of the type constructor +are inferred from its definition, and the variance annotations are +only checked for conformance with the definition. + +\ikwd{constraint\@\texttt{constraint}} +The construct @ 'constraint' "'" ident '=' typexpr @ allows the +specification of +type parameters. Any actual type argument corresponding to the type +parameter @ident@ has to be an instance of @typexpr@ (more precisely, +@ident@ and @typexpr@ are unified). Type variables of @typexpr@ can +appear in the type equation and the type declaration. + +\subsection{Exception definitions} \label{s:excdef} +\ikwd{exception\@\texttt{exception}} + +\begin{syntax} +exception-definition: + 'exception' constr-decl + | 'exception' constr-name '=' constr +\end{syntax} + +Exception definitions add new constructors to the built-in variant +type \verb"exn" of exception values. The constructors are declared as +for a definition of a variant type. + +The form @'exception' constr-decl@ +generates a new exception, distinct from all other exceptions in the system. +The form @'exception' constr-name '=' constr@ +gives an alternate name to an existing exception.
diff --git a/manual/manual/refman/types.etex b/manual/manual/refman/types.etex new file mode 100644 index 00000000..231a1f87 --- /dev/null +++ b/manual/manual/refman/types.etex @@ -0,0 +1,243 @@ +\section{Type expressions} +\pdfsection{Type expressions} +%HEVEA\cutname{types.html} +\ikwd{as\@\texttt{as}} + +\begin{syntax} +typexpr: + "'" ident + | "_" + | '(' typexpr ')' + | [['?']label-name':'] typexpr '->' typexpr + | typexpr {{ '*' typexpr }} + | typeconstr + | typexpr typeconstr + | '(' typexpr { ',' typexpr } ')' typeconstr + | typexpr 'as' "'" ident + | polymorphic-variant-type + | '<' ['..'] '>' + | '<' method-type { ';' method-type } [';' || ';' '..'] '>' + | '#' class-path + | typexpr '#' class-path + | '(' typexpr { ',' typexpr } ')' '#' class-path +; +poly-typexpr: + typexpr + | {{ "'" ident }} '.' typexpr +; +method-type: + method-name ':' poly-typexpr +\end{syntax} +See also the following language extensions: +\hyperref[s-first-class-modules]{first-class modules}, +\hyperref[s:attributes]{attributes} and +\hyperref[s:extension-nodes]{extension nodes}. + +The table below shows the relative precedences and associativity of +operators and non-closed type constructions. The constructions with +higher precedences come first. +\ikwd{as\@\texttt{as}} +\begin{tableau}{|l|l|}{Operator}{Associativity} +\entree{Type constructor application}{--} +\entree{"#"}{--} +\entree{"*"}{--} +\entree{"->"}{right} +\entree{"as"}{--} +\end{tableau} + +Type expressions denote types in definitions of data types as well as +in type constraints over patterns and expressions. + +\subsubsection*{Type variables} + +The type expression @"'" ident@ stands for the type variable named +@ident@. The type expression @"_"@ stands for either an anonymous type +variable or anonymous type parameters. In data type definitions, type +variables are names for the data type parameters. 
In type constraints, +they represent unspecified types that can be instantiated by any type +to satisfy the type constraint. In general the scope of a named type +variable is the whole top-level phrase where it appears, and it can +only be generalized when leaving this scope. Anonymous variables have +no such restriction. In the following cases, the scope of named type +variables is restricted to the type expression where they appear: +1) for universal (explicitly polymorphic) type variables; +2) for type variables that only appear in public method specifications +(as those variables will be made universal, as described in +section~\ref{sec-methspec}); +3) for variables used as aliases, when the type they are aliased to +would be invalid in the scope of the enclosing definition ({\it i.e.} +when it contains free universal type variables, or locally +defined types.) + +\subsubsection*{Parenthesized types} + +The type expression @"(" typexpr ")"@ denotes the same type as +@typexpr@. + +\subsubsection*{Function types} + +The type expression @typexpr_1 '->' typexpr_2@ denotes the type of +functions mapping arguments of type @typexpr_1@ to results of type +@typexpr_2@. + +@label-name ':' typexpr_1 '->' typexpr_2@ denotes the same function type, but +the argument is labeled @label@. + +@'?' label-name ':' typexpr_1 '->' typexpr_2@ denotes the type of functions +mapping an optional labeled argument of type @typexpr_1@ to results of +type @typexpr_2@. That is, the physical type of the function will be +@typexpr_1 "option" '->' typexpr_2@. + +\subsubsection*{Tuple types} + +The type expression @typexpr_1 '*' \ldots '*' typexpr_n@ +denotes the type of tuples whose elements belong to types @typexpr_1, +\ldots typexpr_n@ respectively. + +\subsubsection*{Constructed types} + +Type constructors with no parameter, as in @typeconstr@, are type +expressions. 
+ +The type expression @typexpr typeconstr@, where @typeconstr@ is a type +constructor with one parameter, denotes the application of the unary type +constructor @typeconstr@ to the type @typexpr@. + +The type expression @(typexpr_1,\ldots,typexpr_n) typeconstr@, where +@typeconstr@ is a type constructor with $n$ parameters, denotes the +application of the $n$-ary type constructor @typeconstr@ to the types +@typexpr_1@ through @typexpr_n@. + +In the type expression @ "_" typeconstr @, the anonymous type expression +@ "_" @ stands in for anonymous type parameters and is equivalent to +@ ("_", \ldots,"_") @ with as many repetitions of "_" as the arity of +@typeconstr@. + +\subsubsection*{Aliased and recursive types} + +\ikwd{as\@\texttt{as}} + +The type expression @typexpr 'as' "'" ident@ denotes the same type as +@typexpr@, and also binds the type variable @ident@ to type @typexpr@ both +in @typexpr@ and in other types. In general the scope of an alias is +the same as for a named type variable, and covers the whole enclosing +definition. If the type variable +@ident@ actually occurs in @typexpr@, a recursive type is created. Recursive +types for which there exists a recursive path that does not contain +an object or polymorphic variant type constructor are rejected, except +when the "-rectypes" mode is selected. + +If @"'" ident@ denotes an explicit polymorphic variable, and @typexpr@ +denotes either an object or polymorphic variant type, the row variable +of @typexpr@ is captured by @"'" ident@, and quantified upon. 
+ +\subsubsection*{Polymorphic variant types} +\ikwd{of\@\texttt{of}} + +\begin{syntax} +polymorphic-variant-type: + '[' tag-spec-first { '|' tag-spec } ']' + | '[>' [ tag-spec ] { '|' tag-spec } ']' + | '[<' ['|'] tag-spec-full { '|' tag-spec-full } + [ '>' {{ '`'tag-name }} ] ']' +; +%\end{syntax} \begin{syntax} +tag-spec-first: + '`'tag-name [ 'of' typexpr ] + | [ typexpr ] '|' tag-spec +; +tag-spec: + '`'tag-name [ 'of' typexpr ] + | typexpr +; +tag-spec-full: + '`'tag-name [ 'of' ['&'] typexpr { '&' typexpr } ] + | typexpr +\end{syntax} + +Polymorphic variant types describe the values a polymorphic variant +may take. + +The first case is an exact variant type: all possible tags are +known, with their associated types, and they can all be present. +Its structure is fully known. + +The second case is an open variant type, describing a polymorphic +variant value: it gives the list of all tags the value could take, +with their associated types. This type is still compatible with a +variant type containing more tags. A special case is the unknown +type, which does not define any tag, and is compatible with any +variant type. + +The third case is a closed variant type. It gives information about +all the possible tags and their associated types, and which tags are +known to potentially appear in values. The exact variant type (first +case) is +just an abbreviation for a closed variant type where all possible tags +are also potentially present. + +In all three cases, tags may be either specified directly in the +@'`'tag-name ["of" typexpr]@ form, or indirectly through a type +expression, which must expand to an +exact variant type, whose tag specifications are inserted in its +place. + +Full specifications of variant tags are only used for non-exact closed +types. They can be understood as a conjunctive type for the argument: +it is intended to have all the types enumerated in the +specification. + +Such conjunctive constraints may be unsatisfiable. 
In such a case the +corresponding tag may not be used in a value of this type. This +does not mean that the whole type is not valid: one can still use +other available tags. +Conjunctive constraints are mainly intended as output from the type +checker. When they are used in source programs, unsolvable constraints +may cause early failures. + +\subsubsection*{Object types} + +An object type +@'<' [method-type { ';' method-type }] '>'@ +is a record of method types. + +Each method may have an explicit polymorphic type: @{{ "'" ident }} +'.' typexpr@. Explicit polymorphic variables have a local scope, and +an explicit polymorphic type can only be unified to an +equivalent one, where only the order and names of polymorphic +variables may change. + +The type @'<' {method-type ';'} '..' '>'@ is the +type of an object whose method names and types are described by +@method-type_1, \ldots, method-type_n@, and possibly some other +methods represented by the ellipsis. This ellipsis actually is +a special kind of type variable (called {\em row variable} in the +literature) that stands for any number of extra method types. + +\subsubsection*{\#-types} +\label{s:sharp-types} + +The type @'#' class-path@ is a special kind of abbreviation. This +abbreviation unifies with the type of any object belonging to a subclass +of class @class-path@. +% +It is handled in a special way as it usually hides a type variable (an +ellipsis, representing the methods that may be added in a subclass). +In particular, it vanishes when the ellipsis gets instantiated. +% +Each type expression @'#' class-path@ defines a new type variable, so +type @'#' class-path '->' '#' class-path@ is usually not the same as +type @('#' class-path 'as' "'" ident) '->' "'" ident@. +% + +Use of \#-types to abbreviate polymorphic variant types is deprecated. 
+If @@t@@ is an exact variant type then @"#"@t@@ translates to @"[<" @t@"]"@, +and @"#"@t@"[>" "`"tag_1 \dots"`"tag_k"]"@ translates to +@"[<" @t@ ">" "`"tag_1 \dots"`"tag_k"]"@ + +\subsubsection*{Variant and record types} + +There are no type expressions describing (defined) variant types nor +record types, since those are always named, i.e. defined before use +and referred to by name. Type definitions are described in +section~\ref{s:type-defs}. diff --git a/manual/manual/refman/values.etex b/manual/manual/refman/values.etex new file mode 100644 index 00000000..d5d01f0f --- /dev/null +++ b/manual/manual/refman/values.etex @@ -0,0 +1,97 @@ +\section{Values} +\pdfsection{Values} +%HEVEA\cutname{values.html} + +This section describes the kinds of values that are manipulated by +OCaml programs. + +\subsection{Base values} + +\subsubsection*{Integer numbers} + +Integer values are integer numbers from $-2^{30}$ to $2^{30}-1$, that +is $-1073741824$ to $1073741823$. The implementation may support a +wider range of integer values: on 64-bit platforms, the current +implementation supports integers ranging from $-2^{62}$ to $2^{62}-1$. + +\subsubsection*{Floating-point numbers} + +Floating-point values are numbers in floating-point representation. +The current implementation uses double-precision floating-point +numbers conforming to the IEEE 754 standard, with 53 bits of mantissa +and an exponent ranging from $-1022$ to $1023$. + +\subsubsection*{Characters} + +Character values are represented as 8-bit integers between 0 and 255. +Character codes between 0 and 127 are interpreted following the ASCII +standard. The current implementation interprets character codes +between 128 and 255 following the ISO 8859-1 standard. + +\subsubsection*{Character strings} \label{s:string-val} + +String values are finite sequences of characters. 
The current +implementation supports strings containing up to $2^{24} - 5$ +characters (16777211 characters); on 64-bit platforms, the limit is +$2^{57} - 9$. + +\subsection{Tuples} + +Tuples of values are written @'('@v@_1',' \ldots',' @v@_n')'@, standing for the +$n$-tuple of values @@v@_1@ to @@v@_n@. The current implementation +supports tuples of up to $2^{22} - 1$ elements (4194303 elements). + +\subsection{Records} + +Record values are labeled tuples of values. The record value written +@'{' field_1 '=' @v@_1';' \ldots';' field_n '=' @v@_n '}'@ associates the value +@@v@_i@ to the record field @field_i@, for $i = 1 \ldots n$. The current +implementation supports records with up to $2^{22} - 1$ fields +(4194303 fields). + +\subsection{Arrays} + +Arrays are finite, variable-sized sequences of values of the same +type. The current implementation supports arrays containing up to +$2^{22} - 1$ elements (4194303 elements) unless the elements are +floating-point numbers (2097151 elements in this case); on 64-bit +platforms, the limit is $2^{54} - 1$ for all arrays. + +\subsection{Variant values} + +Variant values are either a constant constructor, or a non-constant +constructor applied to a number of values. The former case is written +@constr@; the latter case is written @constr '('@v@_1',' ... ',' @v@_n +')'@, where the @@v@_i@ are said to be the arguments of the non-constant +constructor @constr@. The parentheses may be omitted if there is only +one argument. + +The following constants are treated like built-in constant +constructors: +\begin{tableau}{|l|l|}{Constant}{Constructor} +\entree{"false"}{the boolean false} +\entree{"true"}{the boolean true} +\entree{"()"}{the ``unit'' value} +\entree{"[]"}{the empty list} +\end{tableau} + +The current implementation limits each variant type to have at most +246 non-constant constructors and $2^{30}-1$ constant constructors.
+ +\subsection{Polymorphic variants} + +Polymorphic variants are an alternate form of variant values, not +belonging explicitly to a predefined variant type, and following +specific typing rules. They can be either constant, written +@"`"tag-name@, or non-constant, written @"`"tag-name'('@v@')'@. + +\subsection{Functions} + +Functional values are mappings from values to values. + +\subsection{Objects} + +Objects are composed of a hidden internal state which is a +record of instance variables, and a set of methods for accessing and +modifying these variables. The structure of an object is described by +the toplevel class that created it. diff --git a/manual/manual/style.css b/manual/manual/style.css new file mode 100644 index 00000000..8711a22e --- /dev/null +++ b/manual/manual/style.css @@ -0,0 +1,80 @@ +/* fira-sans-regular - latin */ +@font-face { + font-family: 'Fira Sans'; + font-style: normal; + font-weight: 400; + src: url('/pub/docs/manual-ocaml/fonts/fira-sans-v8-latin-regular.eot'); /* IE9 Compat Modes */ + src: local('Fira Sans Regular'), local('FiraSans-Regular'), + url('/pub/docs/manual-ocaml/fonts/fira-sans-v8-latin-regular.eot?#iefix') format('embedded-opentype'), /* IE6-IE8 */ + url('/pub/docs/manual-ocaml/fonts/fira-sans-v8-latin-regular.woff2') format('woff2'), /* Super Modern Browsers */ + url('/pub/docs/manual-ocaml/fonts/fira-sans-v8-latin-regular.woff') format('woff'), /* Modern Browsers */ + url('/pub/docs/manual-ocaml/fonts/fira-sans-v8-latin-regular.ttf') format('truetype'), /* Safari, Android, iOS */ + url('/pub/docs/manual-ocaml/fonts/fira-sans-v8-latin-regular.svg#FiraSans') format('svg'); /* Legacy iOS */ +} + + +a:visited {color : #416DFF; text-decoration : none; } +a:link {color : #416DFF; text-decoration : none; } +a:hover {color : Black; text-decoration : underline; } +a:active {color : Black; text-decoration : underline; } +.keyword { font-weight : bold ; color : Red } +.keywordsign { color : #C04600 } +.comment { color : Green } 
+.constructor { color : Blue } +.type { color : #5C6585 } +.string { color : Maroon } +.warning { color : Red ; font-weight : bold } +.info { margin-left : 3em; margin-right : 3em } +.code { color : #465F91 ; } +h1 { font-size : 2rem ; text-align: center; } + +h2, h3, h4, h5, h6, div.h7, div.h8, div.h9 { + font-size: 1.75rem; + border: 1px solid #000; + margin-top: 20px; + margin-bottom: 2px; + text-align: center; + padding: 8px; + font-family: "Fira Sans", sans-serif; + font-weight: normal; +} +h1 { + font-family: "Fira Sans", sans-serif; + padding: 10px; +} + +h2 { background-color: #90BDFF; } +h3 { background-color: #90DDFF; } +h4 { background-color: #90EDFF; } +h5 { background-color: #90FDFF; } +h6 { background-color: #90BDFF; } +div.h7 { background-color: #90DDFF; } +div.h8 { background-color: #F0FFFF; } +div.h9 { background-color: #FFFFFF; } + +.typetable { border-style : hidden } +.indextable { border-style : hidden } +.paramstable { border-style : hidden ; padding: 5pt 5pt} +body { + background-color : #f7f7f7; + font-size: 1rem; + max-width: 800px; + width: 85%; + margin: auto; + padding-bottom: 30px; +} +td { + font-size: 1rem; +} +.navbar { /* previous - up - next */ + position: absolute; + left: 10px; + top: 10px; +} +tr { background-color : #f7f7f7 } +td.typefieldcomment { background-color : #f7f7f7 } +pre { margin-bottom: 4px; white-space: pre-wrap; } +div.sig_block {margin-left: 2em} +ul.info-attributes { list-style: none; margin: 0; padding: 0; } +div.info > p:first-child{ margin-top:0; } +div.info-desc > p:first-child { margin-top:0; margin-bottom:0; } diff --git a/manual/manual/texstuff/.cvsignore b/manual/manual/texstuff/.cvsignore new file mode 100644 index 00000000..84eade83 --- /dev/null +++ b/manual/manual/texstuff/.cvsignore @@ -0,0 +1,13 @@ +*.aux +*.dvi +*.idx +*.ilg +*.ind +*.log +*.toc +*.ipr +*.txt +*.pdf +*.ps +pdfmanual.out +manual.out diff --git a/manual/manual/texstuff/.gitignore b/manual/manual/texstuff/.gitignore new file mode 
100644 index 00000000..84eade83 --- /dev/null +++ b/manual/manual/texstuff/.gitignore @@ -0,0 +1,13 @@ +*.aux +*.dvi +*.idx +*.ilg +*.ind +*.log +*.toc +*.ipr +*.txt +*.pdf +*.ps +pdfmanual.out +manual.out diff --git a/manual/manual/textman/.cvsignore b/manual/manual/textman/.cvsignore new file mode 100644 index 00000000..72475845 --- /dev/null +++ b/manual/manual/textman/.cvsignore @@ -0,0 +1,5 @@ +manual.txt +manual.hmanual.kwd +*.haux +*.hind +*.htoc diff --git a/manual/manual/textman/.gitignore b/manual/manual/textman/.gitignore new file mode 100644 index 00000000..72475845 --- /dev/null +++ b/manual/manual/textman/.gitignore @@ -0,0 +1,5 @@ +manual.txt +manual.hmanual.kwd +*.haux +*.hind +*.htoc diff --git a/manual/manual/tutorials/.cvsignore b/manual/manual/tutorials/.cvsignore new file mode 100644 index 00000000..81ccbe71 --- /dev/null +++ b/manual/manual/tutorials/.cvsignore @@ -0,0 +1,2 @@ +*.tex +*.htex diff --git a/manual/manual/tutorials/.gitignore b/manual/manual/tutorials/.gitignore new file mode 100644 index 00000000..81ccbe71 --- /dev/null +++ b/manual/manual/tutorials/.gitignore @@ -0,0 +1,2 @@ +*.tex +*.htex diff --git a/manual/manual/tutorials/Makefile b/manual/manual/tutorials/Makefile new file mode 100644 index 00000000..b454374d --- /dev/null +++ b/manual/manual/tutorials/Makefile @@ -0,0 +1,31 @@ +FILES= coreexamples.tex lablexamples.tex objectexamples.tex moduleexamples.tex\ +advexamples.tex polymorphism.tex + +TOPDIR=../../.. 
+include $(TOPDIR)/Makefile.tools + +LD_PATH="$(TOPDIR)/otherlibs/str:$(TOPDIR)/otherlibs/unix" + +CAMLLATEX=$(SET_LD_PATH) $(OCAMLRUN) ../../tools/caml-tex2 +TEXQUOTE=../../tools/texquote2 + +ALLFILES=$(FILES) + +etex-files: $(ALLFILES) +all: $(ALLFILES) + +clean: + rm -f $(ALLFILES) + +.SUFFIXES: +.SUFFIXES: .etex .tex + +.etex.tex: + @$(CAMLLATEX) -caml "TERM=norepeat $(OCAML)" -n 80 -v false\ + -o $*.caml_tex_error.tex $*.etex\ + && mv $*.caml_tex_error.tex $*.gen.tex\ + && $(TEXQUOTE) < $*.gen.tex > $*.texquote_error.tex\ + && mv $*.texquote_error.tex $*.tex\ + || printf "Failure when generating %s\n" $*.tex + +$(ALLFILES): ../../tools/caml-tex2 $(TEXQUOTE) diff --git a/manual/manual/tutorials/advexamples.etex b/manual/manual/tutorials/advexamples.etex new file mode 100644 index 00000000..2be639d5 --- /dev/null +++ b/manual/manual/tutorials/advexamples.etex @@ -0,0 +1,647 @@ +\chapter{Advanced examples with classes and modules} +\pdfchapterfold{-3}{Tutorial: Advanced examples with classes and modules} +%HEVEA\cutname{advexamples.html} +\label{c:advexamples} + +{\it (Chapter written by Didier Rémy)} + +\bigskip + +\noindent + +In this chapter, we show some larger examples using objects, classes +and modules. We review many of the object features simultaneously on +the example of a bank account. We show how modules taken from the +standard library can be expressed as classes. Lastly, we describe a +programming pattern known as {\em virtual types} through the example +of window managers. + +\section{Extended example: bank accounts} +\pdfsection{Extended example: bank accounts} +\label{ss:bank-accounts} + +In this section, we illustrate most aspects of Object and inheritance +by refining, debugging, and specializing the following +initial naive definition of a simple bank account. (We reuse the +module "Euro" defined at the end of chapter~\ref{c:objectexamples}.) 
+\begin{caml_eval} +module type MONEY = + sig + type t + class c : float -> + object ('a) + val repr : t + method value : t + method print : unit + method times : float -> 'a + method leq : 'a -> bool + method plus : 'a -> 'a + end + end;; +module Euro : MONEY = + struct + type t = float + class c x = + object (self : 'a) + val repr = x + method value = repr + method print = print_float repr + method times k = {< repr = k *. x >} + method leq (p : 'a) = repr <= p#value + method plus (p : 'a) = {< repr = x +. p#value >} + end + end;; +\end{caml_eval} +\begin{caml_example}{toplevel} +let euro = new Euro.c;; +let zero = euro 0.;; +let neg x = x#times (-1.);; +class account = + object + val mutable balance = zero + method balance = balance + method deposit x = balance <- balance # plus x + method withdraw x = + if x#leq balance then (balance <- balance # plus (neg x); x) else zero + end;; +let c = new account in c # deposit (euro 100.); c # withdraw (euro 50.);; +\end{caml_example} +We now refine this definition with a method to compute interest. +\begin{caml_example}{toplevel} +class account_with_interests = + object (self) + inherit account + method private interest = self # deposit (self # balance # times 0.03) + end;; +\end{caml_example} +We make the method "interest" private, since clearly it should not be +called freely from the outside. Here, it is only made accessible to subclasses +that will manage monthly or yearly updates of the account. + +We should soon fix a bug in the current definition: the deposit method can +be used for withdrawing money by depositing negative amounts. 
We can +fix this directly: +\begin{caml_example}{toplevel} +class safe_account = + object + inherit account + method deposit x = if zero#leq x then balance <- balance#plus x + end;; +\end{caml_example} +However, the bug might be fixed more safely by the following definition: +\begin{caml_example}{toplevel} +class safe_account = + object + inherit account as unsafe + method deposit x = + if zero#leq x then unsafe # deposit x + else raise (Invalid_argument "deposit") + end;; +\end{caml_example} +In particular, this does not require the knowledge of the implementation of +the method "deposit". + +To keep track of operations, we extend the class with a mutable field +"history" and a private method "trace" to add an operation in the +log. Then each method to be traced is redefined. +\begin{caml_example}{toplevel} +type 'a operation = Deposit of 'a | Retrieval of 'a;; +class account_with_history = + object (self) + inherit safe_account as super + val mutable history = [] + method private trace x = history <- x :: history + method deposit x = self#trace (Deposit x); super#deposit x + method withdraw x = self#trace (Retrieval x); super#withdraw x + method history = List.rev history + end;; +\end{caml_example} +%% \label{ss:bank:initializer} +One may wish to open an account and simultaneously deposit some initial +amount. Although the initial implementation did not address this +requirement, it can be achieved by using an initializer. +\begin{caml_example}{toplevel} +class account_with_deposit x = + object + inherit account_with_history + initializer balance <- x + end;; +\end{caml_example} +A better alternative is: +\begin{caml_example}{toplevel} +class account_with_deposit x = + object (self) + inherit account_with_history + initializer self#deposit x + end;; +\end{caml_example} +Indeed, the latter is safer since the call to "deposit" will automatically +benefit from safety checks and from the trace. 
+Let's test it: +\begin{caml_example}{toplevel} +let ccp = new account_with_deposit (euro 100.) in +let _balance = ccp#withdraw (euro 50.) in +ccp#history;; +\end{caml_example} +Closing an account can be done with the following polymorphic function: +\begin{caml_example}{toplevel} +let close c = c#withdraw c#balance;; +\end{caml_example} +Of course, this applies to all sorts of accounts. + +Finally, we gather several versions of the account into a module "Account" +abstracted over some currency. +\begin{caml_example*}{toplevel} +let today () = (01,01,2000) (* an approximation *) +module Account (M:MONEY) = + struct + type m = M.c + let m = new M.c + let zero = m 0. + + class bank = + object (self) + val mutable balance = zero + method balance = balance + val mutable history = [] + method private trace x = history <- x::history + method deposit x = + self#trace (Deposit x); + if zero#leq x then balance <- balance # plus x + else raise (Invalid_argument "deposit") + method withdraw x = + if x#leq balance then + (balance <- balance # plus (neg x); self#trace (Retrieval x); x) + else zero + method history = List.rev history + end + + class type client_view = + object + method deposit : m -> unit + method history : m operation list + method withdraw : m -> m + method balance : m + end + + class virtual check_client x = + let y = if (m 100.)#leq x then x + else raise (Failure "Insufficient initial deposit") in + object (self) + initializer self#deposit y + method virtual deposit: m -> unit + end + + module Client (B : sig class bank : client_view end) = + struct + class account x : client_view = + object + inherit B.bank + inherit check_client x + end + + let discount x = + let c = new account x in + if today() < (1998,10,30) then c # deposit (m 100.); c + end + end;; +\end{caml_example*} +This shows the use of modules to group several class definitions that can in +fact be thought of as a single unit. 
This unit would be provided by a bank +for both internal and external uses. +This is implemented as a functor that abstracts over the currency so that +the same code can be used to provide accounts in different currencies. + +The class "bank" is the {\em real} implementation of the bank account (it +could have been inlined). This is the one that will be used for further +extensions, refinements, etc. Conversely, the client will only be given the client view. +\begin{caml_example*}{toplevel} +module Euro_account = Account(Euro);; +module Client = Euro_account.Client (Euro_account);; +new Client.account (new Euro.c 100.);; +\end{caml_example*} +Hence, the clients do not have direct access to the "balance", nor the +"history" of their own accounts. Their only way to change their balance is +to deposit or withdraw money. It is important to give the clients +a class and not just the ability to create accounts (such as the +promotional "discount" account), so that they can +personalize their account. +For instance, a client may refine the "deposit" and "withdraw" methods +so as to do his own financial bookkeeping, automatically. On the +other hand, the function "discount" is given as such, with no +possibility for further personalization. + +It is important to provide the client's view as a functor +"Client" so that client accounts can still be built after a possible +specialization of the "bank". +The functor "Client" may remain unchanged and be passed +the new definition to initialize a client's view of the extended account. 
+\begin{caml_example*}{toplevel} +module Investment_account (M : MONEY) = + struct + type m = M.c + module A = Account(M) + + class bank = + object + inherit A.bank as super + method deposit x = + if (new M.c 1000.)#leq x then + print_string "Would you like to invest?"; + super#deposit x + end + + module Client = A.Client + end;; +\end{caml_example*} +\begin{caml_eval} +module Euro_account = Investment_account (Euro);; +module Client = Euro_account.Client (Euro_account);; +new Client.account (new Euro.c 100.);; +\end{caml_eval} +The functor "Client" may also be redefined when some new features of the +account can be given to the client. +\begin{caml_example*}{toplevel} +module Internet_account (M : MONEY) = + struct + type m = M.c + module A = Account(M) + + class bank = + object + inherit A.bank + method mail s = print_string s + end + + class type client_view = + object + method deposit : m -> unit + method history : m operation list + method withdraw : m -> m + method balance : m + method mail : string -> unit + end + + module Client (B : sig class bank : client_view end) = + struct + class account x : client_view = + object + inherit B.bank + inherit A.check_client x + end + end + end;; +\end{caml_example*} +\begin{caml_eval} +module Euro_account = Internet_account (Euro);; +module Client = Euro_account.Client (Euro_account);; +new Client.account (new Euro.c 100.);; +\end{caml_eval} + + +\section{Simple modules as classes} +\pdfsection{Simple modules as classes} +\label{ss:modules-as-classes} + +One may wonder whether it is possible to treat primitive types such as +integers and strings as objects. Although this is usually uninteresting +for integers or strings, there may be some situations where +this is desirable. The class "money" above is such an example. +We show here how to do it for strings. 
+ +\subsection{Strings} +\label{module:string} + +A naive definition of strings as objects could be: +\begin{caml_example}{toplevel} +class ostring s = + object + method get n = String.get s n + method print = print_string s + method escaped = new ostring (String.escaped s) + end;; +\end{caml_example} +However, the method "escaped" returns an object of the class "ostring", +and not an object of the current class. Hence, if the class is further +extended, the method "escaped" will only return an object of the parent +class. +\begin{caml_example}{toplevel} +class sub_string s = + object + inherit ostring s + method sub start len = new sub_string (String.sub s start len) + end;; +\end{caml_example} +As seen in section \ref{ss:binary-methods}, the solution is to use +functional update instead. We need to create an instance variable +containing the representation "s" of the string. +\begin{caml_example}{toplevel} +class better_string s = + object + val repr = s + method get n = String.get repr n + method print = print_string repr + method escaped = {< repr = String.escaped repr >} + method sub start len = {< repr = String.sub s start len >} + end;; +\end{caml_example} +As shown in the inferred type, the methods "escaped" and "sub" now return +objects of the same type as the one of the class. + +Another difficulty is the implementation of the method "concat". +In order to concatenate a string with another string of the same class, +one must be able to access the instance variable externally. Thus, a method +"repr" returning s must be defined. 
Here is the correct definition of +strings: +\begin{caml_example}{toplevel} +class ostring s = + object (self : 'mytype) + val repr = s + method repr = repr + method get n = String.get repr n + method print = print_string repr + method escaped = {< repr = String.escaped repr >} + method sub start len = {< repr = String.sub s start len >} + method concat (t : 'mytype) = {< repr = repr ^ t#repr >} + end;; +\end{caml_example} +Another constructor of the class string can be defined to return a new +string of a given length: +\begin{caml_example}{toplevel} +class cstring n = ostring (String.make n ' ');; +\end{caml_example} +Here, exposing the representation of strings is probably harmless. We +could also hide the representation of strings as we hid the currency in the +class "money" of section~\ref{ss:friends}. + +\subsubsection{Stacks} +\label{module:stack} + +There is sometimes an alternative between using modules or classes for +parametric data types. +Indeed, there are situations when the two approaches are quite similar. +For instance, a stack can be straightforwardly implemented as a class: +\begin{caml_example}{toplevel} +exception Empty;; +class ['a] stack = + object + val mutable l = ([] : 'a list) + method push x = l <- x::l + method pop = match l with [] -> raise Empty | a::l' -> l <- l'; a + method clear = l <- [] + method length = List.length l + end;; +\end{caml_example} +However, writing a method for iterating over a stack is more +problematic. A method "fold" would have type +"('b -> 'a -> 'b) -> 'b -> 'b". Here "'a" is the parameter of the stack. +The parameter "'b" is not related to the class "'a stack" but to the +argument that will be passed to the method "fold". +%The intuition is that method "fold" should be polymorphic, i.e. of type +%"All ('a) ('b -> 'a -> 'b) -> 'b -> 'b".
+A naive approach is to make "'b" an extra parameter of class "stack": +\begin{caml_example}{toplevel} +class ['a, 'b] stack2 = + object + inherit ['a] stack + method fold f (x : 'b) = List.fold_left f x l + end;; +\end{caml_example} +However, the method "fold" of a given object can only be +applied to functions that all have the same type: +\begin{caml_example}{toplevel} +let s = new stack2;; +s#fold ( + ) 0;; +s;; +\end{caml_example} +A better solution is to use polymorphic methods, which were +introduced in OCaml version 3.05. Polymorphic methods make +it possible to treat the type variable "'b" in the type of "fold" as +universally quantified, giving "fold" the polymorphic type +"Forall 'b. ('b -> 'a -> 'b) -> 'b -> 'b". +An explicit type declaration on the method "fold" is required, since +the type checker cannot infer the polymorphic type by itself. +\begin{caml_example}{toplevel} +class ['a] stack3 = + object + inherit ['a] stack + method fold : 'b. ('b -> 'a -> 'b) -> 'b -> 'b + = fun f x -> List.fold_left f x l + end;; +\end{caml_example} + +% However, the nice correspondence between the implementations of stacks as +% modules or classes is a very particular case. + +% XXX Maps + +\subsection{Hashtbl} +\label{module:hashtbl} + +A simplified version of object-oriented hash tables should have the +following class type. +\begin{caml_example}{toplevel} +class type ['a, 'b] hash_table = + object + method find : 'a -> 'b + method add : 'a -> 'b -> unit + end;; +\end{caml_example} +A simple implementation, which is quite reasonable for small hash tables, is +to use an association list: +\begin{caml_example}{toplevel} +class ['a, 'b] small_hashtbl : ['a, 'b] hash_table = + object + val mutable table = [] + method find key = List.assoc key table + method add key valeur = table <- (key, valeur) :: table + end;; +\end{caml_example} +A better implementation, and one that scales up better, is to use a +true hash table\ldots\ whose elements are small hash tables!
+\begin{caml_example}{toplevel} +class ['a, 'b] hashtbl size : ['a, 'b] hash_table = + object (self) + val table = Array.init size (fun i -> new small_hashtbl) + method private hash key = + (Hashtbl.hash key) mod (Array.length table) + method find key = table.(self#hash key) # find key + method add key = table.(self#hash key) # add key + end;; +\end{caml_example} + +% problem + +% solution + +\subsection{Sets} +\label{module:set} + +Implementing sets leads to another difficulty. Indeed, the method +"union" needs to be able to access the internal representation of +another object of the same class. + +This is another instance of friend functions as seen in section +\ref{ss:friends}. Indeed, this is the same mechanism used in the module +"Set" in the absence of objects. + +In the object-oriented version of sets, we only need to add an additional +method "tag" to return the representation of a set. Since sets are +parametric in the type of elements, the method "tag" has a parametric type +"'a tag", concrete within +the module definition but abstract in its signature. +From outside, it will then be guaranteed that two objects with a method "tag" +of the same type will share the same representation. 
+\begin{caml_example*}{toplevel} +module type SET = + sig + type 'a tag + class ['a] c : + object ('b) + method is_empty : bool + method mem : 'a -> bool + method add : 'a -> 'b + method union : 'b -> 'b + method iter : ('a -> unit) -> unit + method tag : 'a tag + end + end;; +module Set : SET = + struct + let rec merge l1 l2 = + match l1 with + [] -> l2 + | h1 :: t1 -> + match l2 with + [] -> l1 + | h2 :: t2 -> + if h1 < h2 then h1 :: merge t1 l2 + else if h1 > h2 then h2 :: merge l1 t2 + else merge t1 l2 + type 'a tag = 'a list + class ['a] c = + object (_ : 'b) + val repr = ([] : 'a list) + method is_empty = (repr = []) + method mem x = List.exists (( = ) x) repr + method add x = {< repr = merge [x] repr >} + method union (s : 'b) = {< repr = merge repr s#tag >} + method iter (f : 'a -> unit) = List.iter f repr + method tag = repr + end + end;; +\end{caml_example*} + +\section{The subject/observer pattern} +\pdfsection{The subject/observer pattern} +\label{ss:subject-observer} + +The following example, known as the subject/observer pattern, is often +presented in the literature as a difficult inheritance problem with +inter-connected classes. +The general pattern amounts to the definition of a pair of +classes that recursively interact with one another. + +The class "observer" has a distinguished method "notify" that requires +two arguments, a subject and an event to execute an action. +\begin{caml_example}{toplevel} +class virtual ['subject, 'event] observer = + object + method virtual notify : 'subject -> 'event -> unit + end;; +\end{caml_example} +The class "subject" remembers a list of observers in an instance variable, +and has a distinguished method "notify_observers" to broadcast the message +"notify" to all observers with a particular event "e".
+\begin{caml_example}{toplevel} +class ['observer, 'event] subject = + object (self) + val mutable observers = ([]:'observer list) + method add_observer obs = observers <- (obs :: observers) + method notify_observers (e : 'event) = + List.iter (fun x -> x#notify self e) observers + end;; +\end{caml_example} +The difficulty usually lies in defining instances of the pattern above +by inheritance. This can be done in a natural and obvious manner in +OCaml, as shown on the following example manipulating windows. +\begin{caml_example}{toplevel} +type event = Raise | Resize | Move;; +let string_of_event = function + Raise -> "Raise" | Resize -> "Resize" | Move -> "Move";; +let count = ref 0;; +class ['observer] window_subject = + let id = count := succ !count; !count in + object (self) + inherit ['observer, event] subject + val mutable position = 0 + method identity = id + method move x = position <- position + x; self#notify_observers Move + method draw = Printf.printf "{Position = %d}\n" position; + end;; +class ['subject] window_observer = + object + inherit ['subject, event] observer + method notify s e = s#draw + end;; +\end{caml_example} +As can be expected, the type of "window" is recursive. +\begin{caml_example}{toplevel} +let window = new window_subject;; +\end{caml_example} +However, the two classes of "window_subject" and "window_observer" are not +mutually recursive. +\begin{caml_example}{toplevel} +let window_observer = new window_observer;; +window#add_observer window_observer;; +window#move 1;; +\end{caml_example} + +Classes "window_observer" and "window_subject" can still be extended by +inheritance. For instance, one may enrich the "subject" with new +behaviors and refine the behavior of the observer. 
+\begin{caml_example}{toplevel} +class ['observer] richer_window_subject = + object (self) + inherit ['observer] window_subject + val mutable size = 1 + method resize x = size <- size + x; self#notify_observers Resize + val mutable top = false + method raise = top <- true; self#notify_observers Raise + method draw = Printf.printf "{Position = %d; Size = %d}\n" position size; + end;; +class ['subject] richer_window_observer = + object + inherit ['subject] window_observer as super + method notify s e = if e <> Raise then s#raise; super#notify s e + end;; +\end{caml_example} +We can also create a different kind of observer: +\begin{caml_example}{toplevel} +class ['subject] trace_observer = + object + inherit ['subject, event] observer + method notify s e = + Printf.printf + "<Window %d <== %s>\n" s#identity (string_of_event e) + end;; +\end{caml_example} +and attach several observers to the same object: +\begin{caml_example}{toplevel} +let window = new richer_window_subject;; +window#add_observer (new richer_window_observer);; +window#add_observer (new trace_observer);; +window#move 1; window#resize 2;; +\end{caml_example} + +%\subsection{Classes used as modules with inheritance} +% +% to be filled for next release... +% +% an example of stateless objects used to provide inheritance in modules +% + + +% LocalWords: objectexamples bsection init caml val int Oo succ incr ref +% LocalWords: typecheck leq bool cp eval sig struct ABSPOINT Abspoint iter neg +% LocalWords: accu mem rec repr Euro euro ccp inlined ostring len concat OCaml diff --git a/manual/manual/tutorials/coreexamples.etex b/manual/manual/tutorials/coreexamples.etex new file mode 100644 index 00000000..396265b0 --- /dev/null +++ b/manual/manual/tutorials/coreexamples.etex @@ -0,0 +1,761 @@ +\chapter{The core language} \label{c:core-xamples} +\pdfchapterfold{-9}{Tutorial: The core language} +%HEVEA\cutname{coreexamples.html} + +This part of the manual is a tutorial introduction to the +OCaml language.
A good familiarity with programming in a conventional +language (say, C or Java) is assumed, but no prior exposure to +functional languages is required. The present chapter introduces the +core language. Chapter~\ref{c:moduleexamples} deals with the +module system, chapter~\ref{c:objectexamples} with the +object-oriented features, chapter~\ref{c:labl-examples} with +extensions to the core language (labeled arguments and polymorphic +variants), and chapter~\ref{c:advexamples} gives some advanced examples. + +\section{Basics} +\pdfsection{Basics} + +For this overview of OCaml, we use the interactive system, which +is started by running "ocaml" from the Unix shell, or by launching the +"OCamlwin.exe" application under Windows. This tutorial is presented +as the transcript of a session with the interactive system: +lines starting with "#" represent user input; the system responses are +printed below, without a leading "#". + +Under the interactive system, the user types OCaml phrases terminated +by ";;" in response to the "#" prompt, and the system compiles them +on the fly, executes them, and prints the outcome of evaluation. +Phrases are either simple expressions, or "let" definitions of +identifiers (either values or functions). +\begin{caml_example}{toplevel} +1+2*3;; +let pi = 4.0 *. atan 1.0;; +let square x = x *. x;; +square (sin pi) +. square (cos pi);; +\end{caml_example} +The OCaml system computes both the value and the type for +each phrase. Even function parameters need no explicit type declaration: +the system infers their types from their usage in the +function. Notice also that integers and floating-point numbers are +distinct types, with distinct operators: "+" and "*" operate on +integers, but "+." and "*." operate on floats.
+\begin{caml_example}{toplevel}[error] +1.0 * 2;; +\end{caml_example} + +Recursive functions are defined with the "let rec" binding: +\begin{caml_example}{toplevel} +let rec fib n = + if n < 2 then n else fib (n-1) + fib (n-2);; +fib 10;; +\end{caml_example} + +\section{Data types} +\pdfsection{Data types} + +In addition to integers and floating-point numbers, OCaml offers the +usual basic data types: booleans, characters, and immutable character strings. +\begin{caml_example}{toplevel} +(1 < 2) = false;; +'a';; +"Hello world";; +\end{caml_example} + +Predefined data structures include tuples, arrays, and lists. There are also +general mechanisms for defining your own data structures, such as records and +variants, which will be covered in more detail later; for now, we concentrate +on lists. Lists are either given in extension as a bracketed list of +semicolon-separated elements, or built from the empty list "[]" +(pronounce ``nil'') by adding elements in front using the "::" +(``cons'') operator. +\begin{caml_example}{toplevel} +let l = ["is"; "a"; "tale"; "told"; "etc."];; +"Life" :: l;; +\end{caml_example} +As with all other OCaml data structures, lists do not need to be +explicitly allocated and deallocated from memory: all memory +management is entirely automatic in OCaml. Similarly, there is no +explicit handling of pointers: the OCaml compiler silently introduces +pointers where necessary. + +As with most OCaml data structures, inspecting and destructuring lists +is performed by pattern-matching. List patterns have exactly the same +form as list expressions, with identifiers representing unspecified +parts of the list. 
As an example, here is insertion sort on a list: +\begin{caml_example}{toplevel} +let rec sort lst = + match lst with + [] -> [] + | head :: tail -> insert head (sort tail) +and insert elt lst = + match lst with + [] -> [elt] + | head :: tail -> if elt <= head then elt :: lst else head :: insert elt tail +;; +sort l;; +\end{caml_example} + +The type inferred for "sort", "'a list -> 'a list", means that "sort" +can actually apply to lists of any type, and returns a list of the +same type. The type "'a" is a {\em type variable}, and stands for any +given type. The reason why "sort" can apply to lists of any type is +that the comparisons ("=", "<=", etc.) are {\em polymorphic} in OCaml: +they operate between any two values of the same type. This makes +"sort" itself polymorphic over all list types. +\begin{caml_example}{toplevel} +sort [6;2;5;3];; +sort [3.14; 2.718];; +\end{caml_example} + +The "sort" function above does not modify its input list: it builds +and returns a new list containing the same elements as the input list, +in ascending order. There is actually no way in OCaml to modify +a list in-place once it is built: we say that lists are {\em immutable} +data structures. Most OCaml data structures are immutable, but a few +(most notably arrays) are {\em mutable}, meaning that they can be +modified in-place at any time. + +The OCaml notation for the type of a function with multiple arguments is \\ +"arg1_type -> arg2_type -> ... -> return_type". For example, +the type inferred for "insert", "'a -> 'a list -> 'a list", means that "insert" +takes two arguments, an element of any type "'a" and a list with elements of +the same type "'a" and returns a list of the same type. +\section{Functions as values} +\pdfsection{Functions as values} + +OCaml is a functional language: functions in the full mathematical +sense are supported and can be passed around freely just as any other +piece of data. 
For instance, here is a "deriv" function that takes any +float function as argument and returns an approximation of its +derivative function: +\begin{caml_example}{toplevel} +let deriv f dx = function x -> (f (x +. dx) -. f x) /. dx;; +let sin' = deriv sin 1e-6;; +sin' pi;; +\end{caml_example} +Even function composition is definable: +\begin{caml_example}{toplevel} +let compose f g = function x -> f (g x);; +let cos2 = compose square cos;; +\end{caml_example} + +Functions that take other functions as arguments are called +``functionals'', or ``higher-order functions''. Functionals are +especially useful to provide iterators or similar generic operations +over a data structure. For instance, the standard OCaml library +provides a "List.map" functional that applies a given function to each +element of a list, and returns the list of the results: +\begin{caml_example}{toplevel} +List.map (function n -> n * 2 + 1) [0;1;2;3;4];; +\end{caml_example} +This functional, along with a number of other list and array +functionals, is predefined because it is often useful, but there is +nothing magic with it: it can easily be defined as follows. +\begin{caml_example}{toplevel} +let rec map f l = + match l with + [] -> [] + | hd :: tl -> f hd :: map f tl;; +\end{caml_example} + +\section{Records and variants} +\pdfsection{Records and variants} +\label{s:tut-recvariants} + +User-defined data structures include records and variants. Both are +defined with the "type" declaration. Here, we declare a record type to +represent rational numbers. 
+\begin{caml_example}{toplevel} +type ratio = {num: int; denom: int};; +let add_ratio r1 r2 = + {num = r1.num * r2.denom + r2.num * r1.denom; + denom = r1.denom * r2.denom};; +add_ratio {num=1; denom=3} {num=2; denom=5};; +\end{caml_example} +Record fields can also be accessed through pattern-matching: +\begin{caml_example}{toplevel} +let integer_part r = + match r with + {num=num; denom=denom} -> num / denom;; +\end{caml_example} +Since there is only one case in this pattern matching, it +is safe to expand directly the argument "r" in a record pattern: +\begin{caml_example}{toplevel} +let integer_part {num=num; denom=denom} = num / denom;; +\end{caml_example} +Unneeded fields can be omitted: +\begin{caml_example}{toplevel} +let get_denom {denom=denom} = denom;; +\end{caml_example} +Optionally, missing fields can be made explicit by ending the list of +fields with a trailing wildcard "_": +\begin{caml_example}{toplevel} +let get_num {num=num; _ } = num;; +\end{caml_example} +When both sides of the "=" sign are the same, it is possible to avoid +repeating the field name by eliding the "=field" part: +\begin{caml_example}{toplevel} +let integer_part {num; denom} = num / denom;; +\end{caml_example} +This short notation for fields also works when constructing records: +\begin{caml_example}{toplevel} +let ratio num denom = {num; denom};; +\end{caml_example} +Finally, it is possible to update a few fields of a record at once: +\begin{caml_example}{toplevel} + let integer_product integer ratio = { ratio with num = integer * ratio.num };; +\end{caml_example} +With this functional update notation, the record on the left-hand side +of "with" is copied except for the fields on the right-hand side which +are updated. + +The declaration of a variant type lists all possible forms for values +of that type. Each case is identified by a name, called a constructor, +which serves both for constructing values of the variant type and +inspecting them by pattern-matching.
Constructor names are capitalized +to distinguish them from variable names (which must start with a +lowercase letter). For instance, here is a variant +type for doing mixed arithmetic (integers and floats): +\begin{caml_example}{toplevel} +type number = Int of int | Float of float | Error;; +\end{caml_example} +This declaration expresses that a value of type "number" is either an +integer, a floating-point number, or the constant "Error" representing +the result of an invalid operation (e.g. a division by zero). + +Enumerated types are a special case of variant types, where all +alternatives are constants: +\begin{caml_example}{toplevel} +type sign = Positive | Negative;; +let sign_int n = if n >= 0 then Positive else Negative;; +\end{caml_example} + +To define arithmetic operations for the "number" type, we use +pattern-matching on the two numbers involved: +\begin{caml_example}{toplevel} +let add_num n1 n2 = + match (n1, n2) with + (Int i1, Int i2) -> + (* Check for overflow of integer addition *) + if sign_int i1 = sign_int i2 && sign_int (i1 + i2) <> sign_int i1 + then Float(float i1 +. float i2) + else Int(i1 + i2) + | (Int i1, Float f2) -> Float(float i1 +. f2) + | (Float f1, Int i2) -> Float(f1 +. float i2) + | (Float f1, Float f2) -> Float(f1 +. f2) + | (Error, _) -> Error + | (_, Error) -> Error;; +add_num (Int 123) (Float 3.14159);; +\end{caml_example} + +Another interesting example of variant type is the built-in +"'a option" type which represents either a value of type "'a" or an +absence of value: +\begin{caml_example}{toplevel} +type 'a option = Some of 'a | None;; +\end{caml_example} +This type is particularly useful when defining functions that can +fail in common situations, for instance: +\begin{caml_example}{toplevel} +let safe_square_root x = if x > 0. then Some(sqrt x) else None;; +\end{caml_example} + +The most common usage of variant types is to describe recursive data +structures.
Consider for example the type of binary trees: +\begin{caml_example}{toplevel} +type 'a btree = Empty | Node of 'a * 'a btree * 'a btree;; +\end{caml_example} +This definition reads as follows: a binary tree containing values of +type "'a" (an arbitrary type) is either empty, or is a node containing +one value of type "'a" and two subtrees also containing values of type +"'a", that is, two "'a btree". + +Operations on binary trees are naturally expressed as recursive functions +following the same structure as the type definition itself. For +instance, here are functions performing lookup and insertion in +ordered binary trees (elements increase from left to right): +\begin{caml_example}{toplevel} +let rec member x btree = + match btree with + Empty -> false + | Node(y, left, right) -> + if x = y then true else + if x < y then member x left else member x right;; +let rec insert x btree = + match btree with + Empty -> Node(x, Empty, Empty) + | Node(y, left, right) -> + if x <= y then Node(y, insert x left, right) + else Node(y, left, insert x right);; +\end{caml_example} + + +\subsection{Record and variant disambiguation} +(This subsection can be skipped on the first reading.) + +Astute readers may have wondered what happens when two or more record +fields or constructors share the same name. + +\begin{caml_example*}{toplevel} +type first_record = { x:int; y:int; z:int } +type middle_record = { x:int; z:int } +type last_record = { x:int };; +type first_variant = A | B | C +type last_variant = A;; +\end{caml_example*} + +The answer is that when confronted with multiple options, OCaml tries to +use locally available information to disambiguate between the various fields +and constructors. First, if the type of the record or variant is known, +OCaml can pick unambiguously the corresponding field or constructor.
+For instance: + +\begin{caml_example}{toplevel} +let look_at_x_then_z (r:first_record) = + let x = r.x in + x + r.z;; +let permute (x:first_variant) = match x with + | A -> (B:first_variant) + | B -> A + | C -> C;; +type wrapped = First of first_record +let f (First r) = r, r.x;; +\end{caml_example} + +In the first example, "(r:first_record)" is an explicit annotation +telling OCaml that the type of "r" is "first_record". With this +annotation, OCaml knows that "r.x" refers to the "x" field of the first +record type. Similarly, the type annotation in the second example makes +it clear to OCaml that the constructors "A", "B" and "C" come from the +first variant type. Contrarily, in the last example, OCaml has inferred +by itself that the type of "r" can only be "first_record" and there is +no need for explicit type annotations. + +Those explicit type annotations can in fact be used anywhere. +Most of the time they are unnecessary, but they are useful to guide +disambiguation, to debug unexpected type errors, or combined with some +of the more advanced features of OCaml described in later chapters. + +Secondly, for records, OCaml can also deduce the right record type by +looking at the whole set of fields used in an expression or pattern: +\begin{caml_example}{toplevel} +let project_and_rotate {x;y; _ } = { x= - y; y = x ; z = 0} ;; +\end{caml_example} +Since the fields "x" and "y" can only appear simultaneously in the first +record type, OCaml infers that the type of "project_and_rotate" is +"first_record -> first_record". + +As a last resort, if there is not enough information to disambiguate between +different fields or constructors, OCaml picks the last defined type +amongst all locally valid choices: + +\begin{caml_example}{toplevel} +let look_at_xz {x;z} = x;; +\end{caml_example} + +Here, OCaml has inferred that the possible choices for the type of +"{x;z}" are "first_record" and "middle_record", since the type +"last_record" has no field "z".
OCaml then picks the type "middle_record" +as the last defined type between the two possibilities. + +Beware that this last resort disambiguation is local: once OCaml has +chosen a disambiguation, it sticks to this choice, even if it leads to +a subsequent type error: + +\begin{caml_example}{toplevel}[error] +let look_at_x_then_y r = + let x = r.x in (* Ocaml deduces [r: last_record] *) + x + r.y;; +let is_a_or_b x = match x with + | A -> true (* OCaml infers [x: last_variant] *) + | B -> true;; +\end{caml_example} + +Moreover, being the last defined type is a quite unstable position that +may change surreptitiously after adding or moving around a type +definition, or after opening a module (see chapter \ref{c:moduleexamples}). +Consequently, adding explicit type annotations to guide disambiguation is +more robust than relying on the last defined type disambiguation. + +\section{Imperative features} +\pdfsection{Imperative features} + +Though all examples so far were written in purely applicative style, +OCaml is also equipped with full imperative features. This includes the +usual "while" and "for" loops, as well as mutable data structures such +as arrays. Arrays are either created by listing semicolon-separated element +values between "[|" and "|]" brackets, or allocated and initialized with the +"Array.make" function, then filled up later by assignments. For instance, the +function below sums two vectors (represented as float arrays) componentwise. +\begin{caml_example}{toplevel} +let add_vect v1 v2 = + let len = min (Array.length v1) (Array.length v2) in + let res = Array.make len 0.0 in + for i = 0 to len - 1 do + res.(i) <- v1.(i) +.
v2.(i) + done; + res;; +add_vect [| 1.0; 2.0 |] [| 3.0; 4.0 |];; +\end{caml_example} + +Record fields can also be modified by assignment, provided they are +declared "mutable" in the definition of the record type: +\begin{caml_example}{toplevel} +type mutable_point = { mutable x: float; mutable y: float };; +let translate p dx dy = + p.x <- p.x +. dx; p.y <- p.y +. dy;; +let mypoint = { x = 0.0; y = 0.0 };; +translate mypoint 1.0 2.0;; +mypoint;; +\end{caml_example} + +OCaml has no built-in notion of variable -- identifiers whose current +value can be changed by assignment. (The "let" binding is not an +assignment, it introduces a new identifier with a new scope.) +However, the standard library provides references, which are mutable +indirection cells, with operators "!" to fetch +the current contents of the reference and ":=" to assign the contents. +Variables can then be emulated by "let"-binding a reference. For +instance, here is an in-place insertion sort over arrays: +\begin{caml_example}{toplevel} +let insertion_sort a = + for i = 1 to Array.length a - 1 do + let val_i = a.(i) in + let j = ref i in + while !j > 0 && val_i < a.(!j - 1) do + a.(!j) <- a.(!j - 1); + j := !j - 1 + done; + a.(!j) <- val_i + done;; +\end{caml_example} + +References are also useful to write functions that maintain a current +state between two calls to the function. For instance, the following +pseudo-random number generator keeps the last returned number in a +reference: +\begin{caml_example}{toplevel} +let current_rand = ref 0;; +let random () = + current_rand := !current_rand * 25713 + 1345; + !current_rand;; +\end{caml_example} + +Again, there is nothing magical with references: they are implemented as +a single-field mutable record, as follows. +\begin{caml_example}{toplevel} +type 'a ref = { mutable contents: 'a };; +let ( ! 
) r = r.contents;; +let ( := ) r newval = r.contents <- newval;; +\end{caml_example} + +In some special cases, you may need to store a polymorphic function in +a data structure, keeping its polymorphism. Doing this requires +user-provided type annotations, since polymorphism is only introduced +automatically for global definitions. However, you can explicitly give +polymorphic types to record fields. +\begin{caml_example}{toplevel} +type idref = { mutable id: 'a. 'a -> 'a };; +let r = {id = fun x -> x};; +let g s = (s.id 1, s.id true);; +r.id <- (fun x -> print_string "called id\n"; x);; +g r;; +\end{caml_example} + +\section{Exceptions} +\pdfsection{Exceptions} + +OCaml provides exceptions for signalling and handling exceptional +conditions. Exceptions can also be used as a general-purpose non-local +control structure, although this should not be overused since it can +make the code harder to understand. Exceptions are declared with the +"exception" construct, and signalled with the "raise" operator. For instance, +the function below for taking the head of a list uses an exception to +signal the case where an empty list is given. +\begin{caml_example}{toplevel} +exception Empty_list;; +let head l = + match l with + [] -> raise Empty_list + | hd :: tl -> hd;; +head [1;2];; +head [];; +\end{caml_example} + +Exceptions are used throughout the standard library to signal cases +where the library functions cannot complete normally. 
For instance, +the "List.assoc" function, which returns the data associated with a +given key in a list of (key, data) pairs, raises the predefined +exception "Not_found" when the key does not appear in the list: +\begin{caml_example}{toplevel} +List.assoc 1 [(0, "zero"); (1, "one")];; +List.assoc 2 [(0, "zero"); (1, "one")];; +\end{caml_example} + +Exceptions can be trapped with the "try"\ldots"with" construct: +\begin{caml_example}{toplevel} +let name_of_binary_digit digit = + try + List.assoc digit [0, "zero"; 1, "one"] + with Not_found -> + "not a binary digit";; +name_of_binary_digit 0;; +name_of_binary_digit (-1);; +\end{caml_example} + +The "with" part does pattern matching on the +exception value with the same syntax and behavior as "match". Thus, +several exceptions can be caught by one +"try"\ldots"with" construct. Also, finalization can be performed by +trapping all exceptions, performing the finalization, then re-raising +the exception: +\begin{caml_example}{toplevel} +let temporarily_set_reference ref newval funct = + let oldval = !ref in + try + ref := newval; + let res = funct () in + ref := oldval; + res + with x -> + ref := oldval; + raise x;; +\end{caml_example} + +\section{Symbolic processing of expressions} +\pdfsection{Symbolic processing of expressions} + +We finish this introduction with a more complete example +representative of the use of OCaml for symbolic processing: formal +manipulations of arithmetic expressions containing variables. The +following variant type describes the expressions we shall manipulate: +\begin{caml_example}{toplevel} +type expression = + Const of float + | Var of string + | Sum of expression * expression (* e1 + e2 *) + | Diff of expression * expression (* e1 - e2 *) + | Prod of expression * expression (* e1 * e2 *) + | Quot of expression * expression (* e1 / e2 *) +;; +\end{caml_example} + +We first define a function to evaluate an expression given an +environment that maps variable names to their values. 
For simplicity, +the environment is represented as an association list. +\begin{caml_example}{toplevel} +exception Unbound_variable of string;; +let rec eval env exp = + match exp with + Const c -> c + | Var v -> + (try List.assoc v env with Not_found -> raise (Unbound_variable v)) + | Sum(f, g) -> eval env f +. eval env g + | Diff(f, g) -> eval env f -. eval env g + | Prod(f, g) -> eval env f *. eval env g + | Quot(f, g) -> eval env f /. eval env g;; +eval [("x", 1.0); ("y", 3.14)] (Prod(Sum(Var "x", Const 2.0), Var "y"));; +\end{caml_example} + +Now for a real symbolic processing, we define the derivative of an +expression with respect to a variable "dv": +\begin{caml_example}{toplevel} +let rec deriv exp dv = + match exp with + Const c -> Const 0.0 + | Var v -> if v = dv then Const 1.0 else Const 0.0 + | Sum(f, g) -> Sum(deriv f dv, deriv g dv) + | Diff(f, g) -> Diff(deriv f dv, deriv g dv) + | Prod(f, g) -> Sum(Prod(f, deriv g dv), Prod(deriv f dv, g)) + | Quot(f, g) -> Quot(Diff(Prod(deriv f dv, g), Prod(f, deriv g dv)), + Prod(g, g)) +;; +deriv (Quot(Const 1.0, Var "x")) "x";; +\end{caml_example} + +\section{Pretty-printing} +\pdfsection{Pretty-printing} + +As shown in the examples above, the internal representation (also +called {\em abstract syntax\/}) of expressions quickly becomes hard to +read and write as the expressions get larger. We need a printer and a +parser to go back and forth between the abstract syntax and the {\em +concrete syntax}, which in the case of expressions is the familiar +algebraic notation (e.g. "2*x+1"). + +For the printing function, we take into account the usual precedence +rules (i.e. "*" binds tighter than "+") to avoid printing unnecessary +parentheses. To this end, we maintain the current operator precedence +and print parentheses around an operator only if its precedence is +less than the current precedence. 
+\begin{caml_example}{toplevel} +let print_expr exp = + (* Local function definitions *) + let open_paren prec op_prec = + if prec > op_prec then print_string "(" in + let close_paren prec op_prec = + if prec > op_prec then print_string ")" in + let rec print prec exp = (* prec is the current precedence *) + match exp with + Const c -> print_float c + | Var v -> print_string v + | Sum(f, g) -> + open_paren prec 0; + print 0 f; print_string " + "; print 0 g; + close_paren prec 0 + | Diff(f, g) -> + open_paren prec 0; + print 0 f; print_string " - "; print 1 g; + close_paren prec 0 + | Prod(f, g) -> + open_paren prec 2; + print 2 f; print_string " * "; print 2 g; + close_paren prec 2 + | Quot(f, g) -> + open_paren prec 2; + print 2 f; print_string " / "; print 3 g; + close_paren prec 2 + in print 0 exp;; +let e = Sum(Prod(Const 2.0, Var "x"), Const 1.0);; +print_expr e; print_newline ();; +print_expr (deriv e "x"); print_newline ();; +\end{caml_example} + +%%%%%%%%%%% Should be moved to the camlp4 documentation. +%% Parsing (transforming concrete syntax into abstract syntax) is usually +%% more delicate. OCaml offers several tools to help write parsers: +%% on the one hand, OCaml versions of the lexer generator Lex and the +%% parser generator Yacc (see chapter~\ref{c:ocamlyacc}), which handle +%% LALR(1) languages using push-down automata; on the other hand, a +%% predefined type of streams (of characters or tokens) and +%% pattern-matching over streams, which facilitate the writing of +%% recursive-descent parsers for LL(1) languages. An example using +%% "ocamllex" and "ocamlyacc" is given in +%% chapter~\ref{c:ocamlyacc}. Here, we will use stream parsers. +%% The syntactic support for stream parsers is provided by the Camlp4 +%% preprocessor, which can be loaded into the interactive toplevel via +%% the "#load" directives below. 
+%% +%% \begin{caml_example} +%% #load "dynlink.cma";; +%% #load "camlp4o.cma";; +%% open Genlex;; +%% let lexer = make_lexer ["("; ")"; "+"; "-"; "*"; "/"];; +%% \end{caml_example} +%% For the lexical analysis phase (transformation of the input text into +%% a stream of tokens), we use a ``generic'' lexer provided in the +%% standard library module "Genlex". The "make_lexer" function takes a +%% list of keywords and returns a lexing function that ``tokenizes'' an +%% input stream of characters. Tokens are either identifiers, keywords, +%% or literals (integer, floats, characters, strings). Whitespace and +%% comments are skipped. +%% \begin{caml_example} +%% let token_stream = lexer (Stream.of_string "1.0 +x");; +%% Stream.next token_stream;; +%% Stream.next token_stream;; +%% Stream.next token_stream;; +%% \end{caml_example} +%% +%% The parser itself operates by pattern-matching on the stream of +%% tokens. As usual with recursive descent parsers, we use several +%% intermediate parsing functions to reflect the precedence and +%% associativity of operators. Pattern-matching over streams is more +%% powerful than on regular data structures, as it allows recursive calls +%% to parsing functions inside the patterns, for matching sub-components of +%% the input stream. See the Camlp4 documentation for more details. +%% +%% %Already said above +%% %In order to use stream parsers at toplevel, we must first load the +%% %"camlp4" preprocessor. +%% %\begin{caml_example} +%% %#load"camlp4o.cma";; +%% %\end{caml_example} +%% %Then we are ready to define our parser. 
+%% \begin{caml_example}{toplevel} +%% let rec parse_expr = parser +%% [< e1 = parse_mult; e = parse_more_adds e1 >] -> e +%% and parse_more_adds e1 = parser +%% [< 'Kwd "+"; e2 = parse_mult; e = parse_more_adds (Sum(e1, e2)) >] -> e +%% | [< 'Kwd "-"; e2 = parse_mult; e = parse_more_adds (Diff(e1, e2)) >] -> e +%% | [< >] -> e1 +%% and parse_mult = parser +%% [< e1 = parse_simple; e = parse_more_mults e1 >] -> e +%% and parse_more_mults e1 = parser +%% [< 'Kwd "*"; e2 = parse_simple; e = parse_more_mults (Prod(e1, e2)) >] -> e +%% | [< 'Kwd "/"; e2 = parse_simple; e = parse_more_mults (Quot(e1, e2)) >] -> e +%% | [< >] -> e1 +%% and parse_simple = parser +%% [< 'Ident s >] -> Var s +%% | [< 'Int i >] -> Const(float i) +%% | [< 'Float f >] -> Const f +%% | [< 'Kwd "("; e = parse_expr; 'Kwd ")" >] -> e;; +%% let parse_expression = parser [< e = parse_expr; _ = Stream.empty >] -> e;; +%% \end{caml_example} +%% +%% Composing the lexer and parser, we finally obtain a function to read +%% an expression from a character string: +%% \begin{caml_example} +%% let read_expression s = parse_expression (lexer (Stream.of_string s));; +%% read_expression "2*(x+y)";; +%% \end{caml_example} +%% A small puzzle: why do we get different results in the following two +%% examples? +%% \begin{caml_example} +%% read_expression "x - 1";; +%% read_expression "x-1";; +%% \end{caml_example} +%% Answer: the generic lexer provided by "Genlex" recognizes negative +%% integer literals as one integer token. Hence, "x-1" is read as +%% the token "Ident \"x\"" followed by the token "Int(-1)"; this sequence +%% does not match any of the parser rules. On the other hand, +%% the second space in "x - 1" causes the lexer to return the three +%% expected tokens: "Ident \"x\"", then "Kwd \"-\"", then "Int(1)". + +\section{Standalone OCaml programs} +\pdfsection{Standalone OCaml programs} + +All examples given so far were executed under the interactive system. 
+OCaml code can also be compiled separately and executed +non-interactively using the batch compilers "ocamlc" and "ocamlopt". +The source code must be put in a file with extension ".ml". It +consists of a sequence of phrases, which will be evaluated at runtime +in their order of appearance in the source file. Unlike in interactive +mode, types and values are not printed automatically; the program must +call printing functions explicitly to produce some output. The ";;" used +in the interactive examples is not required in +source files created for use with OCaml compilers, but can be helpful +to mark the end of a top-level expression unambiguously even when +there are syntax errors. +Here is a +sample standalone program to print Fibonacci numbers: +\begin{verbatim} +(* File fib.ml *) +let rec fib n = + if n < 2 then 1 else fib (n-1) + fib (n-2);; +let main () = + let arg = int_of_string Sys.argv.(1) in + print_int (fib arg); + print_newline (); + exit 0;; +main ();; +\end{verbatim} +"Sys.argv" is an array of strings containing the command-line +parameters. "Sys.argv.(1)" is thus the first command-line parameter. +The program above is compiled and executed with the following shell +commands: +\begin{verbatim} +$ ocamlc -o fib fib.ml +$ ./fib 10 +89 +$ ./fib 20 +10946 +\end{verbatim} + +More complex standalone OCaml programs are typically composed of +multiple source files, and can link with precompiled libraries. +Chapters~\ref{c:camlc} and~\ref{c:nativecomp} explain how to use the +batch compilers "ocamlc" and "ocamlopt". Recompilation of +multi-file OCaml projects can be automated using third-party +build systems, such as the +\href{https://github.com/ocaml/ocamlbuild/}{ocamlbuild} +compilation manager. 
diff --git a/manual/manual/tutorials/lablexamples.etex b/manual/manual/tutorials/lablexamples.etex new file mode 100644 index 00000000..d4ceef17 --- /dev/null +++ b/manual/manual/tutorials/lablexamples.etex @@ -0,0 +1,491 @@ +\chapter{Labels and variants} \label{c:labl-examples} +\pdfchapterfold{-2}{Tutorial: Labels and variants} +%HEVEA\cutname{lablexamples.html} +{\it (Chapter written by Jacques Garrigue)} + +\bigskip + +\noindent This chapter gives an overview of the new features in +OCaml 3: labels, and polymorphic variants. + +\section{Labels} +\pdfsection{Labels} + +If you have a look at modules ending in "Labels" in the standard +library, you will see that function types have annotations you did not +have in the functions you defined yourself. + +\begin{caml_example}{toplevel} +ListLabels.map;; +StringLabels.sub;; +\end{caml_example} + +Such annotations of the form "name:" are called {\em labels}. They are +meant to document the code, allow more checking, and give more +flexibility to function application. +You can give such names to arguments in your programs, by prefixing them +with a tilde "~". + +\begin{caml_example}{toplevel} +let f ~x ~y = x - y;; +let x = 3 and y = 2 in f ~x ~y;; +\end{caml_example} + +When you want to use distinct names for the variable and the label +appearing in the type, you can use a naming label of the form +"~name:". This also applies when the argument is not a variable. + +\begin{caml_example}{toplevel} +let f ~x:x1 ~y:y1 = x1 - y1;; +f ~x:3 ~y:2;; +\end{caml_example} + +Labels obey the same rules as other identifiers in OCaml, that is you +cannot use a reserved keyword (like "in" or "to") as a label. + +Formal parameters and arguments are matched according to their +respective labels\footnote{This corresponds to the commuting label mode +of Objective Caml 3.00 through 3.02, with some additional flexibility +on total applications.
The so-called classic mode ("-nolabels" +option) is now deprecated for normal use.}, the absence of label +being interpreted as the empty label. +% +This allows commuting arguments in applications. One can also +partially apply a function on any argument, creating a new function of +the remaining parameters. + +\begin{caml_example}{toplevel} +let f ~x ~y = x - y;; +f ~y:2 ~x:3;; +ListLabels.fold_left;; +ListLabels.fold_left [1;2;3] ~init:0 ~f:( + );; +ListLabels.fold_left ~init:0;; +\end{caml_example} + +If several arguments of a function bear the same label (or no label), +they will not commute among themselves, and order matters. But they +can still commute with other arguments. + +\begin{caml_example}{toplevel} +let hline ~x:x1 ~x:x2 ~y = (x1, x2, y);; +hline ~x:3 ~y:2 ~x:5;; +\end{caml_example} + +As an exception to the above parameter matching rules, if an +application is total (omitting all optional arguments), labels may be +omitted. +In practice, many applications are total, so that labels can often be +omitted. +\begin{caml_example}{toplevel} +f 3 2;; +ListLabels.map succ [1;2;3];; +\end{caml_example} +But beware that functions like "ListLabels.fold_left" whose result +type is a type variable will never be considered as totally applied. +\begin{caml_example}{toplevel}[error] +ListLabels.fold_left ( + ) 0 [1;2;3];; +\end{caml_example} + +When a function is passed as an argument to a higher-order function, +labels must match in both types. Neither adding nor removing labels +is allowed. +\begin{caml_example}{toplevel} +let h g = g ~x:3 ~y:2;; +h f;; +h ( + ) [@@expect error];; +\end{caml_example} +Note that when you don't need an argument, you can still use a wildcard +pattern, but you must prefix it with the label. +\begin{caml_example}{toplevel} +h (fun ~x:_ ~y -> y+1);; +\end{caml_example} + +\subsection{Optional arguments} + +An interesting feature of labeled arguments is that they can be made +optional. For optional parameters, the question mark "?"
replaces the +tilde "~" of non-optional ones, and the label is also prefixed by "?" +in the function type. +Default values may be given for such optional parameters. + +\begin{caml_example}{toplevel} +let bump ?(step = 1) x = x + step;; +bump 2;; +bump ~step:3 2;; +\end{caml_example} + +A function taking some optional arguments must also take at least one +non-optional argument. The criterion for deciding whether an optional +argument has been omitted is the non-labeled application of an +argument appearing after this optional argument in the function type. +Note that if that argument is labeled, you will only be able to +eliminate optional arguments through the special case for total +applications. + +\begin{caml_example}{toplevel} +let test ?(x = 0) ?(y = 0) () ?(z = 0) () = (x, y, z);; +test ();; +test ~x:2 () ~z:3 ();; +\end{caml_example} + +Optional parameters may also commute with non-optional or unlabeled +ones, as long as they are applied simultaneously. By nature, optional +arguments do not commute with unlabeled arguments applied +independently. +\begin{caml_example}{toplevel} +test ~y:2 ~x:3 () ();; +test () () ~z:1 ~y:2 ~x:3;; +(test () ()) ~z:1 [@@expect error];; +\end{caml_example} +Here "(test () ())" is already "(0,0,0)" and cannot be further +applied. + +Optional arguments are actually implemented as option types. If +you do not give a default value, you have access to their internal +representation, "type 'a option = None | Some of 'a". You can then +provide different behaviors when an argument is present or not. + +\begin{caml_example}{toplevel} +let bump ?step x = + match step with + | None -> x * 2 + | Some y -> x + y +;; +\end{caml_example} + +It may also be useful to relay an optional argument from a function +call to another. This can be done by prefixing the applied argument +with "?". This question mark disables the wrapping of optional +argument in an option type. 
+ +\begin{caml_example}{toplevel} +let test2 ?x ?y () = test ?x ?y () ();; +test2 ?x:None;; +\end{caml_example} + +\subsection{Labels and type inference} +\label{ss:label-inference} + +While they provide an increased comfort for writing function +applications, labels and optional arguments have the pitfall that they +cannot be inferred as completely as the rest of the language. + +You can see it in the following two examples. +\begin{caml_example}{toplevel} +let h' g = g ~y:2 ~x:3;; +h' f [@@expect error];; +let bump_it bump x = + bump ~step:2 x;; +bump_it bump 1 [@@expect error];; +\end{caml_example} +The first case is simple: "g" is passed "~y" and then "~x", but "f" +expects "~x" and then "~y". This is correctly handled if we know the +type of "g" to be "x:int -> y:int -> int" in advance, but otherwise +this causes the above type clash. The simplest workaround is to apply +formal parameters in a standard order. + +The second example is more subtle: while we intended the argument +"bump" to be of type "?step:int -> int -> int", it is inferred as +"step:int -> int -> 'a". +% +These two types being incompatible (internally normal and optional +arguments are different), a type error occurs when applying "bump_it" +to the real "bump". + +We will not try here to explain in detail how type inference works. +One must just understand that there is not enough information in the +above program to deduce the correct type of "g" or "bump". That is, +there is no way to know whether an argument is optional or not, or +which is the correct order, by looking only at how a function is +applied. The strategy used by the compiler is to assume that there are +no optional arguments, and that applications are done in the right +order. + +The right way to solve this problem for optional parameters is to add +a type annotation to the argument "bump". 
+\begin{caml_example}{toplevel} +let bump_it (bump : ?step:int -> int -> int) x = + bump ~step:2 x;; +bump_it bump 1;; +\end{caml_example} +In practice, such problems appear mostly when using objects whose +methods have optional arguments, so that writing the type of object +arguments is often a good idea. + +Normally the compiler generates a type error if you attempt to pass to +a function a parameter whose type is different from the expected one. +However, in the specific case where the expected type is a non-labeled +function type, and the argument is a function expecting optional +parameters, the compiler will attempt to transform the argument to +have it match the expected type, by passing "None" for all optional +parameters. + +\begin{caml_example}{toplevel} +let twice f (x : int) = f(f x);; +twice bump 2;; +\end{caml_example} + +This transformation is coherent with the intended semantics, +including side-effects. That is, if the application of optional +parameters shall produce side-effects, these are delayed until the +received function is really applied to an argument. + +\subsection{Suggestions for labeling} + +Like for names, choosing labels for functions is not an easy task. A +good labeling is a labeling which + +\begin{itemize} +\item makes programs more readable, +\item is easy to remember, +\item when possible, allows useful partial applications. +\end{itemize} + +We explain here the rules we applied when labeling OCaml +libraries. + +To speak in an ``object-oriented'' way, one can consider that each +function has a main argument, its {\em object}, and other arguments +related with its action, the {\em parameters}. To permit the +combination of functions through functionals in commuting label mode, the +object will not be labeled. Its role is clear from the function +itself. The parameters are labeled with names reminding of +their nature or their role. The best labels combine nature and +role. 
When this is not possible the role is to be preferred, since the +nature will +often be given by the type itself. Obscure abbreviations should be +avoided. +\begin{alltt} +"ListLabels.map : f:('a -> 'b) -> 'a list -> 'b list" +UnixLabels.write : file_descr -> buf:bytes -> pos:int -> len:int -> unit +\end{alltt} + +When there are several objects of same nature and role, they are all +left unlabeled. +\begin{alltt} +"ListLabels.iter2 : f:('a -> 'b -> 'c) -> 'a list -> 'b list -> unit" +\end{alltt} + +When there is no preferable object, all arguments are labeled. +\begin{alltt} +BytesLabels.blit : + src:bytes -> src_pos:int -> dst:bytes -> dst_pos:int -> len:int -> unit +\end{alltt} + +However, when there is only one argument, it is often left unlabeled. +\begin{alltt} +BytesLabels.create : int -> bytes +\end{alltt} +This principle also applies to functions of several arguments whose +return type is a type variable, as long as the role of each argument +is not ambiguous. Labeling such functions may lead to awkward error +messages when one attempts to omit labels in an application, as we +have seen with "ListLabels.fold_left". + +Here are some of the label names you will find throughout the +libraries. + +\begin{tableau}{|l|l|}{Label}{Meaning} +\entree{"f:"}{a function to be applied} +\entree{"pos:"}{a position in a string, array or byte sequence} +\entree{"len:"}{a length} +\entree{"buf:"}{a byte sequence or string used as buffer} +\entree{"src:"}{the source of an operation} +\entree{"dst:"}{the destination of an operation} +\entree{"init:"}{the initial value for an iterator} +\entree{"cmp:"}{a comparison function, {\it e.g.} "Pervasives.compare"} +\entree{"mode:"}{an operation mode or a flag list} +\end{tableau} + +All these are only suggestions, but keep in mind that the +choice of labels is essential for readability. Bizarre choices will +make the program harder to maintain. 
+ +In the ideal, the right function name with right labels should be +enough to understand the function's meaning. Since one can get this +information with OCamlBrowser or the "ocaml" toplevel, the documentation +is only used when a more detailed specification is needed. + +\begin{caml_eval} +#label false;; +\end{caml_eval} + + +\section{Polymorphic variants} +\pdfsection{Polymorphic variants} + +Variants as presented in section~\ref{s:tut-recvariants} are a +powerful tool to build data structures and algorithms. However they +sometimes lack flexibility when used in modular programming. This is +due to the fact that every constructor is assigned to a unique type +when defined and used. Even if the same name appears in the definition +of multiple types, the constructor itself belongs to only one type. +Therefore, one cannot decide that a given constructor belongs to +multiple types, or consider a value of some type to belong to some +other type with more constructors. + +With polymorphic variants, this original assumption is removed. That +is, a variant tag does not belong to any type in particular, the type +system will just check that it is an admissible value according to its +use. You need not define a type before using a variant tag. A variant +type will be inferred independently for each of its uses. + +\subsection*{Basic use} + +In programs, polymorphic variants work like usual ones. You just have +to prefix their names with a backquote character "`". +\begin{caml_example}{toplevel} +[`On; `Off];; +`Number 1;; +let f = function `On -> 1 | `Off -> 0 | `Number n -> n;; +List.map f [`On; `Off];; +\end{caml_example} +"[>`Off|`On] list" means that to match this list, you should at +least be able to match "`Off" and "`On", without argument. +"[<`On|`Off|`Number of int]" means that "f" may be applied to "`Off", +"`On" (both without argument), or "`Number" $n$ where +$n$ is an integer. 
+The ">" and "<" inside the variant types show that they may still be +refined, either by defining more tags or by allowing less. As such, they +contain an implicit type variable. Because each of the variant types +appears only once in the whole type, their implicit type variables are +not shown. + +The above variant types were polymorphic, allowing further refinement. +When writing type annotations, one will most often describe fixed +variant types, that is types that cannot be refined. This is +also the case for type abbreviations. Such types do not contain "<" or +">", but just an enumeration of the tags and their associated types, +just like in a normal datatype definition. +\begin{caml_example}{toplevel} +type 'a vlist = [`Nil | `Cons of 'a * 'a vlist];; +let rec map f : 'a vlist -> 'b vlist = function + | `Nil -> `Nil + | `Cons(a, l) -> `Cons(f a, map f l) +;; +\end{caml_example} + +\subsection*{Advanced use} + +Type-checking polymorphic variants is a subtle thing, and some +expressions may result in more complex type information. + +\begin{caml_example}{toplevel} +let f = function `A -> `C | `B -> `D | x -> x;; +f `E;; +\end{caml_example} +Here we are seeing two phenomena. First, since this matching is open +(the last case catches any tag), we obtain the type "[> `A | `B]" +rather than "[< `A | `B]" in a closed matching. Then, since "x" is +returned as is, input and return types are identical. The notation "as +'a" denotes such type sharing. If we apply "f" to yet another tag +"`E", it gets added to the list. + +\begin{caml_example}{toplevel} +let f1 = function `A x -> x = 1 | `B -> true | `C -> false +let f2 = function `A x -> x = "a" | `B -> true ;; +let f x = f1 x && f2 x;; +\end{caml_example} +Here "f1" and "f2" both accept the variant tags "`A" and "`B", but the +argument of "`A" is "int" for "f1" and "string" for "f2". In "f"'s +type "`C", only accepted by "f1", disappears, but both argument types +appear for "`A" as "int & string". 
This means that if we +pass the variant tag "`A" to "f", its argument should be {\em both} +"int" and "string". Since there is no such value, "f" cannot be +applied to "`A", and "`B" is the only accepted input. + +Even if a value has a fixed variant type, one can still give it a +larger type through coercions. Coercions are normally written with +both the source type and the destination type, but in simple cases the +source type may be omitted. +\begin{caml_example}{toplevel} +type 'a wlist = [`Nil | `Cons of 'a * 'a wlist | `Snoc of 'a wlist * 'a];; +let wlist_of_vlist l = (l : 'a vlist :> 'a wlist);; +let open_vlist l = (l : 'a vlist :> [> 'a vlist]);; +fun x -> (x :> [`A|`B|`C]);; +\end{caml_example} + +You may also selectively coerce values through pattern matching. +\begin{caml_example}{toplevel} +let split_cases = function + | `Nil | `Cons _ as x -> `A x + | `Snoc _ as x -> `B x +;; +\end{caml_example} +When an or-pattern composed of variant tags is wrapped inside an +alias-pattern, the alias is given a type containing only the tags +enumerated in the or-pattern. This allows for many useful idioms, like +incremental definition of functions. + +\begin{caml_example}{toplevel} +let num x = `Num x +let eval1 eval (`Num x) = x +let rec eval x = eval1 eval x ;; +let plus x y = `Plus(x,y) +let eval2 eval = function + | `Plus(x,y) -> eval x + eval y + | `Num _ as x -> eval1 eval x +let rec eval x = eval2 eval x ;; +\end{caml_example} + +To make this even more comfortable, you may use type definitions as +abbreviations for or-patterns. That is, if you have defined "type +myvariant = [`Tag1 of int | `Tag2 of bool]", then the pattern "#myvariant" is +equivalent to writing "(`Tag1(_ : int) | `Tag2(_ : bool))". 
+\begin{caml_eval} +type myvariant = [`Tag1 of int | `Tag2 of bool];; +\end{caml_eval} + +Such abbreviations may be used alone, +\begin{caml_example}{toplevel} +let f = function + | #myvariant -> "myvariant" + | `Tag3 -> "Tag3";; +\end{caml_example} +or combined with aliases. +\begin{caml_example}{toplevel} +let g1 = function `Tag1 _ -> "Tag1" | `Tag2 _ -> "Tag2";; +let g = function + | #myvariant as x -> g1 x + | `Tag3 -> "Tag3";; +\end{caml_example} + +\subsection{Weaknesses of polymorphic variants} + +After seeing the power of polymorphic variants, one may wonder why +they were added to core language variants, rather than replacing them. + +The answer is twofold. One first aspect is that while being pretty +efficient, the lack of static type information allows for fewer +optimizations, and makes polymorphic variants slightly heavier than +core language ones. However noticeable differences would only +appear on huge data structures. + +More important is the fact that polymorphic variants, while being +type-safe, result in a weaker type discipline. That is, core language +variants do actually much more than ensuring type-safety, they also +check that you use only declared constructors, that all constructors +present in a data-structure are compatible, and they enforce typing +constraints to their parameters. + +For this reason, you must be more careful about making types explicit +when you use polymorphic variants. When you write a library, this is +easy since you can describe exact types in interfaces, but for simple +programs you are probably better off with core language variants. + +Beware also that some idioms make trivial errors very hard to find. +For instance, the following code is probably wrong but the compiler +has no way to see it.
+\begin{caml_example}{toplevel} +type abc = [`A | `B | `C] ;; +let f = function + | `As -> "A" + | #abc -> "other" ;; +let f : abc -> string = f ;; +\end{caml_example} +You can avoid such risks by annotating the definition itself. +\begin{caml_example}{toplevel}[error] +let f : abc -> string = function + | `As -> "A" + | #abc -> "other" ;; +\end{caml_example} diff --git a/manual/manual/tutorials/moduleexamples.etex b/manual/manual/tutorials/moduleexamples.etex new file mode 100644 index 00000000..a9a4f648 --- /dev/null +++ b/manual/manual/tutorials/moduleexamples.etex @@ -0,0 +1,385 @@ +\chapter{The module system} \label{c:moduleexamples} +\pdfchapterfold{-5}{Tutorial: The module system} +%HEVEA\cutname{moduleexamples.html} + +This chapter introduces the module system of OCaml. + +\section{Structures} +\pdfsection{Structures} + +A primary motivation for modules is to package together related +definitions (such as the definitions of a data type and associated +operations over that type) and enforce a consistent naming scheme for +these definitions. This avoids running out of names or accidentally +confusing names. Such a package is called a {\em structure} and +is introduced by the "struct"\ldots"end" construct, which contains an +arbitrary sequence of definitions. The structure is usually given a +name with the "module" binding. 
Here is for instance a structure +packaging together a type of priority queues and their operations: +\begin{caml_example}{toplevel} +module PrioQueue = + struct + type priority = int + type 'a queue = Empty | Node of priority * 'a * 'a queue * 'a queue + let empty = Empty + let rec insert queue prio elt = + match queue with + Empty -> Node(prio, elt, Empty, Empty) + | Node(p, e, left, right) -> + if prio <= p + then Node(prio, elt, insert right p e, left) + else Node(p, e, insert right prio elt, left) + exception Queue_is_empty + let rec remove_top = function + Empty -> raise Queue_is_empty + | Node(prio, elt, left, Empty) -> left + | Node(prio, elt, Empty, right) -> right + | Node(prio, elt, (Node(lprio, lelt, _, _) as left), + (Node(rprio, relt, _, _) as right)) -> + if lprio <= rprio + then Node(lprio, lelt, remove_top left, right) + else Node(rprio, relt, left, remove_top right) + let extract = function + Empty -> raise Queue_is_empty + | Node(prio, elt, _, _) as queue -> (prio, elt, remove_top queue) + end;; +\end{caml_example} +Outside the structure, its components can be referred to using the +``dot notation'', that is, identifiers qualified by a structure name. +For instance, "PrioQueue.insert" is the function "insert" defined +inside the structure "PrioQueue" and "PrioQueue.queue" is the type +"queue" defined in "PrioQueue". +\begin{caml_example}{toplevel} +PrioQueue.insert PrioQueue.empty 1 "hello";; +\end{caml_example} + +Another possibility is to open the module, which brings all +identifiers defined inside the module into the scope of the current +structure. + +\begin{caml_example}{toplevel} + open PrioQueue;; + insert empty 1 "hello";; +\end{caml_example} + +Opening a module enables lighter access to its components, at the +cost of making it harder to identify in which module an identifier +has been defined.
In particular, opened modules can shadow +identifiers present in the current scope, potentially leading +to confusing errors: + +\begin{caml_example}{toplevel} + let empty = [] + open PrioQueue;; + let x = 1 :: empty [@@expect error];; +\end{caml_example} + + +A partial solution to this conundrum is to open modules locally, +making the components of the module available only in the +concerned expression. This can also make the code easier to read +-- the open statement is closer to where it is used -- and to refactor +-- the code fragment is more self-contained. +Two constructions are available for this purpose: +\begin{caml_example}{toplevel} + let open PrioQueue in + insert empty 1 "hello";; +\end{caml_example} +and +\begin{caml_example}{toplevel} + PrioQueue.(insert empty 1 "hello");; +\end{caml_example} +In the second form, when the body of a local open is itself delimited +by parentheses, braces or brackets, the parentheses of the local open +can be omitted. For instance, +\begin{caml_example}{toplevel} + PrioQueue.[empty] = PrioQueue.([empty]);; + PrioQueue.[|empty|] = PrioQueue.([|empty|]);; + PrioQueue.{ contents = empty } = PrioQueue.({ contents = empty });; +\end{caml_example} +becomes +\begin{caml_example}{toplevel} + PrioQueue.[insert empty 1 "hello"];; +\end{caml_example} + +It is also possible to copy the components of a module inside +another module by using an "include" statement. This can be +particularly useful to extend existing modules. As an illustration, +we could add functions that return an optional value rather than +an exception when the priority queue is empty. +\begin{caml_example}{toplevel} + module PrioQueueOpt = + struct + include PrioQueue + + let remove_top_opt x = + try Some(remove_top x) with Queue_is_empty -> None + + let extract_opt x = + try Some(extract x) with Queue_is_empty -> None + end;; +\end{caml_example} + +\section{Signatures} +\pdfsection{Signatures} + +Signatures are interfaces for structures.
A signature specifies +which components of a structure are accessible from the outside, and +with which type. It can be used to hide some components of a structure +(e.g. local function definitions) or export some components with a +restricted type. For instance, the signature below specifies the three +priority queue operations "empty", "insert" and "extract", but not the +auxiliary function "remove_top". Similarly, it makes the "queue" type +abstract (by not providing its actual representation as a concrete type). +\begin{caml_example}{toplevel} +module type PRIOQUEUE = + sig + type priority = int (* still concrete *) + type 'a queue (* now abstract *) + val empty : 'a queue + val insert : 'a queue -> int -> 'a -> 'a queue + val extract : 'a queue -> int * 'a * 'a queue + exception Queue_is_empty + end;; +\end{caml_example} +Restricting the "PrioQueue" structure by this signature results in +another view of the "PrioQueue" structure where the "remove_top" +function is not accessible and the actual representation of priority +queues is hidden: +\begin{caml_example}{toplevel} +module AbstractPrioQueue = (PrioQueue : PRIOQUEUE);; +AbstractPrioQueue.remove_top [@@expect error];; +AbstractPrioQueue.insert AbstractPrioQueue.empty 1 "hello";; +\end{caml_example} +The restriction can also be performed during the definition of the +structure, as in +\begin{verbatim} +module PrioQueue = (struct ... end : PRIOQUEUE);; +\end{verbatim} +An alternate syntax is provided for the above: +\begin{verbatim} +module PrioQueue : PRIOQUEUE = struct ... end;; +\end{verbatim} + +Like for modules, it is possible to include a signature to copy +its components inside the current signature. 
For instance, we +can extend the PRIOQUEUE signature with the "extract_opt" +function: + +\begin{caml_example}{toplevel} +module type PRIOQUEUE_WITH_OPT = + sig + include PRIOQUEUE + val extract_opt : 'a queue -> (int * 'a * 'a queue) option + end;; +\end{caml_example} + + +\section{Functors} +\pdfsection{Functors} + +Functors are ``functions'' from modules to modules. Functors let you create +parameterized modules and then provide other modules as parameter(s) to get +a specific implementation. For instance, a "Set" module implementing sets +as sorted lists could be parameterized to work with any module that provides +an element type and a comparison function "compare" (such as "OrderedString"): + +\begin{caml_example}{toplevel} +type comparison = Less | Equal | Greater;; +module type ORDERED_TYPE = + sig + type t + val compare: t -> t -> comparison + end;; +module Set = + functor (Elt: ORDERED_TYPE) -> + struct + type element = Elt.t + type set = element list + let empty = [] + let rec add x s = + match s with + [] -> [x] + | hd::tl -> + match Elt.compare x hd with + Equal -> s (* x is already in s *) + | Less -> x :: s (* x is smaller than all elements of s *) + | Greater -> hd :: add x tl + let rec member x s = + match s with + [] -> false + | hd::tl -> + match Elt.compare x hd with + Equal -> true (* x belongs to s *) + | Less -> false (* x is smaller than all elements of s *) + | Greater -> member x tl + end;; +\end{caml_example} +By applying the "Set" functor to a structure implementing an ordered +type, we obtain set operations for this type: +\begin{caml_example}{toplevel} +module OrderedString = + struct + type t = string + let compare x y = if x = y then Equal else if x < y then Less else Greater + end;; +module StringSet = Set(OrderedString);; +StringSet.member "bar" (StringSet.add "foo" StringSet.empty);; +\end{caml_example} + +\section{Functors and type abstraction} +\pdfsection{Functors and type abstraction} + +As in the "PrioQueue" example, it would 
be good style to hide the +actual implementation of the type "set", so that users of the +structure will not rely on sets being lists, and we can switch later +to another, more efficient representation of sets without breaking +their code. This can be achieved by restricting "Set" by a suitable +functor signature: +\begin{caml_example}{toplevel} +module type SETFUNCTOR = + functor (Elt: ORDERED_TYPE) -> + sig + type element = Elt.t (* concrete *) + type set (* abstract *) + val empty : set + val add : element -> set -> set + val member : element -> set -> bool + end;; +module AbstractSet = (Set : SETFUNCTOR);; +module AbstractStringSet = AbstractSet(OrderedString);; +AbstractStringSet.add "gee" AbstractStringSet.empty;; +\end{caml_example} + +In an attempt to write the type constraint above more elegantly, +one may wish to name the signature of the structure +returned by the functor, then use that signature in the constraint: +\begin{caml_example}{toplevel} +module type SET = + sig + type element + type set + val empty : set + val add : element -> set -> set + val member : element -> set -> bool + end;; +module WrongSet = (Set : functor(Elt: ORDERED_TYPE) -> SET);; +module WrongStringSet = WrongSet(OrderedString);; +WrongStringSet.add "gee" WrongStringSet.empty [@@expect error];; +\end{caml_example} +The problem here is that "SET" specifies the type "element" +abstractly, so that the type equality between "element" in the result +of the functor and "t" in its argument is forgotten. Consequently, +"WrongStringSet.element" is not the same type as "string", and the +operations of "WrongStringSet" cannot be applied to strings. +As demonstrated above, it is important that the type "element" in the +signature "SET" be declared equal to "Elt.t"; unfortunately, this is +impossible above since "SET" is defined in a context where "Elt" does +not exist. 
To overcome this difficulty, OCaml provides a +"with type" construct over signatures that allows enriching a signature +with extra type equalities: +\begin{caml_example}{toplevel} +module AbstractSet2 = + (Set : functor(Elt: ORDERED_TYPE) -> (SET with type element = Elt.t));; +\end{caml_example} + +As in the case of simple structures, an alternate syntax is provided +for defining functors and restricting their result: +\begin{verbatim} +module AbstractSet2(Elt: ORDERED_TYPE) : (SET with type element = Elt.t) = + struct ... end;; +\end{verbatim} + +Abstracting a type component in a functor result is a powerful +technique that provides a high degree of type safety, as we now +illustrate. Consider an ordering over character strings that is +different from the standard ordering implemented in the +"OrderedString" structure. For instance, we compare strings without +distinguishing upper and lower case. +\begin{caml_example}{toplevel} +module NoCaseString = + struct + type t = string + let compare s1 s2 = + OrderedString.compare (String.lowercase_ascii s1) (String.lowercase_ascii s2) + end;; +module NoCaseStringSet = AbstractSet(NoCaseString);; +NoCaseStringSet.add "FOO" AbstractStringSet.empty [@@expect error];; +\end{caml_example} +Note that the two types "AbstractStringSet.set" and +"NoCaseStringSet.set" are not compatible, and values of these +two types do not match. This is the correct behavior: even though both +set types contain elements of the same type (strings), they are built +upon different orderings of that type, and different invariants need +to be maintained by the operations (being strictly increasing for the +standard ordering and for the case-insensitive ordering). Applying +operations from "AbstractStringSet" to values of type +"NoCaseStringSet.set" could give incorrect results, or build +lists that violate the invariants of "NoCaseStringSet". 
+ +\section{Modules and separate compilation} +\pdfsection{Modules and separate compilation} + +All examples of modules so far have been given in the context of the +interactive system. However, modules are most useful for large, +batch-compiled programs. For these programs, it is a practical +necessity to split the source into several files, called compilation +units, that can be compiled separately, thus minimizing recompilation +after changes. + +In OCaml, compilation units are special cases of structures +and signatures, and the relationship between the units can be +explained easily in terms of the module system. A compilation unit \var{A} +comprises two files: +\begin{itemize} +\item the implementation file \var{A}".ml", which contains a sequence +of definitions, analogous to the inside of a "struct"\ldots"end" +construct; +\item the interface file \var{A}".mli", which contains a sequence of +specifications, analogous to the inside of a "sig"\ldots"end" +construct. +\end{itemize} +These two files together define a structure named \var{A} as if +the following definition was entered at top-level: +\begin{alltt} +module \var{A}: sig (* \hbox{contents of file} \var{A}.mli *) end + = struct (* \hbox{contents of file} \var{A}.ml *) end;; +\end{alltt} +The files that define the compilation units can be compiled separately +using the "ocamlc -c" command (the "-c" option means ``compile only, do +not try to link''); this produces compiled interface files (with +extension ".cmi") and compiled object code files (with extension +".cmo"). When all units have been compiled, their ".cmo" files are +linked together using the "ocamlc" command. 
For instance, the following +commands compile and link a program composed of two compilation units +"Aux" and "Main": +\begin{verbatim} +$ ocamlc -c Aux.mli # produces aux.cmi +$ ocamlc -c Aux.ml # produces aux.cmo +$ ocamlc -c Main.mli # produces main.cmi +$ ocamlc -c Main.ml # produces main.cmo +$ ocamlc -o theprogram Aux.cmo Main.cmo +\end{verbatim} +The program behaves exactly as if the following phrases were entered +at top-level: +\begin{alltt} +module Aux: sig (* \rminalltt{contents of} Aux.mli *) end + = struct (* \rminalltt{contents of} Aux.ml *) end;; +module Main: sig (* \rminalltt{contents of} Main.mli *) end + = struct (* \rminalltt{contents of} Main.ml *) end;; +\end{alltt} +In particular, "Main" can refer to "Aux": the definitions and +declarations contained in "Main.ml" and "Main.mli" can refer to +definitions in "Aux.ml", using the "Aux."\var{ident} notation, provided +these definitions are exported in "Aux.mli". + +The order in which the ".cmo" files are given to "ocamlc" during the +linking phase determines the order in which the module definitions +occur. Hence, in the example above, "Aux" appears first and "Main" can +refer to it, but "Aux" cannot refer to "Main". + +Note that only top-level structures can be mapped to +separately-compiled files, but neither functors nor module types. +However, all module-class objects can appear as components of a +structure, so the solution is to put the functor or module type +inside a structure, which can then be mapped to a file.
diff --git a/manual/manual/tutorials/objectexamples.etex b/manual/manual/tutorials/objectexamples.etex new file mode 100644 index 00000000..9166c3cd --- /dev/null +++ b/manual/manual/tutorials/objectexamples.etex @@ -0,0 +1,1293 @@ +\chapter{Objects in OCaml} +\label{c:objectexamples} +\pdfchapterfold{-15}{Tutorial: Objects in OCaml} +%HEVEA\cutname{objectexamples.html} +{\it (Chapter written by J\'er\^ome Vouillon, Didier R\'emy and Jacques Garrigue)} + +\bigskip + +\noindent This chapter gives an overview of the object-oriented features of +OCaml. + +Note that the relationship between object, class and type in OCaml is +different than in mainstream object-oriented languages such as Java and +C++, so you shouldn't assume that similar keywords mean the same thing. +Object-oriented features are used much less frequently in OCaml than +in those languages. OCaml has alternatives that are often more appropriate, +such as modules and functors. Indeed, many OCaml programs do not use objects +at all. 
+ + +\begin{htmlonly} + +\ref{ss:classes-and-objects} Classes and objects \\ +\ref{ss:immediate-objects} Immediate objects \\ +\ref{ss:reference-to-self} Reference to self \\ +\ref{ss:initializers} Initializers \\ +\ref{ss:virtual-methods} Virtual methods \\ +\ref{ss:private-methods} Private methods \\ +\ref{ss:class-interfaces} Class interfaces \\ +\ref{ss:inheritance} Inheritance \\ +\ref{ss:multiple-inheritance} Multiple inheritance \\ +\ref{ss:parameterized-classes} Parameterized classes \\ +\ref{ss:polymorphic-methods} Polymorphic methods \\ +\ref{ss:using-coercions} Using coercions \\ +\ref{ss:functional-objects} Functional objects \\ +\ref{ss:cloning-objects} Cloning objects \\ +\ref{ss:recursive-classes} Recursive classes \\ +\ref{ss:binary-methods} Binary methods \\ +\ref{ss:friends} Friends \\ + +%%\ref{s:advanced-examples} {\bf Advanced examples} +%% +%%\ref{ss:bank-accounts} An extended example of bank accounts \\ +%%\ref{ss:modules-as-classes} Simple modules as classes: +%% \ref{module:string} Strings +%% \ref{module:stack} Stacks +%% \ref{module:hashtbl} Hash tables +%% \ref{module:set} Sets \\ +%%\ref{ss:subject-observer} The subject/observer pattern \\ + +\end{htmlonly} + +\section{Classes and objects} +\pdfsection{Classes and objects} +\label{ss:classes-and-objects} + +The class "point" below defines one instance variable "x" and two methods +"get_x" and "move". The initial value of the instance variable is "0". +The variable "x" is declared mutable, so the method "move" can change +its value. +\begin{caml_example}{toplevel} +class point = + object + val mutable x = 0 + method get_x = x + method move d = x <- x + d + end;; +\end{caml_example} + +We now create a new point "p", instance of the "point" class. +\begin{caml_example}{toplevel} +let p = new point;; +\end{caml_example} +Note that the type of "p" is "point". This is an abbreviation +automatically defined by the class definition above. 
It stands for the +object type "<get_x : int; move : int -> unit>", listing the methods +of class "point" along with their types. + +We now invoke some methods of "p": +\begin{caml_example}{toplevel} +p#get_x;; +p#move 3;; +p#get_x;; +\end{caml_example} + +The evaluation of the body of a class only takes place at object +creation time. Therefore, in the following example, the instance +variable "x" is initialized to different values for two different +objects. +\begin{caml_example}{toplevel} +let x0 = ref 0;; +class point = + object + val mutable x = incr x0; !x0 + method get_x = x + method move d = x <- x + d + end;; +new point#get_x;; +new point#get_x;; +\end{caml_example} + +The class "point" can also be abstracted over the initial values of +the "x" coordinate. +\begin{caml_example}{toplevel} +class point = fun x_init -> + object + val mutable x = x_init + method get_x = x + method move d = x <- x + d + end;; +\end{caml_example} +Like in function definitions, the definition above can be +abbreviated as: +\begin{caml_example}{toplevel} +class point x_init = + object + val mutable x = x_init + method get_x = x + method move d = x <- x + d + end;; +\end{caml_example} +An instance of the class "point" is now a function that expects an +initial parameter to create a point object: +\begin{caml_example}{toplevel} +new point;; +let p = new point 7;; +\end{caml_example} +The parameter "x_init" is, of course, visible in the whole body of the +definition, including methods. For instance, the method "get_offset" +in the class below returns the position of the object relative to its +initial position. +\begin{caml_example}{toplevel} +class point x_init = + object + val mutable x = x_init + method get_x = x + method get_offset = x - x_init + method move d = x <- x + d + end;; +\end{caml_example} +%Instance variables can only be used inside methods.
For instance it would +%not be possible to define +%\begin{caml_example}{toplevel} +%class point x_init = +% object +% val mutable x = x_init +% val origin = x +% method get_offset = x - origin +% method move d = x <- x + d +% end;; +%\end{caml_example} +Expressions can be evaluated and bound before defining the object body +of the class. This is useful to enforce invariants. For instance, +points can be automatically adjusted to the nearest point on a grid, +as follows: +\begin{caml_example}{toplevel} +class adjusted_point x_init = + let origin = (x_init / 10) * 10 in + object + val mutable x = origin + method get_x = x + method get_offset = x - origin + method move d = x <- x + d + end;; +\end{caml_example} +(One could also raise an exception if the "x_init" coordinate is not +on the grid.) In fact, the same effect could here be obtained by +calling the definition of class "point" with the value of the +"origin". +\begin{caml_example}{toplevel} +class adjusted_point x_init = point ((x_init / 10) * 10);; +\end{caml_example} +An alternate solution would have been to define the adjustment in +a special allocation function: +\begin{caml_example}{toplevel} +let new_adjusted_point x_init = new point ((x_init / 10) * 10);; +\end{caml_example} +However, the former pattern is generally more appropriate, since +the code for adjustment is part of the definition of the class and will be +inherited. + +This ability provides class constructors as can be found in other +languages. Several constructors can be defined this way to build objects of +the same class but with different initialization patterns; an +alternative is to use initializers, as described below in section +\ref{ss:initializers}. + +\section{Immediate objects} +\pdfsection{Immediate objects} +\label{ss:immediate-objects} + +There is another, more direct way to create an object: create it +without going through a class. 
+ +The syntax is exactly the same as for class expressions, but the +result is a single object rather than a class. All the constructs +described in the rest of this section also apply to immediate objects. +\begin{caml_example}{toplevel} +let p = + object + val mutable x = 0 + method get_x = x + method move d = x <- x + d + end;; +p#get_x;; +p#move 3;; +p#get_x;; +\end{caml_example} + +Unlike classes, which cannot be defined inside an expression, +immediate objects can appear anywhere, using variables from their +environment. +\begin{caml_example}{toplevel} +let minmax x y = + if x < y then object method min = x method max = y end + else object method min = y method max = x end;; +\end{caml_example} + +Immediate objects have two weaknesses compared to classes: their types +are not abbreviated, and you cannot inherit from them. But these two +weaknesses can be advantages in some situations, as we will see +in sections \ref{ss:reference-to-self} and \ref{ss:parameterized-classes}. + +\section{Reference to self} +\pdfsection{Reference to self} +\label{ss:reference-to-self} + +A method or an initializer can invoke methods on self (that is, +the current object). For that, self must be explicitly bound, here to +the variable "s" ("s" could be any identifier, even though we will +often choose the name "self".) +\begin{caml_example}{toplevel} +class printable_point x_init = + object (s) + val mutable x = x_init + method get_x = x + method move d = x <- x + d + method print = print_int s#get_x + end;; +let p = new printable_point 7;; +p#print;; +\end{caml_example} +Dynamically, the variable "s" is bound at the invocation of a method. In +particular, when the class "printable_point" is inherited, the variable +"s" will be correctly bound to the object of the subclass. + +A common problem with self is that, as its type may be extended in +subclasses, you cannot fix it in advance. Here is a simple example. 
+\begin{caml_example}{toplevel} +let ints = ref [];; +class my_int = + object (self) + method n = 1 + method register = ints := self :: !ints + end [@@expect error];; +\end{caml_example} +You can ignore the first two lines of the error message. What matters +is the last one: putting self into an external reference would make it +impossible to extend it through inheritance. +We will see in section \ref{ss:using-coercions} a workaround to this +problem. +Note however that, since immediate objects are not extensible, the +problem does not occur with them. +\begin{caml_example}{toplevel} +let my_int = + object (self) + method n = 1 + method register = ints := self :: !ints + end;; +\end{caml_example} + +\section{Initializers} +\pdfsection{Initializers} +\label{ss:initializers} + +Let-bindings within class definitions are evaluated before the object +is constructed. It is also possible to evaluate an expression +immediately after the object has been built. Such code is written as +an anonymous hidden method called an initializer. Therefore, it can +access self and the instance variables. +\begin{caml_example}{toplevel} +class printable_point x_init = + let origin = (x_init / 10) * 10 in + object (self) + val mutable x = origin + method get_x = x + method move d = x <- x + d + method print = print_int self#get_x + initializer print_string "new point at "; self#print; print_newline () + end;; +let p = new printable_point 17;; +\end{caml_example} +Initializers cannot be overridden. On the contrary, all initializers are +evaluated sequentially. +Initializers are particularly useful to enforce invariants. +Another example can be seen in section \ref{ss:bank-accounts}. + + +\section{Virtual methods} +\pdfsection{Virtual methods and variables} +\label{ss:virtual-methods} + +It is possible to declare a method without actually defining it, using +the keyword "virtual". This method will be provided later in +subclasses. 
A class containing virtual methods must be flagged +"virtual", and cannot be instantiated (that is, no object of this class +can be created). It still defines type abbreviations (treating virtual methods +as other methods.) +\begin{caml_example}{toplevel} +class virtual abstract_point x_init = + object (self) + method virtual get_x : int + method get_offset = self#get_x - x_init + method virtual move : int -> unit + end;; +class point x_init = + object + inherit abstract_point x_init + val mutable x = x_init + method get_x = x + method move d = x <- x + d + end;; +\end{caml_example} + +Instance variables can also be declared as virtual, with the same effect +as with methods. +\begin{caml_example}{toplevel} +class virtual abstract_point2 = + object + val mutable virtual x : int + method move d = x <- x + d + end;; +class point2 x_init = + object + inherit abstract_point2 + val mutable x = x_init + method get_offset = x - x_init + end;; +\end{caml_example} + +\section{Private methods} +\pdfsection{Private methods} +\label{ss:private-methods} + +Private methods are methods that do not appear in object interfaces. +They can only be invoked from other methods of the same object. +\begin{caml_example}{toplevel} +class restricted_point x_init = + object (self) + val mutable x = x_init + method get_x = x + method private move d = x <- x + d + method bump = self#move 1 + end;; +let p = new restricted_point 0;; +p#move 10 [@@expect error] ;; +p#bump;; +\end{caml_example} +Note that this is not the same thing as private and protected methods +in Java or C++, which can be called from other objects of the same +class. This is a direct consequence of the independence between types +and classes in OCaml: two unrelated classes may produce +objects of the same type, and there is no way at the type level to +ensure that an object comes from a specific class. However a possible +encoding of friend methods is given in section \ref{ss:friends}. 
+ +Private methods are inherited (they are by default visible in subclasses), +unless they are hidden by signature matching, as described below. + +Private methods can be made public in a subclass. +\begin{caml_example}{toplevel} +class point_again x = + object (self) + inherit restricted_point x + method virtual move : _ + end;; +\end{caml_example} +The annotation "virtual" here is only used to mention a method without +providing its definition. Since we didn't add the "private" +annotation, this makes the method public, keeping the original +definition. + +An alternative definition is +\begin{caml_example}{toplevel} +class point_again x = + object (self : < move : _; ..> ) + inherit restricted_point x + end;; +\end{caml_example} +The constraint on self's type is requiring a public "move" method, and +this is sufficient to override "private". + +One could think that a private method should remain private in a subclass. +However, since the method is visible in a subclass, it is always possible +to pick its code and define a method of the same name that runs that +code, so yet another (heavier) solution would be: +\begin{caml_example}{toplevel} +class point_again x = + object + inherit restricted_point x as super + method move = super#move + end;; +\end{caml_example} + +Of course, private methods can also be virtual. Then, the keywords must +appear in this order "method private virtual". + +\section{Class interfaces} +\pdfsection{Class interfaces} +\label{ss:class-interfaces} + + +%XXX Differentiate class type and class interface ? + +Class interfaces are inferred from class definitions. They may also +be defined directly and used to restrict the type of a class. Like class +declarations, they also define a new type abbreviation. 
+\begin{caml_example}{toplevel} +class type restricted_point_type = + object + method get_x : int + method bump : unit +end;; +fun (x : restricted_point_type) -> x;; +\end{caml_example} +In addition to program documentation, class interfaces can be used to +constrain the type of a class. Both concrete instance variables and concrete +private methods can be hidden by a class type constraint. Public +methods and virtual members, however, cannot. +\begin{caml_example}{toplevel} +class restricted_point' x = (restricted_point x : restricted_point_type);; +\end{caml_example} +Or, equivalently: +\begin{caml_example}{toplevel} +class restricted_point' = (restricted_point : int -> restricted_point_type);; +\end{caml_example} +The interface of a class can also be specified in a module +signature, and used to restrict the inferred signature of a module. +\begin{caml_example}{toplevel} +module type POINT = sig + class restricted_point' : int -> + object + method get_x : int + method bump : unit + end +end;; +module Point : POINT = struct + class restricted_point' = restricted_point +end;; +\end{caml_example} + +\section{Inheritance} +\pdfsection{Inheritance} +\label{ss:inheritance} + +We illustrate inheritance by defining a class of colored points that +inherits from the class of points. This class has all instance +variables and all methods of class "point", plus a new instance +variable "c" and a new method "color". +\begin{caml_example}{toplevel} +class colored_point x (c : string) = + object + inherit point x + val c = c + method color = c + end;; +let p' = new colored_point 5 "red";; +p'#get_x, p'#color;; +\end{caml_example} +A point and a colored point have incompatible types, since a point has +no method "color". However, the function "get_succ_x" below is a generic +function applying method "get_x" to any object "p" that has this +method (and possibly some others, which are represented by an ellipsis +in the type). Thus, it applies to both points and colored points. 
+\begin{caml_example}{toplevel} +let get_succ_x p = p#get_x + 1;; +get_succ_x p + get_succ_x p';; +\end{caml_example} +Methods need not be declared previously, as shown by the example: +\begin{caml_example}{toplevel} +let set_x p = p#set_x;; +let incr p = set_x p (get_succ_x p);; +\end{caml_example} + +\section{Multiple inheritance} +\pdfsection{Multiple inheritance} +\label{ss:multiple-inheritance} + +Multiple inheritance is allowed. Only the last definition of a method +is kept: the redefinition in a subclass of a method that was visible in +the parent class overrides the definition in the parent class. +Previous definitions of a method can be reused by binding the related +ancestor. Below, "super" is bound to the ancestor "printable_point". +The name "super" is a pseudo value identifier that can only be used to +invoke a super-class method, as in "super#print". +\begin{caml_example}{toplevel} +class printable_colored_point y c = + object (self) + val c = c + method color = c + inherit printable_point y as super + method! print = + print_string "("; + super#print; + print_string ", "; + print_string (self#color); + print_string ")" + end;; +let p' = new printable_colored_point 17 "red";; +p'#print;; +\end{caml_example} +A private method that has been hidden in the parent class is no longer +visible, and is thus not overridden. Since initializers are treated as +private methods, all initializers along the class hierarchy are evaluated, +in the order they are introduced. + +Note that for clarity's sake, the method "print" is explicitly marked as +overriding another definition by annotating the "method" keyword with +an exclamation mark "!". If the method "print" were not overriding the +"print" method of "printable_point", the compiler would raise an error: +\begin{caml_example}{toplevel}[error] + object + method! 
m = () + end;; +\end{caml_example} + +This explicit overriding annotation also works +for "val" and "inherit": +\begin{caml_example}{toplevel} +class another_printable_colored_point y c c' = + object (self) + inherit printable_point y + inherit! printable_colored_point y c + val! c = c' + end;; +\end{caml_example} + +\section{Parameterized classes} +\pdfsection{Parameterized classes} +\label{ss:parameterized-classes} + +Reference cells can be implemented as objects. +The naive definition fails to typecheck: +\begin{caml_example}{toplevel}[error] +class oref x_init = + object + val mutable x = x_init + method get = x + method set y = x <- y + end;; +\end{caml_example} +The reason is that at least one of the methods has a polymorphic type +(here, the type of the value stored in the reference cell), thus +either the class should be parametric, or the method type should be +constrained to a monomorphic type. A monomorphic instance of the class could +be defined by: +\begin{caml_example}{toplevel} +class oref (x_init:int) = + object + val mutable x = x_init + method get = x + method set y = x <- y + end;; +\end{caml_example} +Note that since immediate objects do not define a class type, they have +no such restriction. +\begin{caml_example}{toplevel} +let new_oref x_init = + object + val mutable x = x_init + method get = x + method set y = x <- y + end;; +\end{caml_example} +On the other hand, a class for polymorphic references must explicitly +list the type parameters in its declaration. Class type parameters are +listed between "[" and "]". The type parameters must also be +bound somewhere in the class body by a type constraint. +\begin{caml_example}{toplevel} +class ['a] oref x_init = + object + val mutable x = (x_init : 'a) + method get = x + method set y = x <- y + end;; +let r = new oref 1 in r#set 2; (r#get);; +\end{caml_example} +The type parameter in the declaration may actually be constrained in the +body of the class definition. 
In the class type, the actual value of +the type parameter is displayed in the "constraint" clause. +\begin{caml_example}{toplevel} +class ['a] oref_succ (x_init:'a) = + object + val mutable x = x_init + 1 + method get = x + method set y = x <- y + end;; +\end{caml_example} +Let us consider a more complex example: define a circle, whose center +may be any kind of point. We put an additional type +constraint in method "move", since no free variables must remain +unaccounted for by the class type parameters. +\begin{caml_example}{toplevel} +class ['a] circle (c : 'a) = + object + val mutable center = c + method center = center + method set_center c = center <- c + method move = (center#move : int -> unit) + end;; +\end{caml_example} +An alternate definition of "circle", using a "constraint" clause in +the class definition, is shown below. The type "#point" used below in +the "constraint" clause is an abbreviation produced by the definition +of class "point". This abbreviation unifies with the type of any +object belonging to a subclass of class "point". It actually expands to +"< get_x : int; move : int -> unit; .. >". This leads to the following +alternate definition of "circle", which has slightly stronger +constraints on its argument, as we now expect "center" to have a +method "get_x". +\begin{caml_example}{toplevel} +class ['a] circle (c : 'a) = + object + constraint 'a = #point + val mutable center = c + method center = center + method set_center c = center <- c + method move = center#move + end;; +\end{caml_example} +The class "colored_circle" is a specialized version of class +"circle" that requires the type of the center to unify with +"#colored_point", and adds a method "color". Note that when specializing a +parameterized class, the instance of type parameter must always be +explicitly given. It is again written between "[" and "]". 
+\begin{caml_example}{toplevel} +class ['a] colored_circle c = + object + constraint 'a = #colored_point + inherit ['a] circle c + method color = center#color + end;; +\end{caml_example} + +\section{Polymorphic methods} +\pdfsection{Polymorphic methods} +\label{ss:polymorphic-methods} + +While parameterized classes may be polymorphic in their contents, they +are not enough to allow polymorphism of method use. + +A classical example is defining an iterator. +\begin{caml_example}{toplevel} +List.fold_left;; +class ['a] intlist (l : int list) = + object + method empty = (l = []) + method fold f (accu : 'a) = List.fold_left f accu l + end;; +\end{caml_example} +At first look, we seem to have a polymorphic iterator, however this +does not work in practice. +\begin{caml_example}{toplevel} +let l = new intlist [1; 2; 3];; +l#fold (fun x y -> x+y) 0;; +l;; +l#fold (fun s x -> s ^ string_of_int x ^ " ") "" [@@expect error];; +\end{caml_example} +Our iterator works, as shows its first use for summation. However, +since objects themselves are not polymorphic (only their constructors +are), using the "fold" method fixes its type for this individual object. +Our next attempt to use it as a string iterator fails. + +The problem here is that quantification was wrongly located: it is +not the class we want to be polymorphic, but the "fold" method. +This can be achieved by giving an explicitly polymorphic type in the +method definition. +\begin{caml_example}{toplevel} +class intlist (l : int list) = + object + method empty = (l = []) + method fold : 'a. 
('a -> int -> 'a) -> 'a -> 'a = + fun f accu -> List.fold_left f accu l + end;; +let l = new intlist [1; 2; 3];; +l#fold (fun x y -> x+y) 0;; +l#fold (fun s x -> s ^ string_of_int x ^ " ") "";; +\end{caml_example} +As you can see in the class type shown by the compiler, while +polymorphic method types must be fully explicit in class definitions +(appearing immediately after the method name), quantified type +variables can be left implicit in class descriptions. Why require types +to be explicit? The problem is that "(int -> int -> int) -> int -> +int" would also be a valid type for "fold", and it happens to be +incompatible with the polymorphic type we gave (automatic +instantiation only works for toplevel types variables, not for inner +quantifiers, where it becomes an undecidable problem.) So the compiler +cannot choose between those two types, and must be helped. + +However, the type can be completely omitted in the class definition if +it is already known, through inheritance or type constraints on self. +Here is an example of method overriding. +\begin{caml_example*}{toplevel} +class intlist_rev l = + object + inherit intlist l + method! fold f accu = List.fold_left f accu (List.rev l) + end;; +\end{caml_example*} +The following idiom separates description and definition. +\begin{caml_example*}{toplevel} +class type ['a] iterator = + object method fold : ('b -> 'a -> 'b) -> 'b -> 'b end;; +class intlist l = + object (self : int #iterator) + method empty = (l = []) + method fold f accu = List.fold_left f accu l + end;; +\end{caml_example*} +Note here the "(self : int #iterator)" idiom, which ensures that this +object implements the interface "iterator". + +Polymorphic methods are called in exactly the same way as normal +methods, but you should be aware of some limitations of type +inference. Namely, a polymorphic method can only be called if its +type is known at the call site. 
Otherwise, the method will be assumed +to be monomorphic, and given an incompatible type. +\begin{caml_example}{toplevel} +let sum lst = lst#fold (fun x y -> x+y) 0;; +sum l [@@expect error];; +\end{caml_example} +The workaround is easy: you should put a type constraint on the +parameter. +\begin{caml_example}{toplevel} +let sum (lst : _ #iterator) = lst#fold (fun x y -> x+y) 0;; +\end{caml_example} +Of course the constraint may also be an explicit method type. +Only occurrences of quantified variables are required. +\begin{caml_example}{toplevel} +let sum lst = + (lst : < fold : 'a. ('a -> _ -> 'a) -> 'a -> 'a; .. >)#fold (+) 0;; +\end{caml_example} + +Another use of polymorphic methods is to allow some form of implicit +subtyping in method arguments. We have already seen in section +\ref{ss:inheritance} how some functions may be polymorphic in the +class of their argument. This can be extended to methods. +\begin{caml_example}{toplevel} +class type point0 = object method get_x : int end;; +class distance_point x = + object + inherit point x + method distance : 'a. (#point0 as 'a) -> int = + fun other -> abs (other#get_x - x) + end;; +let p = new distance_point 3 in +(p#distance (new point 8), p#distance (new colored_point 1 "blue"));; +\end{caml_example} +Note here the special syntax "(#point0 as 'a)" we have to use to +quantify the extensible part of "#point0". As for the variable binder, +it can be omitted in class specifications. If you want polymorphism +inside an object field, it must be quantified independently. +\begin{caml_example}{toplevel} +class multi_poly = + object + method m1 : 'a. (< n1 : 'b. 'b -> 'b; .. > as 'a) -> _ = + fun o -> o#n1 true, o#n1 "hello" + method m2 : 'a 'b. (< n2 : 'b -> bool; .. > as 'a) -> 'b -> _ = + fun o x -> o#n2 x + end;; +\end{caml_example} +In method "m1", "o" must be an object with at least a method "n1", +itself polymorphic. 
In method "m2", the argument of "n2" and "x" must +have the same type, which is quantified at the same level as "'a". + +\section{Using coercions} +\pdfsection{Using coercions} +\label{ss:using-coercions} + +Subtyping is never implicit. There are, however, two ways to perform +subtyping. The most general construction is fully explicit: both the +domain and the codomain of the type coercion must be given. + +We have seen that points and colored points have incompatible types. +For instance, they cannot be mixed in the same list. However, a +colored point can be coerced to a point, hiding its "color" method: +\begin{caml_example}{toplevel} +let colored_point_to_point cp = (cp : colored_point :> point);; +let p = new point 3 and q = new colored_point 4 "blue";; +let l = [p; (colored_point_to_point q)];; +\end{caml_example} +An object of type "t" can be seen as an object of type "t'" +only if "t" is a subtype of "t'". For instance, a point cannot be +seen as a colored point. +\begin{caml_example}{toplevel}[error] +(p : point :> colored_point);; +\end{caml_example} +Indeed, narrowing coercions without runtime checks would be unsafe. +Runtime type checks might raise exceptions, and they would require +the presence of type information at runtime, which is not the case in +the OCaml system. +For these reasons, there is no such operation available in the language. + +Be aware that subtyping and inheritance are not related. Inheritance is a +syntactic relation between classes while subtyping is a semantic relation +between types. For instance, the class of colored points could have been +defined directly, without inheriting from the class of points; the type of +colored points would remain unchanged and thus still be a subtype of +points. +% Conversely, the class "int_comparable" inherits from class +%"comparable", but type "int_comparable" is not a subtype of "comparable". 
+%\begin{caml_example}{toplevel} +%function x -> (x : int_comparable :> comparable);; +%\end{caml_example} + +The domain of a coercion can often be omitted. For instance, one can +define: +\begin{caml_example}{toplevel} +let to_point cp = (cp :> point);; +\end{caml_example} +In this case, the function "colored_point_to_point" is an instance of the +function "to_point". This is not always true, however. The fully +explicit coercion is more precise and is sometimes unavoidable. +Consider, for example, the following class: +\begin{caml_example}{toplevel} +class c0 = object method m = {< >} method n = 0 end;; +\end{caml_example} +The object type "c0" is an abbreviation for "<m : 'a; n : int> as 'a". +Consider now the type declaration: +\begin{caml_example}{toplevel} +class type c1 = object method m : c1 end;; +\end{caml_example} +The object type "c1" is an abbreviation for the type "<m : 'a> as 'a". +The coercion from an object of type "c0" to an object of type "c1" is +correct: +\begin{caml_example}{toplevel} +fun (x:c0) -> (x : c0 :> c1);; +\end{caml_example} +%%% FIXME come up with a better example. +% However, the domain of the coercion cannot be omitted here: +% \begin{caml_example}{toplevel} +% fun (x:c0) -> (x :> c1);; +% \end{caml_example} +However, the domain of the coercion cannot always be omitted. +In that case, the solution is to use the explicit form. +% +Sometimes, a change in the class-type definition can also solve the problem: +\begin{caml_example}{toplevel} +class type c2 = object ('a) method m : 'a end;; +fun (x:c0) -> (x :> c2);; +\end{caml_example} +While class types "c1" and "c2" are different, both object types +"c1" and "c2" expand to the same object type (same method names and types). +Yet, when the domain of a coercion is left implicit and its co-domain +is an abbreviation of a known class type, then the class type, rather +than the object type, is used to derive the coercion function. 
This +allows leaving the domain implicit in most cases when coercing from a +subclass to its superclass. +% +The type of a coercion can always be seen as below: +\begin{caml_example}{toplevel} +let to_c1 x = (x :> c1);; +let to_c2 x = (x :> c2);; +\end{caml_example} +Note the difference between these two coercions: in the case of "to_c2", +the type +"#c2 = < m : 'a; .. > as 'a" is polymorphically recursive (according +to the explicit recursion in the class type of "c2"); hence the +success of applying this coercion to an object of class "c0". +On the other hand, in the first case, "c1" was only expanded and +unrolled twice to obtain "< m : < m : c1; .. >; .. >" (remember "#c1 = +< m : c1; .. >"), without introducing recursion. +You may also note that the type of "to_c2" is "#c2 -> c2" while +the type of "to_c1" is more general than "#c1 -> c1". This is not always true, +since there are class types for which some instances of "#c" are not subtypes +of "c", as explained in section~\ref{ss:binary-methods}. Yet, for +parameterless classes the coercion "(_ :> c)" is always more general than +"(_ : #c :> c)". +%If a class type exposes the type of self through one of its parameters, this +%is no longer true. Here is a counter-example. +%\begin{caml_example}{toplevel} +%class type ['a] c = object ('a) method m : 'a end;; +%let to_c x = (x :> _ c);; +%\end{caml_example} + + +A common problem may occur when one tries to define a coercion to a +class "c" while defining class "c". The problem is due to the type +abbreviation not being completely defined yet, and so its subtypes are not +clearly known. Then, a coercion "(_ :> c)" or "(_ : #c :> c)" is taken to be +the identity function, as in +\begin{caml_example}{toplevel} +function x -> (x :> 'a);; +\end{caml_example} +As a consequence, if the coercion is applied to "self", as in the +following example, the type of "self" is unified with the closed type +"c" (a closed object type is an object type without ellipsis). 
This +would constrain the type of self to be closed and is thus rejected. +Indeed, the type of self cannot be closed: this would prevent any +further extension of the class. Therefore, a type error is generated +when the unification of this type with another type would result in a +closed object type. +\begin{caml_example}{toplevel}[error] +class c = object method m = 1 end +and d = object (self) + inherit c + method n = 2 + method as_c = (self :> c) +end;; +\end{caml_example} +However, the most common instance of this problem, coercing self to +its current class, is detected as a special case by the type checker, +and properly typed. +\begin{caml_example}{toplevel} +class c = object (self) method m = (self :> c) end;; +\end{caml_example} +This allows the following idiom, keeping a list of all objects +belonging to a class or its subclasses: +\begin{caml_example}{toplevel} +let all_c = ref [];; +class c (m : int) = + object (self) + method m = m + initializer all_c := (self :> c) :: !all_c + end;; +\end{caml_example} +This idiom can in turn be used to retrieve an object whose type has +been weakened: +\begin{caml_example}{toplevel} +let rec lookup_obj obj = function [] -> raise Not_found + | obj' :: l -> + if (obj :> < >) = (obj' :> < >) then obj' else lookup_obj obj l ;; +let lookup_c obj = lookup_obj obj !all_c;; +\end{caml_example} +The type "< m : int >" we see here is just the expansion of "c", due +to the use of a reference; we have succeeded in getting back an object +of type "c". + +\medskip +The previous coercion problem can often be avoided by first +defining the abbreviation, using a class type: +\begin{caml_example}{toplevel} +class type c' = object method m : int end;; +class c : c' = object method m = 1 end +and d = object (self) + inherit c + method n = 2 + method as_c = (self :> c') +end;; +\end{caml_example} +It is also possible to use a virtual class. 
Inheriting from this class +simultaneously forces all methods of "c" to have the same +type as the methods of "c'". +\begin{caml_example}{toplevel} +class virtual c' = object method virtual m : int end;; +class c = object (self) inherit c' method m = 1 end;; +\end{caml_example} +One could think of defining the type abbreviation directly: +\begin{caml_example*}{toplevel} +type c' = <m : int>;; +\end{caml_example*} +However, the abbreviation "#c'" cannot be defined directly in a similar way. +It can only be defined by a class or a class-type definition. +This is because a "#"-abbreviation carries an implicit anonymous +variable ".." that cannot be explicitly named. +The closest you can get to it is: +\begin{caml_example*}{toplevel} +type 'a c'_class = 'a constraint 'a = < m : int; .. >;; +\end{caml_example*} +with an extra type variable capturing the open object type. + +\section{Functional objects} +\pdfsection{Functional objects} +\label{ss:functional-objects} + +It is possible to write a version of class "point" without assignments +on the instance variables. The override construct "{< ... >}" returns a copy of +``self'' (that is, the current object), possibly changing the value of +some instance variables. +\begin{caml_example}{toplevel} +class functional_point y = + object + val x = y + method get_x = x + method move d = {< x = x + d >} + end;; +let p = new functional_point 7;; +p#get_x;; +(p#move 3)#get_x;; +p#get_x;; +\end{caml_example} +Note that the type abbreviation "functional_point" is recursive, which can +be seen in the class type of "functional_point": the type of self is "'a" +and "'a" appears inside the type of the method "move". 
+ +The above definition of "functional_point" is not equivalent +to the following: +\begin{caml_example}{toplevel} +class bad_functional_point y = + object + val x = y + method get_x = x + method move d = new bad_functional_point (x+d) + end;; +\end{caml_example} +While objects of either class will behave the same, objects of their +subclasses will be different. In a subclass of "bad_functional_point", +the method "move" will +keep returning an object of the parent class. On the contrary, in a +subclass of "functional_point", the method "move" will return an +object of the subclass. + +Functional update is often used in conjunction with binary methods +as illustrated in section \ref{module:string}. + +\section{Cloning objects} +\pdfsection{Cloning objects} +\label{ss:cloning-objects} + +Objects can also be cloned, whether they are functional or imperative. +The library function "Oo.copy" makes a shallow copy of an object. That is, +it returns a new object that has the same methods and instance +variables as its argument. The +instance variables are copied but their contents are shared. +Assigning a new value to an instance variable of the copy (using a method +call) will not affect instance variables of the original, and conversely. +A deeper assignment (for example if the instance variable is a reference cell) +will of course affect both the original and the copy. + +The type of "Oo.copy" is the following: +\begin{caml_example}{toplevel} +Oo.copy;; +\end{caml_example} +The keyword "as" in that type binds the type variable "'a" to +the object type "< .. >". Therefore, "Oo.copy" takes an object with +any methods (represented by the ellipsis), and returns an object of +the same type. The type of "Oo.copy" is different from type "< .. > -> +< .. >" as each ellipsis represents a different set of methods. +Ellipsis actually behaves as a type variable. 
+\begin{caml_example}{toplevel} +let p = new point 5;; +let q = Oo.copy p;; +q#move 7; (p#get_x, q#get_x);; +\end{caml_example} +In fact, "Oo.copy p" will behave as "p#copy" assuming that a public +method "copy" with body "{< >}" has been defined in the class of "p". + +Objects can be compared using the generic comparison functions "=" and "<>". +Two objects are equal if and only if they are physically equal. In +particular, an object and its copy are not equal. +\begin{caml_example}{toplevel} +let q = Oo.copy p;; +p = q, p = p;; +\end{caml_example} +Other generic comparisons such as ("<", "<=", ...) can also be used on +objects. The +relation "<" defines an unspecified but strict ordering on objects. The +ordering relationship between two objects is fixed once for all after the +two objects have been created and it is not affected by mutation of fields. + +Cloning and override have a non empty intersection. +They are interchangeable when used within an object and without +overriding any field: +\begin{caml_example}{toplevel} +class copy = + object + method copy = {< >} + end;; +class copy = + object (self) + method copy = Oo.copy self + end;; +\end{caml_example} +Only the override can be used to actually override fields, and +only the "Oo.copy" primitive can be used externally. + +Cloning can also be used to provide facilities for saving and +restoring the state of objects. +\begin{caml_example}{toplevel} +class backup = + object (self : 'mytype) + val mutable copy = None + method save = copy <- Some {< copy = None >} + method restore = match copy with Some x -> x | None -> self + end;; +\end{caml_example} +The above definition will only backup one level. +The backup facility can be added to any class by using multiple inheritance. 
+\begin{caml_example}{toplevel} +class ['a] backup_ref x = object inherit ['a] oref x inherit backup end;; +let rec get p n = if n = 0 then p # get else get (p # restore) (n-1);; +let p = new backup_ref 0 in +p # save; p # set 1; p # save; p # set 2; +[get p 0; get p 1; get p 2; get p 3; get p 4];; +\end{caml_example} +We can define a variant of backup that retains all copies. (We also +add a method "clear" to manually erase all copies.) +\begin{caml_example}{toplevel} +class backup = + object (self : 'mytype) + val mutable copy = None + method save = copy <- Some {< >} + method restore = match copy with Some x -> x | None -> self + method clear = copy <- None + end;; +\end{caml_example} +\begin{caml_example}{toplevel} +class ['a] backup_ref x = object inherit ['a] oref x inherit backup end;; +let p = new backup_ref 0 in +p # save; p # set 1; p # save; p # set 2; +[get p 0; get p 1; get p 2; get p 3; get p 4];; +\end{caml_example} + + + +\section{Recursive classes} +\pdfsection{Recursive classes} +\label{ss:recursive-classes} + +Recursive classes can be used to define objects whose types are +mutually recursive. +\begin{caml_example}{toplevel} +class window = + object + val mutable top_widget = (None : widget option) + method top_widget = top_widget + end +and widget (w : window) = + object + val window = w + method window = window + end;; +\end{caml_example} +Although their types are mutually recursive, the classes "widget" and +"window" are themselves independent. + + +\section{Binary methods} +\pdfsection{Binary methods} +\label{ss:binary-methods} + +A binary method is a method which takes an argument of the same type +as self. The class "comparable" below is a template for classes with a +binary method "leq" of type "'a -> bool" where the type variable "'a" +is bound to the type of self. Therefore, "#comparable" expands to "< +leq : 'a -> bool; .. > as 'a". We see here that the binder "as" also +allows writing recursive types. 
+\begin{caml_example}{toplevel} +class virtual comparable = + object (_ : 'a) + method virtual leq : 'a -> bool + end;; +\end{caml_example} +We then define a subclass "money" of "comparable". The class "money" +simply wraps floats as comparable objects. We will extend it below with +more operations. We have to use a type constraint on the class parameter "x" +because the primitive "<=" is a polymorphic function in +OCaml. The "inherit" clause ensures that the type of objects +of this class is an instance of "#comparable". +\begin{caml_example}{toplevel} +class money (x : float) = + object + inherit comparable + val repr = x + method value = repr + method leq p = repr <= p#value + end;; +\end{caml_example} +% not explained: mutability can be hidden +Note that the type "money" is not a subtype of type +"comparable", as the self type appears in contravariant position +in the type of method "leq". +Indeed, an object "m" of class "money" has a method "leq" +that expects an argument of type "money" since it accesses +its "value" method. Considering "m" of type "comparable" would allow a +call to method "leq" on "m" with an argument that does not have a method +"value", which would be an error. + +Similarly, the type "money2" below is not a subtype of type "money". +\begin{caml_example}{toplevel} +class money2 x = + object + inherit money x + method times k = {< repr = k *. repr >} + end;; +\end{caml_example} +It is however possible to define functions that manipulate objects of +type either "money" or "money2": the function "min" +will return the minimum of any two objects whose type unifies with +"#comparable". The type of "min" is not the same as "#comparable -> +#comparable -> #comparable", as the abbreviation "#comparable" hides a +type variable (an ellipsis). Each occurrence of this abbreviation +generates a new variable. 
+\begin{caml_example}{toplevel} +let min (x : #comparable) y = + if x#leq y then x else y;; +\end{caml_example} +This function can be applied to objects of type "money" +or "money2". +\begin{caml_example}{toplevel} +(min (new money 1.3) (new money 3.1))#value;; +(min (new money2 5.0) (new money2 3.14))#value;; +\end{caml_example} + +More examples of binary methods can be found in sections +\ref{module:string} and \ref{module:set}. + +Note the use of override for method "times". +Writing "new money2 (k *. repr)" instead of "{< repr = k *. repr >}" +would not behave well with inheritance: in a subclass "money3" of "money2" +the "times" method would return an object of class "money2" but not of class +"money3" as would be expected. + +The class "money" could naturally carry another binary method. Here is a +direct definition: +\begin{caml_example}{toplevel} +class money x = + object (self : 'a) + val repr = x + method value = repr + method print = print_float repr + method times k = {< repr = k *. x >} + method leq (p : 'a) = repr <= p#value + method plus (p : 'a) = {< repr = x +. p#value >} + end;; +\end{caml_example} + +\section{Friends} +\pdfsection{Friends} +\label{ss:friends} + +The above class "money" reveals a problem that often occurs with binary +methods. In order to interact with other objects of the same class, the +representation of "money" objects must be revealed, using a method such as +"value". If we remove all binary methods (here "plus" and "leq"), +the representation can easily be hidden inside objects by removing the method +"value" as well. However, this is not possible as soon as some binary +method requires access to the representation of objects of the same +class (other than self). +\begin{caml_example}{toplevel} +class safe_money x = + object (self : 'a) + val repr = x + method print = print_float repr + method times k = {< repr = k *. 
x >} + end;; +\end{caml_example} +Here, the representation of the object is known only to a particular object. +To make it available to other objects of the same class, we are forced to +make it available to the whole world. However we can easily restrict the +visibility of the representation using the module system. +\begin{caml_example*}{toplevel} +module type MONEY = + sig + type t + class c : float -> + object ('a) + val repr : t + method value : t + method print : unit + method times : float -> 'a + method leq : 'a -> bool + method plus : 'a -> 'a + end + end;; +module Euro : MONEY = + struct + type t = float + class c x = + object (self : 'a) + val repr = x + method value = repr + method print = print_float repr + method times k = {< repr = k *. x >} + method leq (p : 'a) = repr <= p#value + method plus (p : 'a) = {< repr = x +. p#value >} + end + end;; +\end{caml_example*} +Another example of friend functions may be found in section +\ref{module:set}. These examples occur when a group of objects (here +objects of the same class) and functions should see each others internal +representation, while their representation should be hidden from the +outside. The solution is always to define all friends in the same module, +give access to the representation and use a signature constraint to make the +representation abstract outside the module. 
+ + + +% LocalWords: typecheck monomorphic uncaptured Subtyping subtyping leq repr Oo +% LocalWords: val sig bool Euro struct OCaml Vouillon Didier int ref incr init +% LocalWords: succ mytype rec + diff --git a/manual/manual/tutorials/polymorphism.etex b/manual/manual/tutorials/polymorphism.etex new file mode 100644 index 00000000..5402ec36 --- /dev/null +++ b/manual/manual/tutorials/polymorphism.etex @@ -0,0 +1,477 @@ + +\chapter{Polymorphism and its limitations}% +\label{c:polymorphism} +\pdfchapterfold{0}{Tutorial: Polymorphism limitations} +%HEVEA\cutname{polymorphism.html} + +\bigskip + +\noindent This chapter covers more advanced questions related to the +limitations of polymorphic functions and types. There are some situations +in OCaml where the type inferred by the type checker may be less generic +than expected. Such non-genericity can stem either from interactions +between side-effect and typing or the difficulties of implicit polymorphic +recursion and higher-rank polymorphism. + +This chapter details each of these situations and, if it is possible, +how to recover genericity. + +\section{Weak polymorphism and mutation} +\subsection{Weakly polymorphic types} +\label{ss:weaktypes} +Maybe the most frequent examples of non-genericity derive from the +interactions between polymorphic types and mutation. A simple example +appears when typing the following expression +\begin{caml_example}{toplevel} +let store = ref None ;; +\end{caml_example} +Since the type of "None" is "'a option" and the function "ref" has type +"'b -> 'b ref", a natural deduction for the type of "store" would be +"'a option ref". However, the inferred type, "'_weak1 option ref", is +different. Type variables whose name starts with a "_weak" prefix like +"'_weak1" are weakly polymorphic type variables, sometimes shortened as +weak type variables. +A weak type variable is a placeholder for a single type that is currently +unknown. 
Once the specific type "t" behind the placeholder type "'_weak1" +is known, all occurrences of "'_weak1" will be replaced by "t". For instance, +we can define another option reference and store an "int" inside: +\begin{caml_example}{toplevel} +let another_store = ref None ;; +another_store := Some 0; +another_store ;; +\end{caml_example} +After storing an "int" inside "another_store", the type of "another_store" has +been updated from "'_weak2 option ref" to "int option ref". +This distinction between weak and generic polymorphic type variables protects +OCaml programs from unsoundness and runtime errors. To understand where +unsoundness might come from, consider this simple function which swaps a value "x" +with the value stored inside a "store" reference, if there is such a value: +\begin{caml_example}{toplevel} +let swap store x = match !store with + | None -> store := Some x; x + | Some y -> store := Some x; y;; +\end{caml_example} +We can apply this function to our store +\begin{caml_example}{toplevel} +let one = swap store 1 +let one_again = swap store 2 +let two = swap store 3;; +\end{caml_example} +After these three swaps the stored value is "3". Everything is fine up to +now. We can then try to swap "3" with a more interesting value, for +instance a function: +\begin{caml_example}{toplevel}[error] +let error = swap store (fun x -> x);; +\end{caml_example} +At this point, the type checker rightfully complains that it is not +possible to swap an integer and a function, and that an "int" should always +be traded for another "int". 
Furthermore, the type checker prevents us from +manually changing the type of the value stored by "store": +\begin{caml_example}{toplevel}[error] +store := Some (fun x -> x);; +\end{caml_example} +Indeed, looking at the type of store, we see that the weak type "'_weak1" has +been replaced by the type "int" +\begin{caml_example}{toplevel} +store;; +\end{caml_example} +Therefore, after placing an "int" in "store", we cannot use it to store any +value other than an "int". More generally, weak types protect the program from +undue mutation of values with a polymorphic type. + +%todo: fix indentation in pdfmanual +Moreover, weak types cannot appear in the signature of toplevel modules: +types must be known at compilation time. Otherwise, different compilation +units could replace the weak type with different and incompatible types. +For this reason, compiling the following small piece of code +\begin{verbatim} +let option_ref = ref None +\end{verbatim} +yields a compilation error +\begin{verbatim} +Error: The type of this expression, '_weak1 option ref, + contains type variables that cannot be generalized +\end{verbatim} +To solve this error, it is enough to add an explicit type annotation to +specify the type at declaration time: +\begin{verbatim} +let option_ref: int option ref = ref None +\end{verbatim} +This is in any case a good practice for such global mutable variables. +Otherwise, they will pick up the type of their first use. If there is a mistake +at this point, this can result in confusing type errors when later, correct +uses are flagged as errors. + +\subsection{The value restriction}\label{ss:valuerestriction} + +Identifying the exact context in which polymorphic types should be +replaced by weak types in a modular way is a difficult question. Indeed +the type system must handle the possibility that functions may hide persistent +mutable states. 
For instance, the following function uses an internal reference +to implement a delayed identity function +\begin{caml_example}{toplevel} +let make_fake_id () = + let store = ref None in + fun x -> swap store x ;; +let fake_id = make_fake_id();; +\end{caml_example} +It would be unsound to apply this "fake_id" function to values with different +types. The function "fake_id" is therefore rightfully assigned the type +"'_weak3 -> '_weak3" rather than "'a -> 'a". At the same time, it ought to +be possible to use a local mutable state without impacting the type of a +function. +%todo: add an example? + +To circumvent these dual difficulties, the type checker considers that any value +returned by a function might rely on persistent mutable states behind the scenes +and should be given a weak type. This restriction on the type of mutable +values and the results of function application is called the value restriction. +Note that this value restriction is conservative: there are situations where the +value restriction is too cautious and gives a weak type to a value that could be +safely generalized to a polymorphic type: +\begin{caml_example}{toplevel} +let not_id = (fun x -> x) (fun x -> x);; +\end{caml_example} +Quite often, this happens when defining functions using higher-order functions. +To avoid this problem, a solution is to add an explicit argument to the +function: +\begin{caml_example}{toplevel} +let id_again = fun x -> (fun x -> x) (fun x -> x) x;; +\end{caml_example} +With this argument, "id_again" is seen as a function definition by the type +checker and can therefore be generalized. This kind of manipulation is called +eta-expansion in lambda calculus and is sometimes referred to under this name. + +\subsection{The relaxed value restriction} + +There is another partial solution to the problem of unnecessary weak types, +which is implemented directly within the type checker. 
Briefly, it is possible +to prove that weak types that only appear as type parameters in covariant +positions --also called positive positions-- can be safely generalized to +polymorphic types. For instance, the type "'a list" is covariant in "'a": +\begin{caml_example}{toplevel} + let f () = [];; + let empty = f ();; +\end{caml_example} +Remark that the type inferred for "empty" is "'a list" and not "'_weak5 list" +that should have occurred with the value restriction since "f ()" is a +function application. + +The value restriction combined with this generalization for covariant type +parameters is called the relaxed value restriction. + +%question: is here the best place for describing variance? +\subsection{Variance and value restriction} +Variance describes how type constructors behave with respect to subtyping. +Consider for instance a pair of type "x" and "xy" with "x" a subtype of "xy", +denoted "x :> xy": +\begin{caml_example}{toplevel} + type x = [ `X ];; + type xy = [ `X | `Y ];; +\end{caml_example} +As "x" is a subtype of "xy", we can convert a value of type "x" +to a value of type "xy": +\begin{caml_example}{toplevel} + let x:x = `X;; + let x' = ( x :> xy);; +\end{caml_example} +Similarly, if we have a value of type "x list", we can convert it to a value +of type "xy list", since we could convert each element one by one: +\begin{caml_example}{toplevel} + let l:x list = [`X; `X];; + let l' = ( l :> xy list);; +\end{caml_example} +In other words, "x :> xy" implies that "x list :> xy list", therefore +the type constructor "'a list" is covariant (it preserves subtyping) +in its parameter "'a". 
+ +Contrarily, if we have a function that can handle values of type "xy" +\begin{caml_example}{toplevel} + let f: xy -> unit = function + | `X -> () + | `Y -> ();; +\end{caml_example} +it can also handle values of type "x": +\begin{caml_example}{toplevel} + let f' = (f :> x -> unit);; +\end{caml_example} +Note that we can rewrite the type of "f" and "f'" as +\begin{caml_example}{toplevel} + type 'a proc = 'a -> unit + let f' = (f: xy proc :> x proc);; +\end{caml_example} +In this case, "x :> xy" implies "xy proc :> x proc". Notice +that the second subtyping relation reverses the order of "x" and "xy": +the type constructor "'a proc" is contravariant in its parameter "'a". +More generally, the function type constructor "'a -> 'b" is covariant in +its return type "'b" and contravariant in its argument type "'a". + +A type constructor can also be invariant in some of its type parameters, +neither covariant nor contravariant. A typical example is a reference: +\begin{caml_example}{toplevel} + let x: x ref = ref `X;; +\end{caml_example} +If we were able to coerce "x" to the type "xy ref" as a variable "xy", +we could use "xy" to store the value "`Y" inside the reference and then use +the "x" value to read this content as a value of type "x", +which would break the type system. + +More generally, as soon as a type variable appears in a position describing +mutable state it becomes invariant. As a corollary, covariant variables will +never denote mutable locations and can be safely generalized. +For a better description, interested readers can consult the original +article by Jacques Garrigue at +\url{http://www.math.nagoya-u.ac.jp/~garrigue/papers/morepoly-long.pdf} + +Together, the relaxed value restriction and type parameter covariance +help to avoid eta-expansion in many situations. 
+ +\subsection{Abstract data types} +Moreover, when the type definitions are exposed, the type checker +is able to infer variance information on its own and one can benefit from +the relaxed value restriction even unknowingly. However, this is not the case +anymore when defining new abstract types. As an illustration, we can define a +module type collection as: +\begin{caml_example}{toplevel} +module type COLLECTION = sig + type 'a t + val empty: unit -> 'a t +end + +module Implementation = struct + type 'a t = 'a list + let empty ()= [] +end;; + +module List2: COLLECTION = Implementation;; +\end{caml_example} + +In this situation, when coercing the module "List2" to the module type +"COLLECTION", the type checker forgets that "'a List2.t" was covariant +in "'a". Consequently, the relaxed value restriction does not apply anymore: + +\begin{caml_example}{toplevel} + List2.empty ();; +\end{caml_example} + +To keep the relaxed value restriction, we need to declare the abstract type +"'a COLLECTION.t" as covariant in "'a": +\begin{caml_example}{toplevel} +module type COLLECTION = sig + type +'a t + val empty: unit -> 'a t +end + +module List2: COLLECTION = Implementation;; +\end{caml_example} + +We then recover polymorphism: + +\begin{caml_example}{toplevel} + List2.empty ();; +\end{caml_example} + +\section{Polymorphic recursion}\label{s:polymorphic-recursion} + +The second major class of non-genericity is directly related to the problem +of type inference for polymorphic functions. In some circumstances, the type +inferred by OCaml might be not general enough to allow the definition of +some recursive functions, in particular for recursive function acting on +non-regular algebraic data type. + +With a regular polymorphic algebraic data type, the type parameters of +the type constructor are constant within the definition of the type. 
For +instance, we can look at arbitrarily nested lists defined as: +\begin{caml_example}{toplevel} + type 'a regular_nested = List of 'a list | Nested of 'a regular_nested list + let l = Nested[ List [1]; Nested [List[2;3]]; Nested[Nested[]] ];; +\end{caml_example} +Note that the type constructor "regular_nested" always appears as +"'a regular_nested" in the definition above, with the same parameter +"'a". Equipped with this type, one can compute a maximal depth with +a classic recursive function +\begin{caml_example}{toplevel} + let rec maximal_depth = function + | List _ -> 1 + | Nested [] -> 0 + | Nested (a::q) -> 1 + max (maximal_depth a) (maximal_depth (Nested q));; +\end{caml_example} + +Non-regular recursive algebraic data types correspond to polymorphic algebraic +data types whose parameter types vary between the left and right sides of +the type definition. For instance, it might be interesting to define a datatype +that ensures that all lists are nested at the same depth: +\begin{caml_example}{toplevel} + type 'a nested = List of 'a list | Nested of 'a list nested;; +\end{caml_example} +Intuitively, a value of type "'a nested" is a list of lists \dots of lists of +elements of type "'a" with "k" nested lists. We can then adapt the "maximal_depth" +function defined on "regular_nested" into a "depth" function that computes this +"k". As a first try, we may define +\begin{caml_example}{toplevel}[error] +let rec depth = function + | List _ -> 1 + | Nested n -> 1 + depth n;; +\end{caml_example} +The type error here comes from the fact that during the definition of "depth", +the type checker first assigns to "depth" the type "'a -> 'b ". +When typing the pattern matching, "'a -> 'b" becomes "'a nested -> 'b", then +"'a nested -> int" once the "List" branch is typed. +However, when typing the application "depth n" in the "Nested" branch, +the type checker encounters a problem: "depth" is applied to a value of type +"'a list nested", so it must have the type +"'a list nested -> 'b". 
Unifying this constraint with the previous one +leads to the impossible constraint "'a list nested = 'a nested". +In other words, within its definition, the recursive function "depth" is +applied to values of type "'a nested" with different types "'a" due to the +non-regularity of the type constructor "nested". This creates a problem because +the type checker had introduced a new type variable "'a" only at the +\emph{definition} of the function "depth" whereas, here, we need a +different type variable for every \emph{application} of the function "depth". + +\subsection{Explicitly polymorphic annotations} +The solution to this conundrum is to use an explicitly polymorphic type +annotation for the type "'a": +\begin{caml_example}{toplevel} +let rec depth: 'a. 'a nested -> int = function + | List _ -> 1 + | Nested n -> 1 + depth n;; +depth ( Nested(List [ [7]; [8] ]) );; +\end{caml_example} +In the type of "depth", "'a.'a nested -> int", the type variable "'a" +is universally quantified. In other words, "'a.'a nested -> int" reads as +``for all types "'a", "depth" maps "'a nested" values to integers''. +Whereas the standard type "'a nested -> int" can be interpreted +as ``let "'a" be a type variable, then "depth" maps "'a nested" values +to integers''. There are two major differences between these two type +expressions. First, the explicit polymorphic annotation indicates to the +type checker that it needs to introduce a new type variable every time +the function "depth" is applied. This solves our problem with the definition +of the function "depth". + +Second, it also notifies the type checker that the type of the function should +be polymorphic. Indeed, without explicit polymorphic type annotation, the +following type annotation is perfectly valid +\begin{caml_example}{toplevel} + let sum: 'a -> 'b -> 'c = fun x y -> x + y;; +\end{caml_example} +since "'a","'b" and "'c" denote type variables that may or may not be +polymorphic. 
Whereas, it is an error to unify an explicitly polymorphic type +with a non-polymorphic type: +\begin{caml_example}{toplevel}[error] + let sum: 'a 'b 'c. 'a -> 'b -> 'c = fun x y -> x + y;; +\end{caml_example} + +An important remark here is that it is not needed to explicit fully +the type of "depth": it is sufficient to add annotations only for the +universally quantified type variables: +\begin{caml_example}{toplevel} +let rec depth: 'a. 'a nested -> _ = function + | List _ -> 1 + | Nested n -> 1 + depth n;; +depth ( Nested(List [ [7]; [8] ]) );; +\end{caml_example} + +%todo: add a paragraph on the interaction with locally abstract type + +\subsection{More examples} +With explicit polymorphic annotations, it becomes possible to implement +any recursive function that depends only on the structure of the nested +lists and not on the type of the elements. For instance, a more complex +example would be to compute the total number of elements of the nested +lists: +\begin{caml_example}{toplevel} + let len nested = + let map_and_sum f = List.fold_left (fun acc x -> acc + f x) 0 in + let rec len: 'a. ('a list -> int ) -> 'a nested -> int = + fun nested_len n -> + match n with + | List l -> nested_len l + | Nested n -> len (map_and_sum nested_len) n + in + len List.length nested;; +len (Nested(Nested(List [ [ [1;2]; [3] ]; [ []; [4]; [5;6;7]]; [[]] ])));; +\end{caml_example} + +Similarly, it may be necessary to use more than one explicitly +polymorphic type variables, like for computing the nested list of +list lengths of the nested list: +\begin{caml_example}{toplevel} +let shape n = + let rec shape: 'a 'b. 
('a nested -> int nested) -> + ('b list list -> 'a list) -> 'b nested -> int nested + = fun nest nested_shape -> + function + | List l -> raise + (Invalid_argument "shape requires nested_list of depth greater than 1") + | Nested (List l) -> nest @@ List (nested_shape l) + | Nested n -> + let nested_shape = List.map nested_shape in + let nest x = nest (Nested x) in + shape nest nested_shape n in + shape (fun n -> n ) (fun l -> List.map List.length l ) n;; + +shape (Nested(Nested(List [ [ [1;2]; [3] ]; [ []; [4]; [5;6;7]]; [[]] ])));; +\end{caml_example} + +\section{Higher-rank polymorphic functions} + +Explicit polymorphic annotations are however not sufficient to cover all +the cases where the inferred type of a function is less general than +expected. A similar problem arises when using polymorphic functions as arguments +of higher-order functions. For instance, we may want to compute the average +depth or length of two nested lists: +\begin{caml_example}{toplevel} + let average_depth x y = (depth x + depth y) / 2;; + let average_len x y = (len x + len y) / 2;; + let one = average_len (List [2]) (List [[]]);; +\end{caml_example} +It would be natural to factorize these two definitions as: +\begin{caml_example}{toplevel} + let average f x y = (f x + f y) / 2;; +\end{caml_example} +However, the type of "average len" is less generic than the type of +"average_len", since it requires the type of the first and second argument to +be the same: +\begin{caml_example}{toplevel} + average_len (List [2]) (List [[]]);; + average len (List [2]) (List [[]])[@@expect error];; +\end{caml_example} + +As previously with polymorphic recursion, the problem stems from the fact that +type variables are introduced only at the start of the "let" definitions. When +we compute both "f x" and "f y", the type of "x" and "y" are unified together. +To avoid this unification, we need to indicate to the type checker +that f is polymorphic in its first argument. 
In some sense, we would want +"average" to have type +\begin{verbatim} +val average: ('a. 'a nested -> int) -> 'a nested -> 'b nested -> int +\end{verbatim} +Note that this syntax is not valid within OCaml: "average" has a universally +quantified type "'a" inside the type of one of its arguments whereas for +polymorphic recursion the universally quantified type was introduced before +the rest of the type. This position of the universally quantified type means +that "average" is a second-rank polymorphic function. This kind of higher-rank +function is not directly supported by OCaml: type inference for second-rank +polymorphic functions and beyond is undecidable; therefore using this kind of +higher-rank function requires handling these universally quantified +types manually. + +In OCaml, there are two ways to introduce this kind of explicit universally +quantified types: universally quantified record fields, +\begin{caml_example}{toplevel} + type 'a nested_reduction = { f:'elt. 'elt nested -> 'a };; + let boxed_len = { f = len };; +\end{caml_example} +and universally quantified object methods: +\begin{caml_example}{toplevel} + let obj_len = object method f:'a. 
'a nested -> 'b = len end;; +\end{caml_example} +To solve our problem, we can therefore use either the record solution: +\begin{caml_example}{toplevel} + let average nsm x y = (nsm.f x + nsm.f y) / 2 ;; +\end{caml_example} +or the object one: +\begin{caml_example}{toplevel} + let average (obj:<f:'a. 'a nested -> _ > ) x y = (obj#f x + obj#f y) / 2 ;; +\end{caml_example} diff --git a/manual/styles/altindex.sty b/manual/styles/altindex.sty new file mode 100644 index 00000000..d236e714 --- /dev/null +++ b/manual/styles/altindex.sty @@ -0,0 +1,39 @@ +%% An attempt to have several index files +%% +%% Defines \altindex{filename}{word to index} +%% and \makealtindex{filename} +%% +%% It is possible to define a macro for each index as follows: +%% \newcommand{\myindex}{\altindex{myindexfile}} +%% +%% This code is not really clean, there are still a number of things +%% that I don't understand... but it works. + +%% \makealtindex{filename} opens filename.idx for writing. + +\def\makealtindex#1{\if@filesw + \expandafter\newwrite\csname @#1altindexfile\endcsname + \immediate\openout\expandafter\csname @#1altindexfile\endcsname=#1.idx + \typeout{Writing alternate index file #1.idx}\fi} + +%% \@wraltindex makes the assumption that a trailing `\fi' will get bound +%% to #2. So, it `eats' it as second parameter and reinserts it. +%% Quick and dirty, I know... +%% Writes the index entry #3 into #1. + +\def\@wraltindex#1#2#3{\let\thepage\relax + \xdef\@gtempa{\write#1{\string + \indexentry{#3}{\thepage}}}\fi\endgroup\@gtempa + \if@nobreak \ifvmode\nobreak\fi\fi\@esphack} + +%% \altindex{filename}{index entry} does nothing if +%% \@altindexfile is \relax (i.e. filename.idx not open). +%% Otherwise, writes the index entry, and closes the whole stuff (some +%% groups, and some \if). 
+ +\def\altindex#1{\@bsphack\begingroup + \def\protect##1{\string##1\space}\@sanitize + \@ifundefined{@#1altindexfile}% + {\endgroup\@esphack}% + {\@wraltindex{\expandafter\csname @#1altindexfile\endcsname}} +} diff --git a/manual/styles/caml-sl.sty b/manual/styles/caml-sl.sty new file mode 100644 index 00000000..6bcfefe8 --- /dev/null +++ b/manual/styles/caml-sl.sty @@ -0,0 +1,61 @@ +% CAML style option, for use with the caml-latex filter. + +\typeout{Document Style option `caml-sl' <7 Apr 92>.} +\newcommand{\hash}{\#} +{\catcode`\^^M=\active % + \gdef\@camlinputline#1^^M{\normalsize\tt\hash{} #1\par} % + \gdef\@camloutputline#1^^M{\small\ttfamily\slshape#1\par} } % +\def\@camlblankline{\medskip} +\chardef\@camlbackslash="5C +\def\@bunderline{\setbox0\hbox\bgroup\let\par\@parinunderline} + +\def \@parinunderline {\futurelet \@next \@@parinunderline} +\def \@@parinunderline {\ifx \@next \? \let \@do \@@par@inunderline \else \let \@do \@@@parinunderline \fi \@do} +\def \@@par@inunderline #1{\@eunderline\@oldpar\?\@bunderline} +\def \@@@parinunderline {\@eunderline\@oldpar\@bunderline} +\def\@eunderline{\egroup\underline{\box0}} +\def\@camlnoop{} + +\def\caml{ + \bgroup + \parindent 0pt + \parskip 0pt + \let\do\@makeother\dospecials + \catcode13=\active % 13 = ^M = CR + \catcode92=0 % 92 = \ + \catcode32=\active % 32 = SPC + \frenchspacing + \@vobeyspaces + \let\@oldpar\par + \let\?\@camlinputline + \let\:\@camloutputline + \let\;\@camlblankline + \let\<\@bunderline + \let\>\@eunderline + \let\\\@camlbackslash + \let\-\@camlnoop +} + +\def\endcaml{ + \egroup + \addvspace{\medskipamount} +} + +% Caml-example related command +\def\camlexample#1{ + \ifnum\pdfstrcmp{#1}{toplevel}=0 + \renewcommand{\hash}{\#} + \else + \renewcommand{\hash}{} + \fi + \begin{flushleft} +} +\def\endcamlexample{\end{flushleft}\renewcommand{\hash}{\#}} +\def\camlinput{} +\def\endcamlinput{} +\def\camloutput{} +\def\endcamloutput{} +\def\camlerror{} +\def\endcamlerror{} +\def\camlwarn{} 
+\def\endcamlwarn{} diff --git a/manual/styles/caml.sty b/manual/styles/caml.sty new file mode 100644 index 00000000..3f5753ca --- /dev/null +++ b/manual/styles/caml.sty @@ -0,0 +1,31 @@ +% CAML style option, for use with the caml-latex filter. + +\typeout{Document Style option `caml' <7 Apr 92>.} + +{\catcode`\^^M=\active % + \gdef\@camlinputline#1^^M{\tt\##1\par} % + \gdef\@camloutputline#1^^M{\tt#1\par} } % +\def\@camlblankline{\medskip} +\chardef\@camlbackslash="5C + +\def\caml{ + \bgroup + \flushleft + \parindent 0pt + \parskip 0pt + \let\do\@makeother\dospecials + \catcode`\^^M=\active + \catcode`\\=0 + \catcode`\ \active + \frenchspacing + \@vobeyspaces + \let\?\@camlinputline + \let\:\@camloutputline + \let\;\@camlblankline + \let\\\@camlbackslash +} + +\def\endcaml{ + \endflushleft + \egroup\noindent +} diff --git a/manual/styles/doc.tfm b/manual/styles/doc.tfm new file mode 100644 index 00000000..d010f29e Binary files /dev/null and b/manual/styles/doc.tfm differ diff --git a/manual/styles/docbf.tfm b/manual/styles/docbf.tfm new file mode 100644 index 00000000..d010f29e Binary files /dev/null and b/manual/styles/docbf.tfm differ diff --git a/manual/styles/docit.tfm b/manual/styles/docit.tfm new file mode 100644 index 00000000..d010f29e Binary files /dev/null and b/manual/styles/docit.tfm differ diff --git a/manual/styles/docmi.tfm b/manual/styles/docmi.tfm new file mode 100644 index 00000000..d010f29e Binary files /dev/null and b/manual/styles/docmi.tfm differ diff --git a/manual/styles/docrm.tfm b/manual/styles/docrm.tfm new file mode 100644 index 00000000..d010f29e Binary files /dev/null and b/manual/styles/docrm.tfm differ diff --git a/manual/styles/doctt.tfm b/manual/styles/doctt.tfm new file mode 100644 index 00000000..d010f29e Binary files /dev/null and b/manual/styles/doctt.tfm differ diff --git a/manual/styles/fullpage.sty b/manual/styles/fullpage.sty new file mode 100644 index 00000000..6ecbeb76 --- /dev/null +++ b/manual/styles/fullpage.sty @@ 
-0,0 +1,2 @@ +\marginparwidth 0pt \oddsidemargin 0pt \evensidemargin 0pt \marginparsep 0pt +\topmargin 0pt \textwidth 6.5in \textheight 8.5 in diff --git a/manual/styles/html.sty b/manual/styles/html.sty new file mode 100644 index 00000000..6a9e9253 --- /dev/null +++ b/manual/styles/html.sty @@ -0,0 +1,222 @@ +% LaTeX2HTML Version 0.6.4 : html.sty +% +% This file contains definitions of LaTeX commands which are +% processed in a special way by the translator. +% For example, there are commands for embedding external hypertext links, +% for cross-references between documents or for including +% raw HTML. +% This file includes the comments.sty file v2.0 by Victor Eijkhout +% In most cases these commands do nothing when processed by LaTeX. + +% Modifications: +% +% nd = Nikos Drakos +% jz = Jelle van Zeijl + +% jz 22-APR-94 - Added support for htmlref +% nd - Created + + + +% Exit if the style file is already loaded +% (suggested by Lee Shombert +\ifx \htmlstyloaded\relax \endinput\else\let\htmlstyloaded\relax\fi + +%%% LINKS TO EXTERNAL DOCUMENTS +% +% This can be used to provide links to arbitrary documents. +% The first argumment should be the text that is going to be +% highlighted and the second argument a URL. +% The hyperlink will appear as a hyperlink in the HTML +% document and as a footnote in the dvi or ps files. +% +\newcommand{\htmladdnormallinkfoot}[2]{ #1\footnote{#2}} + +% This is an alternative definition of the command above which +% will ignore the URL in the dvi or ps files. +\newcommand{\htmladdnormallink}[2]{ #1 } + +% This command takes as argument a URL pointing to an image. +% The image will be embedded in the HTML document but will +% be ignored in the dvi and ps files. +% +\newcommand{\htmladdimg}[1]{ } + +%%% CROSS-REFERENCES BETWEEN (LOCAL OR REMOTE) DOCUMENTS +% +% This can be used to refer to symbolic labels in other Latex +% documents that have already been processed by the translator. 
+% The arguments should be: +% #1 : the URL to the directory containing the external document +% #2 : the path to the labels.pl file of the external document. +% If the external document lives on a remote machine then labels.pl +% must be copied on the local machine. +% +%e.g. \externallabels{http://cbl.leeds.ac.uk/nikos/WWW/doc/tex2html/latex2html} +% {/usr/cblelca/nikos/tmp/labels.pl} +% The arguments are ignored in the dvi and ps files. +% +\newcommand{\externallabels}[2]{ } + +% This complements the \externallabels command above. The argument +% should be a label defined in another latex document and will be +% ignored in the dvi and ps files. +% +\newcommand{\externalref}[1]{ } + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Comment.sty version 2.0, 19 June 1992 +% selectively in/exclude pieces of text: the user can define new +% comment versions, and each is controlled separately. +% This style can be used with plain TeX or LaTeX, and probably +% most other packages too. +% +% Examples of use in LaTeX and TeX follow \endinput +% +% Author +% Victor Eijkhout +% Department of Computer Science +% University Tennessee at Knoxville +% 104 Ayres Hall +% Knoxville, TN 37996 +% USA +% +% eijkhout@cs.utk.edu +% +% Usage: all text included in between +% \comment ... \endcomment +% or \begin{comment} ... \end{comment} +% is discarded. The closing command should appear on a line +% of its own. No starting spaces, nothing after it. +% This environment should work with arbitrary amounts +% of comment. +% +% Other 'comment' environments are defined by +% and are selected/deselected with +% \includecomment{versiona} +% \excludecoment{versionb} +% +% These environments are used as +% \versiona ... \endversiona +% or \begin{versiona} ... \end{versiona} +% with the closing command again on a line of its own. +% +% Basic approach: +% to comment something out, scoop up every line in verbatim mode +% as macro argument, then throw it away. 
+% For inclusions, both the opening and closing comands +% are defined as noop +% +% Changed \next to \html@next to prevent clashes with other sty files +% (mike@emn.fr) +% Changed \html@next to \htmlnext so the \makeatletter and +% \makeatother commands could be removed (they were cuasing other +% style files - changebar.sty - to crash) (nikos@cbl.leeds.ac.uk) + + +\def\makeinnocent#1{\catcode`#1=12 } +\def\csarg#1#2{\expandafter#1\csname#2\endcsname} + +\def\ThrowAwayComment#1{\begingroup + \def\CurrentComment{#1}% + \let\do\makeinnocent \dospecials + \makeinnocent\^^L% and whatever other special cases + \endlinechar`\^^M \catcode`\^^M=12 \xComment} +{\catcode`\^^M=12 \endlinechar=-1 % + \gdef\xComment#1^^M{\def\test{#1} + \csarg\ifx{PlainEnd\CurrentComment Test}\test + \let\htmlnext\endgroup + \else \csarg\ifx{LaLaEnd\CurrentComment Test}\test + \edef\htmlnext{\endgroup\noexpand\end{\CurrentComment}} + \else \let\htmlnext\xComment + \fi \fi \htmlnext} +} + +\def\includecomment + #1{\expandafter\def\csname#1\endcsname{}% + \expandafter\def\csname end#1\endcsname{}} +\def\excludecomment + #1{\expandafter\def\csname#1\endcsname{\ThrowAwayComment{#1}}% + {\escapechar=-1\relax + \csarg\xdef{PlainEnd#1Test}{\string\\end#1}% + \csarg\xdef{LaLaEnd#1Test}{\string\\end\string\{#1\string\}}% + }} + +\excludecomment{comment} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%% RAW HTML +% +% Enclose raw HTML between a \begin{rawhtml} and \end{rawhtml}. 
+% The html environment ignores its body +% +\excludecomment{rawhtml} + +%%% HTML ONLY +% +% Enclose LaTeX constructs which will only appear in the +% HTML output and will be ignored by LaTeX with +% \begin{htmlonly} and \end{htmlonly} +% +\excludecomment{htmlonly} + +%%% LaTeX ONLY +% Enclose LaTeX constructs which will only appear in the +% DVI output and will be ignored by latex2html with +%\begin{latexonly} and \end{latexonly} +% +\newenvironment{latexonly}{}{} + +%%% HYPERREF +% Suggested by Eric M. Carol +% Similar to \ref but accepts conditional text. +% The first argument is HTML text which will become ``hyperized'' +% (underlined). +% The second and third arguments are text which will appear only in the paper +% version (DVI file), enclosing the fourth argument which is a reference to a label. +% +%e.g. \hyperref{using the tracer}{using the tracer (see Section}{)}{trace} +% where there is a corresponding \label{trace} +% +\newcommand{\hyperref}[4]{#2\ref{#4}#3} + +%%% HTMLREF +% Reference in HTML version only. +% Mix between \htmladdnormallink and \hyperref. +% First arg is text for in both versions, second is label for use in HTML +% version. +\newcommand{\htmlref}[2]{#1} + +%%% HTMLIMAGE +% This command can be used inside any environment that is converted +% into an inlined image (eg a "figure" environment) in order to change +% the way the image will be translated. The argument of \htmlimage +% is really a string of options separated by commas ie +% [scale=],[external],[thumbnail= +% The scale option allows control over the size of the final image. +% The ``external'' option will cause the image not to be inlined +% (images are inlined by default). External images will be accessible +% via a hypertext link. +% The ``thumbnail'' option will cause a small inlined image to be +% placed in the caption. The size of the thumbnail depends on the +% reduction factor. The use of the ``thumbnail'' option implies +% the ``external'' option. 
+% +% Example: +% \htmlimage{scale=1.5,external,thumbnail=0.2} +% will cause a small thumbnail image 1/5th of the original size to be +% placed in the final document, pointing to an external image 1.5 +% times bigger than the original. +% +\newcommand{\htmlimage}[1]{} + +%%% HTMLADDTONAVIGATION +% This command appends its argument to the buttons in the navigation +% panel. It is ignored by LaTeX. +% +% Example: +% \htmladdtonavigation{\htmladdnormallink +% {\htmladdimg{http://server/path/to/gif}} +% {http://server/path}} +\newcommand{\htmladdtonavigation}[1]{} diff --git a/manual/styles/isolatin.sty b/manual/styles/isolatin.sty new file mode 100644 index 00000000..9a685097 --- /dev/null +++ b/manual/styles/isolatin.sty @@ -0,0 +1,174 @@ +% 1-Jun-1992 +% +% File bases on iso1ibm.tex Version 1.0 of May, 9 1990 +\message{ISO-latin-1 input coding, version 0.9 of 1-Jun-1992.} +% +% For input of 8 bits character. +% This allows reading ISO-8859 Latin-1 codes. +% +\chardef \atcode = \the \catcode `\@ +\catcode `\@ = 11 +% +\catcode160=13 \def^^a0{{\bf?}} % 160 '240, "a0 +\catcode161=13 \def^^a1{!`} % 161 '241, "a1 +\catcode162=13 \def^^a2{{\bf?}} % 162 '242, "a2 +\catcode163=13 \def^^a3{\pounds{}} % 163 '243, "a3 +\catcode164=13 \def^^a4{{\bf?}} % 164 '244, "a4 +\catcode165=13 \def^^a5{{\bf?}} % 165 '245, "a5 +\catcode166=13 \def^^a6{$\vert$} % 166 '246, "a6 +\catcode167=13 \def^^a7{\S{}} % 167 '247, "a7 \S{} ISO-1, +\catcode168=13 \def^^a8{\"{ }} % 168 '250, "a8 +\catcode169=13 \def^^a9{\copyright{}}% 169, '251, "a9 +\catcode170=13 \def^^aa{{\bf?}} % 170 '252, "aa +\catcode171=13 % 171 '253, "ab, +\@ifundefined{lguill}{\def^^ab{$<<$}}{\def^^ab{\lguill}} +\catcode172=13 \def^^ac{{\bf?}} % 172 '254, "ac +\catcode173=13 \def^^ad{{\bf?}} % 173 '255 "ad +\catcode174=13 \def^^ae{{\bf?}} % 174 '256, "ae +\catcode175=13 \def^^af{{\bf?}} % 175 '257, "af +\catcode176=13 \def^^b0{{\bf?}} % 176 '260, "b0 ?? 
\No +\catcode177=13 \def^^b1{$\pm$} % 177 '261, "b1 ISO-1 plus-minus +\catcode178=13 \def^^b2{${}^2$} % 178, '262, "b2 +\catcode179=13 \def^^b3{${}^3$} % 179, '263, "b3 +\catcode180=13 \def^^b4{\'{ }} % 180, '264, "b4 +\catcode181=13 \def^^b5{{\bf?}} % 181, '265, "b5 +\catcode182=13 \def^^b6{\P{}} % 182, '266, "b6 +\catcode183=13 \def^^b7{$\cdot$} % 183, '267, "b7 +\catcode184=13 \def^^b8{\c{ }} % 184, '270, "b8 +\catcode185=13 \def^^b9{${}^1$} % 185, '271, "b9 +\catcode186=13 \def^^ba{{\bf?}} % 186, '272, "ba +\catcode187=13 % 187, '273, "bb +\@ifundefined{rguill}{\def^^bb{$>>$}}{\def^^bb{\rguill}} +\catcode188=13 \def^^bc{$\frac 1 4$} % 188, '274, "bc +\catcode189=13 \def^^bd{$\frac 1 2$} % 189, '275, "bd +\catcode190=13 \def^^be{$\frac 3 4$} % 190, '276, "be +\catcode191=13 \def^^bf{?`} % 191, '277, "bf +\catcode192=13 \def^^c0{\`A} % 192, '300, "c0 +\@ifundefined{@grave@A@grave@}{\def^^c0{\`A}}{\let^^c0=\@grave@A@grave@} +\catcode193=13 \def^^c1{\'A} % 193, '301, "c1 +\@ifundefined{@acute@A@acute@}{\def^^c1{\'A}}{\let^^c1=\@acute@A@acute@} +\catcode194=13 \def^^c2{\^A} % 194, '302, "c2 +\@ifundefined{@circflx@A@circflx@}{\def^^c2{\^A}}{\let^^c2=\@circflx@A@circflx@} +\catcode195=13 \def^^c3{\~A} % 195, '303, "c3 +\@ifundefined{@tileda@A@tilda@}{\def^^c3{\~A}}{\let^^c3=\@tileda@A@tilda@} +\catcode196=13 \def^^c4{\"A} % 196, '304, "c4 +\@ifundefined{@Umlaut@A@Umlaut@}{\def^^c4{\"A}}{\let^^c4=\@Umlaut@A@Umlaut@} +\catcode197=13 \def^^c5{\AA{}} % 197, '305, "c5 +\@ifundefined{@A@A@}{\def^^c5{\AA{}}}{\let^^c5=\@A@A@} +\catcode198=13 \def^^c6{\AE{}} % 198, '306, "c6 +\@ifundefined{@A@E@}{\def^^c6{\AE{}}}{\let^^c6=\@A@E@} +\catcode199=13 \def^^c7{\c{C}} % 199, '307, "c7 +\@ifundefined{@cedilla@C@cedilla}{\def^^c7{\c{C}}}{\let^^c7=\@cedilla@C@cedilla} +\catcode200=13 \def^^c8{\`E} % 200, '310, "c8 +\@ifundefined{@grave@E@grave@}{\def^^c8{\`E}}{\let^^c8=\@grave@E@grave@} +\catcode201=13 \def^^c9{\'E} % 201, '311, "c9 
+\@ifundefined{@acute@E@acute@}{\def^^c9{\'E}}{\let^^c9=\@acute@E@acute@} +\catcode202=13 \def^^ca{\^E} % 202, '312, "ca +\@ifundefined{@circflx@E@circflx@}{\def^^ca{\^E}}{\let^^ca=\@circflx@E@circflx@} +\catcode203=13 \def^^cb{{\"E}} % 203, '313, "cb +\@ifundefined{@Umlaut@E@Umlaut@}{\def^^cb{\"E}}{\let^^cb=\@Umlaut@E@Umlaut@} +\catcode204=13 \def^^cc{\`I} % 204, '314, "cc +\@ifundefined{@grave@I@grave@}{\def^^cc{\`I}}{\let^^cc=\@grave@I@grave@} +\catcode205=13 \def^^cd{\'I} % 205, '315, "cd +\@ifundefined{@acute@I@acute@}{\def^^cd{\'I}}{\let^^cd=\@acute@I@acute@} +\catcode206=13 \def^^ce{\^I} % 206, '316, "ce +\@ifundefined{@circflx@I@circflx@}{\def^^ce{\^I}}{\let^^ce=\@circflx@I@circflx@} +\catcode207=13 \def^^cf{{\"I}} % 207, '317, "cf +\@ifundefined{@Umlaut@I@Umlaut@}{\def^^cf{\"I}}{\let^^cf=\@Umlaut@I@Umlaut@} +\catcode208=13 \def^^d0{\rlap{\raise0.3ex\hbox{--}}D} % 208, '320, "d0 +\@ifundefined{@Eth@}{}{\let^^d0=\@Eth@} +\catcode209=13 \def^^d1{\~N} % 209, '321, "d1 +\@ifundefined{@tileda@N@tilda@}{\def^^d1{\~N}}{\let^^d1\@tileda@N@tilda@} +\catcode210=13 \def^^d2{\`O} % 210, '322, "d2 +\@ifundefined{@grave@O@grave@}{\def^^d2{\`O}}{\let^^d2=\@grave@O@grave@} +\catcode211=13 \def^^d3{\'O} % 211, '323, "d3 +\@ifundefined{@acute@O@acute@}{\def^^d3{\'O}}{\let^^d3\@acute@O@acute@} +\catcode212=13 \def^^d4{\^O} % 212, '324, "d4 +\@ifundefined{@circflx@O@circflx@}{\def^^d4{\^O}}{\let^^d4=\@circflx@O@circflx@} +\catcode213=13 \def^^d5{\~O} % 213, '325, "d5 +\@ifundefined{@tileda@O@tilda@}{\def^^d5{\~O}}{\let^^d5\@tileda@O@tilda@} +\catcode214=13 \def^^d6{\"O} % 214, '326, "d6 +\@ifundefined{@Umlaut@O@Umlaut@}{\def^^d6{\"O}}{\let^^d6=\@Umlaut@O@Umlaut@} +\catcode215=13 \def^^d7{$\times$}% 215, '327, "d7 +\catcode216=13 \def^^d8{\O{}} % 216, '330, "d8 +\@ifundefined{@OOO@}{\def^^d8{\O{}}}{\let^^d8=\@OOO@} +\catcode217=13 \def^^d9{\`U} % 217, '331, "d9 +\@ifundefined{@grave@U@grave@}{\def^^d9{\`U}}{\let^^d9=\@grave@U@grave@} +\catcode218=13 \def^^da{\'U} % 218, '332, "da
+\@ifundefined{@acute@U@acute@}{\def^^da{\'U}}{\let^^da=\@acute@U@acute@} +\catcode219=13 \def^^db{\^U} % 219, '333, "db +\@ifundefined{@circflx@U@circflx@}{\def^^db{\^U}}{\let^^db=\@circflx@U@circflx@} +\catcode220=13 \def^^dc{\"U} % 220, '334, "dc +\@ifundefined{@Umlaut@U@Umlaut@}{\def^^dc{\"U}}{\let^^dc=\@Umlaut@U@Umlaut@} +\catcode221=13 \def^^dd{{\'Y}} % 221, '335, "dd +\@ifundefined{@acute@Y@acute@}{\def^^dd{\'Y}}{\let^^dd=\@acute@Y@acute@} +\catcode222=13 \def^^de{\lower 0.7ex \hbox{l}\hskip-1ex\relax b} % 222, '336, "de +\@ifundefined{@Thorn@}{}{\let^^de=\@Thorn@} +\catcode223=13 \def^^df{\ss{}} % 223, '337, "df +\@ifundefined{@sss@}{\def^^df{\ss{}}}{\let^^df=\@sss@} +\catcode224=13 \def^^e0{\`a} % 224, '340, "e0 +\@ifundefined{@grave@a@grave@}{\def^^e0{\`a}}{\let^^e0=\@grave@a@grave@} +\catcode225=13 \def^^e1{\'a} % 225, '341, "e1 +\@ifundefined{@acute@a@acute@}{\def^^e1{\'a}}{\let^^e1=\@acute@a@acute@} +\catcode226=13 \def^^e2{\^a} % 226, '342, "e2 +\@ifundefined{@circflx@a@circflx@}{\def^^e2{\^a}}{\let^^e2=\@circflx@a@circflx@} +\catcode227=13 \def^^e3{\~a} % 227, '343, "e3 +\@ifundefined{@tileda@a@tilda@}{\def^^e3{\~a}}{\let^^e3=\@tileda@a@tilda@} +\catcode228=13 \def^^e4{\"a} % 228, '344, "e4 +\@ifundefined{@Umlaut@a@Umlaut@}{\def^^e4{\"a}}{\let^^e4=\@Umlaut@a@Umlaut@} +\catcode229=13 \def^^e5{\aa{}} % 229, '345, "e5 +\@ifundefined{@a@a@}{\def^^e5{\aa{}}}{\let^^e5=\@a@a@} +\catcode230=13 \def^^e6{\ae{}} % 230, '346, "e6 +\@ifundefined{@a@e@}{\def^^e6{\ae{}}}{\let^^e6=\@a@e@} +\catcode231=13 \def^^e7{\c{c}} % 231, '347, "e7 +\@ifundefined{@cedilla@c@cedilla}{\def^^e7{\c{c}}}{\let^^e7=\@cedilla@c@cedilla} +\catcode232=13 \def^^e8{\`e} % 232, '350, "e8 +\@ifundefined{@grave@e@grave@}{\def^^e8{\`e}}{\let^^e8=\@grave@e@grave@} +\catcode233=13 \def^^e9{\'e} % 233, '351, "e9 +\@ifundefined{@acute@e@acute@}{\def^^e9{\'e}}{\let^^e9=\@acute@e@acute@} +\catcode234=13 \def^^ea{\^e} % 234, '352, "ea 
+\@ifundefined{@circflx@e@circflx@}{\def^^ea{\^e}}{\let^^ea=\@circflx@e@circflx@} +\catcode235=13 \def^^eb{\"e} % 235, '353, "eb +\@ifundefined{@Umlaut@e@Umlaut@}{\def^^eb{\"e}}{\let^^eb=\@Umlaut@e@Umlaut@} +\catcode236=13 \def^^ec{\`{\i}} % 236, '354, "ec +\@ifundefined{@grave@i@grave@}{\def^^ec{\`{\i}}}{\let^^ec=\@grave@i@grave@} +\catcode237=13 \def^^ed{\'{\i}} % 237, '355, "ed +\@ifundefined{@acute@i@acute@}{\def^^ed{\'{\i}}}{\let^^ed=\@acute@i@acute@} +\catcode238=13 \def^^ee{\^{\i}} % 238, '356, "ee +\@ifundefined{@circflx@i@circflx@}{\def^^ee{\^{\i}}}{\let^^ee=\@circflx@i@circflx@} +\catcode239=13 \def^^ef{\"{\i}} % 239, '357, "ef +\@ifundefined{@Umlaut@i@Umlaut@}{\def^^ef{\"{\i}}}{\let^^ef=\@Umlaut@i@Umlaut@} +\catcode240=13 \def^^f0{$\partial$} % 240, '360, "f0 +\@ifundefined{@eth@}{\def^^f0{$\partial$}}{\let^^f0=\@eth@} +\catcode241=13 \def^^f1{\~n} % 241, '361, "f1 +\@ifundefined{@tileda@n@tilda@}{\def^^f1{\~n}}{\let^^f1\@tileda@n@tilda@} +\catcode242=13 \def^^f2{\`o} % 242, '362, "f2 +\@ifundefined{@grave@o@grave@}{\def^^f2{\`o}}{\let^^f2=\@grave@o@grave@} +\catcode243=13 \def^^f3{\'o} % 243, '363, "f3 +\@ifundefined{@acute@o@acute@}{\def^^f3{\'o}}{\let^^f3\@acute@o@acute@} +\catcode244=13 \def^^f4{\^o} % 244, '364, "f4 +\@ifundefined{@circflx@o@circflx@}{\def^^f4{\^o}}{\let^^f4=\@circflx@o@circflx@} +\catcode245=13 \def^^f5{\~o} % 245, '365, "f5 +\@ifundefined{@tileda@o@tilda@}{\def^^f5{\~o}}{\let^^f5\@tileda@o@tilda@} +\catcode246=13 \def^^f6{\"o} % 246, '366, "f6 +\@ifundefined{@Umlaut@o@Umlaut@}{\def^^f6{\"o}}{\let^^f6=\@Umlaut@o@Umlaut@} +\catcode247=13 \def^^f7{$\div$} % 247, '367, "f7 +\catcode248=13 \def^^f8{\o{}} % 248, '370, "f8 +\@ifundefined{@ooo@}{\def^^f8{\o{}}}{\let^^f8=\@ooo@} +\catcode249=13 \def^^f9{\`u} % 249, '371, "f9 +\@ifundefined{@grave@u@grave@}{\def^^f9{\`u}}{\let^^f9=\@grave@u@grave@} +\catcode250=13 \def^^fa{\'u} % 250, '372, "fa +\@ifundefined{@acute@u@acute@}{\def^^fa{\'u}}{\let^^fa=\@acute@u@acute@} +\catcode251=13 
\def^^fb{\^u} % 251, '373, "fb +\@ifundefined{@circflx@u@circflx@}{\def^^fb{\^u}}{\let^^fb=\@circflx@u@circflx@} +\catcode252=13 \def^^fc{\"u} % 252, '374, "fc +\@ifundefined{@Umlaut@u@Umlaut@}{\def^^fc{\"u}}{\let^^fc=\@Umlaut@u@Umlaut@} +\catcode253=13 \def^^fd{\'y} % 253, '375, "fd +\@ifundefined{@acute@y@acute@}{\def^^fd{\'y}}{\let^^fd=\@acute@y@acute@} +\catcode254=13 \def^^fe{\lower 0.8ex\hbox{l}\hskip-1ex\relax b} % 254, '376, "fe +\@ifundefined{@thorn@}{}{\let^^fe=\@thorn@} +\catcode255=13 \def^^ff{\"y} % 255, '377, "ff +\@ifundefined{@Umlaut@y@Umlaut@}{\def^^ff{\"y}}{\let^^ff=\@Umlaut@y@Umlaut@} +\catcode `\@ = \the \atcode +\endinput +% End of iso-latin-1.tex diff --git a/manual/styles/multicols.sty b/manual/styles/multicols.sty new file mode 100644 index 00000000..2d945488 --- /dev/null +++ b/manual/styles/multicols.sty @@ -0,0 +1,176 @@ +% Save file as: MULTICOLS.STY Source: FILESERV@SHSU.BITNET +% multicols.sty version 1.0 +% Allows for multiple column typesetting +% From TUGboat, volume 10 (1989), No. 3 +% +% Frank Mittelbach +% Electronic Data Systems +% (Deutschland) GmbH +% Eisenstrasse 56 +% D-6090 Russelsheim +% Federal Republic of Germany +% Bitnet: pzf5hz@drueds2 +% +% Variables: +% \premulticols - If the space left on the page is less than this, a new +% page is started before the multiple columns. Otherwise, a \vskip +% of \multicolsep is added. +% \postmulticols - analogous to \premulticols +% \columnseprule - the width of the rule separating the columns.
+% +% Commands: +% \raggedcolumns - don't align bottom lines of columns +% \flushcolumns - align bottom lines (default) +% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\@ifundefined{mult@cols}{}{\endinput} + +\def\multicols#1{\col@number#1\relax + \ifnum\col@number<\@ne + \@warning{Using '\number\col@number' columns doesn't seem a good idea.^^J + I therefore use two columns instead}% + \col@number\tw@ \fi + \@ifnextchar[\mult@cols{\mult@cols[]}} + +\def\mult@cols[#1]{\@ifnextchar[% + {\mult@@cols{#1}}% + {\mult@@cols{#1}[\premulticols]}} + +\def\mult@@cols#1[#2]{% + \enough@room#2% + #1\par\addvspace\multicolsep + \begingroup + \prepare@multicols\ignorespaces} + +\def\enough@room#1{\par \penalty\z@ + \page@free \pagegoal + \advance \page@free -\pagetotal + \ifdim \page@free <#1\newpage \fi} + +\def\prepare@multicols{% + \output{\global\setbox\partial@page + \vbox{\unvbox\@cclv}}\eject + \vbadness9999 \hbadness5000 + \tolerance\multicoltolerance + \doublecol@number\col@number + \multiply\doublecol@number\tw@ + \advance\baselineskip\multicolbaselineskip + \advance\@colroom-\ht\partial@page + \vsize\col@number\@colroom + \advance\vsize\c@collectmore\baselineskip + \hsize\columnwidth \advance\hsize\columnsep + \advance\hsize-\col@number\columnsep + \divide\hsize\col@number + \linewidth\hsize + \output{\multi@columnout}% + \multiply\count\footins\col@number + \multiply\skip \footins\col@number + \reinsert@footnotes} + +\def\endmulticols{\par\penalty\z@ + \output{\balance@columns}\eject + \endgroup \reinsert@footnotes + \global\c@unbalance\z@ + \enough@room\postmulticols + \addvspace\multicolsep} + +\newcount\c@unbalance \c@unbalance = 0 +\newcount\c@collectmore \c@collectmore = 0 +\newcount\col@number +\newcount\doublecol@number +\newcount\multicoltolerance \multicoltolerance = 9999 +\newdimen\page@free +\newdimen\premulticols \premulticols = 50pt +\newdimen\postmulticols \postmulticols = 20pt +\newskip\multicolsep \multicolsep = 12pt plus 4pt minus 3pt 
+\newskip\multicolbaselineskip \multicolbaselineskip=0pt +\newbox\partial@page + +\def\process@cols#1#2{\count@#1\relax + \loop #2% + \advance\count@\tw@ + \ifnum\count@<\doublecol@number + \repeat} + +\def\page@sofar{\unvbox\partial@page + \process@cols\z@{\wd\count@\hsize}% + \hbox to\textwidth{% + \process@cols\tw@{\box\count@ + \hss\vrule\@width\columnseprule\hss}% + \box\z@}} + +\def\reinsert@footnotes{\ifvoid\footins\else + \insert\footins{\unvbox\footins}\fi} + +\def\multi@columnout{% + \ifnum\outputpenalty <-\@Mi + \speci@ls \else + \splittopskip\topskip + \splitmaxdepth\maxdepth + \dimen@\@colroom + \divide\skip\footins\col@number + \ifvoid\footins \else + \advance\dimen@-\skip\footins + \advance\dimen@-\ht\footins \fi + \process@cols\tw@{\setbox\count@ + \vsplit\@cclv to\dimen@}% + \setbox\z@\vsplit\@cclv to\dimen@ + \ifvoid\@cclv \else + \unvbox\@cclv + \penalty\outputpenalty + \fi + \setbox\@cclv\vbox{\page@sofar}% + \@makecol\@outputpage + \global\@colroom\@colht + \process@deferreds + \global\vsize\col@number\@colroom + \global\advance\vsize + \c@collectmore\baselineskip + \multiply\skip\footins\col@number\fi} + +\def\speci@ls{% + \typeout{floats and marginpars not allowed inside `multicols' environment}% + \unvbox\@cclv\reinsert@footnotes + \gdef\@currlist{}} + +\def\process@deferreds{% + \@floatplacement + \begingroup + \let\@tempb\@deferlist + \gdef\@deferlist{}% + \let\@elt\@scolelt + \@tempb \endgroup} + +\newif\ifshr@nking + +\def\raggedcolumns{% + \@bsphack\shr@nkingtrue\@esphack} +\def\flushcolumns{% + \@bsphack\shr@nkingfalse\@esphack} + +\def\balance@columns{% + \splittopskip\topskip + \splitmaxdepth\maxdepth + \setbox\z@\vbox{\unvbox\@cclv}\dimen@\ht\z@ + \advance\dimen@\col@number\topskip + \advance\dimen@-\col@number\baselineskip + \divide\dimen@\col@number + \advance\dimen@\c@unbalance\baselineskip + {\vbadness\@M \loop + {\process@cols\@ne{\global\setbox\count@\box\voidb@x}}% + \global\setbox\@ne\copy\z@ +
{\process@cols\thr@@{\global\setbox\count@\vsplit\@ne to\dimen@}}% + \ifshr@nking \global\setbox\thr@@\vbox{\unvbox\thr@@}% + \fi + \ifdim\ht\@ne >\ht\thr@@ + \global\advance\dimen@\p@ + \repeat}% + \dimen@\ht\thr@@ + \process@cols\z@{\@tempcnta\count@ + \advance\@tempcnta\@ne + \setbox\count@\vtop to\dimen@ + {\unvbox\@tempcnta + \ifshr@nking\vfill\fi}}% + \global\vsize\@colroom + \global\advance\vsize\ht\partial@page + \page@sofar} diff --git a/manual/styles/multind.sty b/manual/styles/multind.sty new file mode 100644 index 00000000..ef91c28d --- /dev/null +++ b/manual/styles/multind.sty @@ -0,0 +1,65 @@ +% indexes document style option for producing multiple indexes +% for use with the modified book style, CHbook.sty +% Written by F.W. Long, Version 1.1, 12 August 1991. + +% Modified by F.W. Long, Version 1.1a, 29 August 1991 +% to get the index heading correctly spaced. + +% Modified by F.W. Long, Version 1.1b, 31 August 1991 +% to remove the abbreviation \ix (which should be in the document, not here). + +% Modified \makeindex and \index commands to allow multiple indexes +% in both cases the first parameter is the index name. +% They now work more like \@starttoc and \addcontentsline. +% \index is no longer defined inside \makeindex but determines +% whether the appropriate file is defined before writing to it. + +\def\makeindex#1{\begingroup + \makeatletter + \if@filesw \expandafter\newwrite\csname #1@idxfile\endcsname + \expandafter\immediate\openout \csname #1@idxfile\endcsname #1.idx\relax + \typeout{Writing index file #1.idx }\fi \endgroup} + +\def\index#1{\@bsphack\begingroup + \def\protect##1{\string##1\space}\@sanitize + \@wrindex{#1}} + +% \@wrindex now checks that the appropriate file is defined.
+ +\def\@wrindex#1#2{\let\thepage\relax + \xdef\@gtempa{\@ifundefined{#1@idxfile}{}{\expandafter + \write\csname #1@idxfile\endcsname{\string + \indexentry{#2}{\thepage}}}}\endgroup\@gtempa + \if@nobreak \ifvmode\nobreak\fi\fi\@esphack} + +% Modified \printindex command to allow multiple indexes. +% This now takes over much of the work of \theindex. +% Again, the first parameter is the index name. +% The second parameter is the index title (as printed). + +\newif\if@restonecol +\def\printindex#1#2{\@restonecoltrue\if@twocolumn\@restonecolfalse\fi + \columnseprule \z@ \columnsep 35pt + \newpage \twocolumn[{\Large\bf #2 \vskip4ex}] + \markright{\uppercase{#2}} + \addcontentsline{toc}{section}{#2} + \@input{#1.ind}} + +% The following index commands are taken from book.sty. +% \theindex is modified to not start a chapter. + +\def\theindex{\parindent\z@ + \parskip\z@ plus .3pt\relax\let\item\@idxitem} +\def\@idxitem{\par\hangindent 40pt} +\def\subitem{\par\hangindent 40pt \hspace*{20pt}} +\def\subsubitem{\par\hangindent 40pt \hspace*{30pt}} +\def\endtheindex{\if@restonecol\onecolumn\else\clearpage\fi} +\def\indexspace{\par \vskip 10pt plus 5pt minus 3pt\relax} + +% the command \ix allows an abbreviation for the general index + +%\def\ix#1{#1\index{general}{#1}} + +% define the \see command from makeidx.sty + +\def\see#1#2{{\em see\/} #1} diff --git a/manual/styles/ocamldoc.hva b/manual/styles/ocamldoc.hva new file mode 100644 index 00000000..58b7bb12 --- /dev/null +++ b/manual/styles/ocamldoc.hva @@ -0,0 +1,20 @@ +\usepackage{alltt} +\newenvironment{ocamldoccode}{\begin{alltt}}{\end{alltt}} +\newenvironment{ocamldocdescription}{\begin{quote}}{\end{quote}} +\newenvironment{ocamldoccomment}{\begin{quote}}{\end{quote}} + + +\newenvironment{ocamldocindent}{\list{}{}\item\relax}{\endlist} +\newenvironment{ocamldocsigend} + {\noindent\quad\texttt{sig}\ocamldocindent} + {\endocamldocindent + \noindent\quad\texttt{end}\medskip} +\newenvironment{ocamldocobjectend} + 
{\noindent\quad\texttt{object}\ocamldocindent} + {\endocamldocindent + \noindent\quad\texttt{end}\medskip} + +\newcommand{\moduleref}[1]{\ifhtml\ahref{libref/#1.html}{\texttt{#1}}\fi} + +% For processing .tex generated by ocamldoc (for text manual) +\newcommand{\ocamldocvspace}[1]{\vspace{#1}} \ No newline at end of file diff --git a/manual/styles/ocamldoc.sty b/manual/styles/ocamldoc.sty new file mode 100644 index 00000000..b176c9b1 --- /dev/null +++ b/manual/styles/ocamldoc.sty @@ -0,0 +1,75 @@ + +%% Support macros for LaTeX documentation generated by ocamldoc. +%% This file is in the public domain; do what you want with it. + +\NeedsTeXFormat{LaTeX2e} +\ProvidesPackage{ocamldoc} + [2001/12/04 v1.0 ocamldoc support] + +\newenvironment{ocamldoccode}{% + \bgroup + \leftskip\@totalleftmargin + \rightskip\z@skip + \parindent\z@ + \parfillskip\@flushglue + \parskip\z@skip + %\noindent + \@@par\smallskip + \@tempswafalse + \def\par{% + \if@tempswa + \leavevmode\null\@@par\penalty\interlinepenalty + \else + \@tempswatrue + \ifhmode\@@par\penalty\interlinepenalty\fi + \fi} + \obeylines + \verbatim@font + \let\org@prime~% + \@noligs + \let\org@dospecials\dospecials + \g@remfrom@specials{\\} + \g@remfrom@specials{\{} + \g@remfrom@specials{\}} + \let\do\@makeother + \dospecials + \let\dospecials\org@dospecials + \frenchspacing\@vobeyspaces + \everypar \expandafter{\the\everypar \unpenalty}} +{\egroup\par} + +\def\g@remfrom@specials#1{% + \def\@new@specials{} + \def\@remove##1{% + \ifx##1#1\else + \g@addto@macro\@new@specials{\do ##1}\fi} + \let\do\@remove\dospecials + \let\dospecials\@new@specials + } + +\newenvironment{ocamldocdescription} +{\list{}{\rightmargin0pt \topsep0pt}\raggedright\item\noindent\relax\ignorespaces} +{\endlist\medskip} + +\newenvironment{ocamldoccomment} +{\list{}{\leftmargin 2\leftmargini \rightmargin0pt \topsep0pt}\raggedright\item\noindent\relax} +{\endlist} + +\let \ocamldocparagraph \paragraph +\def \paragraph #1{\ocamldocparagraph
{#1}\noindent} +\let \ocamldocsubparagraph \subparagraph +\def \subparagraph #1{\ocamldocsubparagraph {#1}\noindent} + +\let\ocamldocvspace\vspace + +\newenvironment{ocamldocindent}{\list{}{}\item\relax}{\endlist} +\newenvironment{ocamldocsigend} + {\noindent\quad\texttt{sig}\ocamldocindent} + {\endocamldocindent\vskip -\lastskip + \noindent\quad\texttt{end}\medskip} +\newenvironment{ocamldocobjectend} + {\noindent\quad\texttt{object}\ocamldocindent} + {\endocamldocindent\vskip -\lastskip + \noindent\quad\texttt{end}\medskip} + +\endinput diff --git a/manual/styles/plaintext.sty b/manual/styles/plaintext.sty new file mode 100644 index 00000000..2d1b366c --- /dev/null +++ b/manual/styles/plaintext.sty @@ -0,0 +1,268 @@ +% Plain text style file. + +\typeout{Style option Plaintext} + +% Version from John Pavel's dvidoc.sty, March 1987 +% Heavily hacked by Xavier Leroy, June 1993. + +% Redefine all fonts to be the "doc" pseudo-font, with fixed spacing. +% Since rm, tt and mi have different character encodings, we keep +% several copies of the doc font, with different names, so that dvi2txt +% can select the right encoding according to the name. Also, we use +% different names for boldface and italics, so that dvi2txt can select +% the right style whenever possible. 
+ +\def\sl{\rm} +\def\sc{\rm} + +\def\vpt{} +\def\vipt{} +\def\viipt{} +\def\viiipt{} +\def\ixpt{} +\def\xipt{} +\def\xiipt{} +\def\xivpt{} +\def\xviipt{} +\def\xxpt{} +\def\xxvpt{} + +%%% for i in fiv six sev egt nin ten elv twl frtn svnt twty twfv; do +%%% echo "\\font\\${i}rm = docrm" +%%% echo "\\font\\${i}mi = docmi" +%%% echo "\\font\\${i}sy = cmsy10" +%%% echo "\\font\\${i}it = docit" +%%% echo "\\font\\${i}bf = docbf" +%%% echo "\\font\\${i}tt = doctt" +%%% echo "\\font\\${i}sf = docrm" +%%% done + +\font\fivrm = docrm +\font\fivmi = docmi +\font\fivsy = cmsy10 +\font\fivit = docit +\font\fivbf = docbf +\font\fivtt = doctt +\font\fivsf = docrm +\font\sixrm = docrm +\font\sixmi = docmi +\font\sixsy = cmsy10 +\font\sixit = docit +\font\sixbf = docbf +\font\sixtt = doctt +\font\sixsf = docrm +\font\sevrm = docrm +\font\sevmi = docmi +\font\sevsy = cmsy10 +\font\sevit = docit +\font\sevbf = docbf +\font\sevtt = doctt +\font\sevsf = docrm +\font\egtrm = docrm +\font\egtmi = docmi +\font\egtsy = cmsy10 +\font\egtit = docit +\font\egtbf = docbf +\font\egttt = doctt +\font\egtsf = docrm +\font\ninrm = docrm +\font\ninmi = docmi +\font\ninsy = cmsy10 +\font\ninit = docit +\font\ninbf = docbf +\font\nintt = doctt +\font\ninsf = docrm +\font\tenrm = docrm +\font\tenmi = docmi +\font\tensy = cmsy10 +\font\tenit = docit +\font\tenbf = docbf +\font\tentt = doctt +\font\tensf = docrm +\font\elvrm = docrm +\font\elvmi = docmi +\font\elvsy = cmsy10 +\font\elvit = docit +\font\elvbf = docbf +\font\elvtt = doctt +\font\elvsf = docrm +\font\twlrm = docrm +\font\twlmi = docmi +\font\twlsy = cmsy10 +\font\twlit = docit +\font\twlbf = docbf +\font\twltt = doctt +\font\twlsf = docrm +\font\frtnrm = docrm +\font\frtnmi = docmi +\font\frtnsy = cmsy10 +\font\frtnit = docit +\font\frtnbf = docbf +\font\frtntt = doctt +\font\frtnsf = docrm +\font\svtnrm = docrm +\font\svtnmi = docmi +\font\svtnsy = cmsy10 +\font\svtnit = docit +\font\svtnbf = docbf +\font\svtntt = doctt +\font\svtnsf = 
docrm +\font\twtyrm = docrm +\font\twtymi = docmi +\font\twtysy = cmsy10 +\font\twtyit = docit +\font\twtybf = docbf +\font\twtytt = doctt +\font\twtysf = docrm +\font\twfvrm = docrm +\font\twfvmi = docmi +\font\twfvsy = cmsy10 +\font\twfvit = docit +\font\twfvbf = docbf +\font\twfvtt = doctt +\font\twfvsf = docrm + +\rm + +% Dimensions + +\hsize 78 em % 78 characters per line so fit any screen +\textwidth 78 em +\raggedright % Do not try to align on the right +\parindent=2em % Two blanks for paragraph indentation +\def\enspace{\kern 1em} \def\enskip{\hskip 1em\relax} + +% Vertical skips may best be multiples of \baselineskip +\baselineskip=12pt % 6 lines per inch +\normalbaselineskip=\baselineskip +\vsize 58\baselineskip % 58 lines per page +\textheight 58\baselineskip +\voffset=0pt +\parskip=0pt +\smallskipamount=0pt +\medskipamount= \baselineskip +\bigskipamount=2\baselineskip +\raggedbottom % do not try to align the page bottom + +% By default itemize is done with bullets, which don't look good. + +\def\labelitemi{-} +\def\labelitemii{-} +\def\labelitemiii{-} +\def\labelitemiv{-} + +% Fix up table of contents. Default latex uses fractional spacing between +% the section number and title. This comes out as no space in the doc file +% so we add a space to numberline, and expand tempdima by one em to allow +% for it. Also, go out of math mode for the dot in the leader. Dots in +% math mode turn out to be colons! 
+% +\def\@dottedtocline#1#2#3#4#5{\ifnum #1>\c@tocdepth \else + \vskip \z@ plus .2pt + {\hangindent #2\relax \rightskip \@tocrmarg \parfillskip -\rightskip + \parindent #2\relax\@afterindenttrue + \interlinepenalty\@M + \leavevmode + \@tempdima #3\relax + \addtolength\@tempdima{1em} + #4\nobreak\leaders\hbox to 2em{\hss.\hss}\hfill \nobreak \hbox to\@pnumwidth{\hfil\rm #5}\par}\fi} +\def\numberline#1{\advance\hangindent\@tempdima \hbox to\@tempdima{#1\hfil}\ } +% +% Can't really do superscripts, so do footnotes with [] +% +\def\@makefnmark{\hbox{(\@thefnmark)}} +\long\def\@makefntext#1{\parindent 1em\noindent + \hbox to 3em{\hss\@thefnmark.}\ #1} +\skip\footins 24pt plus 4pt minus 2pt +\def\footnoterule{\kern-12\p@ +\hbox to .4\columnwidth{\leaders\hbox{-}\hfill}} +% +% \arrayrulewidth 1em \doublerulesep 1em +% +% Some fairly obvious hacks. No odd/even pages in doc files. Can't do the +% fancy TeX symbols. +% +\oddsidemargin 0pt \evensidemargin 0pt +\def\TeX{TeX} +\def\LaTeX{LaTeX} +\def\SliTeX{SliTeX} +\def\BibTeX{BibTeX} +% +% special versions of stuff from xxx10.sty, since only one font size +% +\def\@normalsize{\@setsize\normalsize{12pt}\xpt\@xpt +\abovedisplayskip 12pt +\belowdisplayskip 12pt +\abovedisplayshortskip 12pt +\belowdisplayshortskip 12pt +\let\@listi\@listI} % Setting of \@listi added 9 Jun 87 +\let\small\@normalsize +\let\footnotesize\@normalsize +\normalsize +\footnotesep 12pt +\labelsep 10pt +\def\@listI{\leftmargin\leftmargini \parsep 12pt% +\topsep 12pt% +\partopsep 0pt% +\itemsep 0pt} +\let\@listi\@listI +\let\@listii\@listI +\let\@listiii\@listI +\let\@listiv\@listI +\let\@listv\@listI +\let\@listvi\@listI +\@listI + +% We had sort of random numbers of blank lines around section numbers. +% Turns out they used various fractional spacing. Rather than depend +% upon the definition of startsection, just wrap something around it +% that normalizes the arguments to 12pt. Negative args have special +% meanings. 
+\let\@oldstartsec\@startsection +\def\@startsection#1#2#3#4#5#6{ + \@tempskipa #4\relax + \@tempskipb #5\relax + \ifdim \@tempskipa <\z@ \@tempskipa -12pt \else \@tempskipa 12pt \fi + \ifdim \@tempskipb >\z@ \@tempskipb 12pt \fi +\@oldstartsec{#1}{#2}{#3}{\@tempskipa}{\@tempskipb}{#6} +} + +% To get even spacing in the table of contents + +\def\@pnumwidth{3em} + +\def\l@part#1#2{\addpenalty{-\@highpenalty}% + \addvspace{12pt}% space above part line + \begingroup + \@tempdima 3em + \parindent \z@ \rightskip \@pnumwidth + \parfillskip -\@pnumwidth + {\large \bf + \leavevmode + #1\hfil \hbox to\@pnumwidth{\hss #2}}\par + \nobreak + \global\@nobreaktrue + \everypar{\global\@nobreakfalse\everypar{}}%% suggested by + + \endgroup} + +\def\l@chapter#1#2{\addpenalty{-\@highpenalty}% + \vskip 12pt + \@tempdima 2em + \begingroup + \parindent \z@ \rightskip \@pnumwidth + \parfillskip -\@pnumwidth + \bf + \leavevmode + \advance\leftskip\@tempdima + \hskip -\leftskip + #1\nobreak\hfil \nobreak\hbox to\@pnumwidth{\hss #2}\par + \penalty\@highpenalty + \endgroup} + +\def\l@section{\@dottedtocline{1}{2em}{3em}} +\def\l@subsection{\@dottedtocline{2}{4em}{3em}} +\def\l@subsubsection{\@dottedtocline{3}{7em}{4em}} +\def\l@paragraph{\@dottedtocline{4}{10em}{5em}} +\def\l@subparagraph{\@dottedtocline{5}{12em}{6em}} + diff --git a/manual/styles/scroll.sty b/manual/styles/scroll.sty new file mode 100644 index 00000000..a344b03d --- /dev/null +++ b/manual/styles/scroll.sty @@ -0,0 +1,5 @@ +% Modification to plaintext.sty to suppress page headings +% and make pages contiguous when processed with dvi2txt + +\pagestyle{empty} +\advance\voffset by -2\baselineskip diff --git a/manual/styles/syntaxdef.hva b/manual/styles/syntaxdef.hva new file mode 100644 index 00000000..7266d7ef --- /dev/null +++ b/manual/styles/syntaxdef.hva @@ -0,0 +1,157 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Hevea code for syntax definitions of the ocaml manual % 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Important commands +% \token, for typesetting grammar terminals +% \nonterm, for typesetting grammar non-terminals +% +% Beware: \nonterm introduces either a local anchor or a local reference +% -Anchors are introduced when \nonterm occurs in the first column of +% syntax definitions (environment 'syntax') +% - References are introduced everywhere else +% +% For pure typesetting effect without links (eg. to typeset 'e' as 'expr') +% use the \nt command (eg. \nt{e}). +% In syntax definitions, the tool 'transf' translates @word@ into \nt{word}. +% +% Warnings are produced +% - For references to non-defined non terminals +% - For multiple definitions of the same non-terminal +% Warnings can be avoided for a given non-terminal 'expr' by issuing +% the command \stx@silent{'expr'} +% +%It is also possible to alias a nonterminal: +%\stx@alias{name}{othername} +%will make reference to 'name' point to the definition of non-terminal +%'othername' +\newif\ifspace +\def\addspace{\ifspace\;\spacefalse\fi} +\ifhtml +\newcommand{\token}[1]{\texttt{\blue#1}} +\else +\newcommand{\token}[1]{\texttt{#1}} +\fi +%%% warnings +\def\stx@warning#1#2{\@ifundefined{stx@#1@silent}{\hva@warn{#2}}{}} +\def\stx@silent#1{\def\csname stx@#1@silent\endcsname{}} +%%% Do not warn about those +%initial example +\stx@silent{like}\stx@silent{that}% +%Not defined +\stx@silent{regular-char}% +\stx@silent{regular-string-char}% +%\stx@silent{regular-char-str}% +\stx@silent{lowercase-ident}% +\stx@silent{capitalized-ident}% +\stx@silent{space}% +\stx@silent{tab}% +\stx@silent{newline}% +%Used in many places +\stx@silent{prefix}% +\stx@silent{name}% +\stx@silent{xname}% +%Not defined +\stx@silent{external-declaration}% +\stx@silent{unit-name}% +%%Redefined in exten.etex +\stx@silent{parameter}% +\stx@silent{pattern}% +\stx@silent{constr-decl}% +\stx@silent{type-param}% +\stx@silent{let-binding}% +\stx@silent{expr}% +\stx@silent{typexpr}% 
+\stx@silent{module-expr}% +\stx@silent{type-representation}% +\stx@silent{definition}% +\stx@silent{specification}% +\stx@silent{type-equation}% +\stx@silent{class-field}% +\stx@silent{mod-constraint}% +\stx@silent{module-type}% +\stx@silent{constant}% +%%Redefined in names.etex +\stx@silent{label-name}% +%%Not really defined in lexyacc.etex +\stx@silent{character-set}% +\stx@silent{symbol}% +%%Not defined in debugger.etex +\stx@silent{integer} +%%Not defined in ocamldoc.etex +\stx@silent{string} +\stx@silent{id} +\stx@silent{Exc} +\stx@silent{URL} +%%%%%%%%%%%%% +%% Aliases %% +%%%%%%%%%%%%% +\newcommand{\stx@alias}[2]{\def\csname stx@#1@alias\endcsname{#2}} +\stx@alias{typ}{typexpr}% +\stx@alias{met}{method-name}% +\stx@alias{tag}{tag-name}% +\stx@alias{lab}{label-name}% +\stx@alias{C}{constr-name} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%special anchor +\newstyle{a.syntax:link}{color:maroon;text-decoration:underline} +\newstyle{a.syntax:visited}{color:maroon;text-decoration:underline} +\newstyle{a.syntax:hover}{color:black;text-decoration:none;background-color:\#FF6060} +%compatibility for hevea-1.1?/heeva-2.?? 
+\ifu\@tr@url +\providecommand{\@tr@url}[1]{#1}\def\stx@id{NAME}\else +\def\stx@id{id}\fi +\newcommand{\@syntaxlocref}[2] +{\@aelement{href="\@print{#}\@tr@url{#1}" class="syntax"}{#2}} +\newcommand{\@syntaxaname}[2] +{\@aelement{\stx@id="#1" class="syntax"}{#2}} +%%Refer to anchor, internal : +%#1 -> anchor #2 -> visible tag +\def\@ref@anchor#1#2{% +\@ifundefined{stx@#1@exists} +{\stx@warning{#1}{Undefined non-terminal: '#1'}#2} +{\@syntaxlocref{#1}{#2}}} +%%Refer to anchor +\def\ref@anchor#1{% +\ifu\csname stx@#1@alias\endcsname +\@ref@anchor{#1}{#1}\else +\@ref@anchor{\csname stx@#1@alias\endcsname}{#1}\fi} +\def\stx@exists#1{\def\csname stx@#1@exists\endcsname{}} +%%Define anachor +\def\def@anchor#1{% +\@ifundefined{stx@#1} +{{\@nostyle\@auxdowrite{\string\stx@exists\{#1\}}}% +\gdef\csname stx@#1\endcsname{}\@syntaxaname{#1}{#1}} +{\@ifundefined{stx@#1@silent} +{\hva@warn{Redefinition of non-terminal '#1'}#1} +{\ref@anchor{#1}}}} +%%%Change \@anchor and initial definition, for html only, of course! 
+\ifhtml +\def\set@name{\let\@anchor\def@anchor} +\let\@anchor\ref@anchor +\else +\def\set@name{} +\def\@anchor{} +\fi +%%%Format non-terminal +\def\nt#1{\textit{\maroon#1}} +%%%Link for non-terminal and format +\def\nonterm#1{\addspace\nt{\@anchor{#1}}\spacetrue} +\def\brepet{\addspace\{} +\def\erepet{\}} +\def\boption{\addspace[} +\def\eoption{]} +\def\brepets{\addspace\{} +\def\erepets{\}^+} +\def\bparen{\addspace(} +\def\eparen{)} +\def\orelse{\mid \spacefalse} +\def\is{ & ::= & \spacefalse } +\def\alt{ \\ & \mid & \spacefalse } +\def\sep{ \\ \\ \spacefalse } +\def\cutline{} +\def\emptystring{\epsilon} +\def\syntax{\@open{div}{class="syntax"}$$\begin{array}{>{\set@name}rcl}\spacefalse} +\def\endsyntax{\end{array}$$\@close{div}} +\def\syntaxleft{\@open{div}{class="syntaxleft"}$\begin{array}{>{\set@name}rcl}\spacefalse} +\def\endsyntaxleft{\end{array}$\@close{div}} +\def\synt#1{$\spacefalse#1$} diff --git a/manual/styles/syntaxdef.sty b/manual/styles/syntaxdef.sty new file mode 100644 index 00000000..1db6f5bf --- /dev/null +++ b/manual/styles/syntaxdef.sty @@ -0,0 +1,26 @@ +\newif\ifspace +\def\addspace{\ifspace \; \spacefalse \fi} +\def\token#1{\addspace\hbox{\tt #1} \spacetrue} +\def\nonterm#1{\addspace\nt{#1} \spacetrue} +\def\nt#1{\hbox{\sl #1\/}} +\def\brepet{\addspace\{} +\def\erepet{\}} +\def\boption{\addspace[} +\def\eoption{]} +\def\brepets{\addspace\{} +\def\erepets{\}^+} +\def\bparen{\addspace(} +\def\eparen{)} +\def\orelse{\mid \spacefalse} +\def\is{ & ::= & \spacefalse } +\def\alt{ \\ & \mid & \spacefalse } +\def\cutline{ \\ & & \spacefalse } +\def\sep{ \\[2mm] \spacefalse } +\def\emptystring{\epsilon} +\def\syntax{$$\begin{array}{rrl}\spacefalse} +\def\endsyntax{\end{array}$$} +\def\syntaxleft{$\begin{array}{rrl}\spacefalse} +\def\endsyntaxleft{\end{array}$} +\let\oldldots=\ldots +\def\ldots{\spacefalse\oldldots} +\def\synt#1{$\spacefalse#1$} diff --git a/manual/styles/syntaxdeftxt.sty b/manual/styles/syntaxdeftxt.sty new file mode 100644 index 
00000000..370b6580 --- /dev/null +++ b/manual/styles/syntaxdeftxt.sty @@ -0,0 +1,22 @@ +\newif\ifspace +\def\addspace{\ifspace\ \spacefalse\fi} +\def\token#1{\addspace\hbox{\tt #1}\spacetrue\ignorespaces} +%%% \def\nonterm#1{\addspace\hbox{\tt <#1>}\spacetrue\ignorespaces} +\def\nonterm#1{\addspace\hbox{\it #1}\spacetrue\ignorespaces} +\def\brepet{\addspace\hbox to1em{$\{$\hfil}\ignorespaces} +\def\erepet{\hbox to1em{$\}$\hfil}\ignorespaces} +\def\boption{\addspace[\ignorespaces} +\def\eoption{]\ignorespaces} +\def\brepets{\brepet\ignorespaces} +\def\erepets{\erepet+\ignorespaces} +\def\bparen{\addspace(\ignorespaces} +\def\eparen{)\ignorespaces} +\def\orelse{~\hbox to1em{$|$\hfil}~\spacefalse\ignorespaces} +\def\is{& ::= & \spacefalse\ignorespaces} +\def\alt{\\ & \hbox to1em{$|$\hfil} & \spacefalse } +\def\sep{\\[\baselineskip] \spacefalse} +\def\emptystring{nothing} +\def\syntax{\begin{center}\begin{tabular}{rrl}\spacefalse\ignorespaces} +\def\endsyntax{\end{tabular}\end{center}} +\def\ldots{\spacefalse...\ignorespaces} +\def\synt#1{$\spacefalse#1$} diff --git a/manual/tests/Makefile b/manual/tests/Makefile new file mode 100644 index 00000000..3947b175 --- /dev/null +++ b/manual/tests/Makefile @@ -0,0 +1,20 @@ +TOPDIR=$(abspath ../..) 
+include $(TOPDIR)/Makefile.tools +MANUAL=$(TOPDIR)/manual/manual + +.PHONY: all +all: check-cross-references check-stdlib + +cross-reference-checker: cross_reference_checker.ml + $(OCAMLC) $(TOPDIR)/compilerlibs/ocamlcommon.cma -I $(TOPDIR)/parsing \ + -I $(TOPDIR)/driver \ + cross_reference_checker.ml -o cross-reference-checker + +check-cross-references: cross-reference-checker + $(OCAMLRUN) ./cross-reference-checker \ + -auxfile $(MANUAL)/texstuff/manual.aux \ + $(TOPDIR)/utils/warnings.ml \ + $(TOPDIR)/bytecomp/translmod.ml + +check-stdlib: + ./check-stdlib-modules $(TOPDIR) diff --git a/manual/tests/README.md b/manual/tests/README.md new file mode 100644 index 00000000..31d2ab40 --- /dev/null +++ b/manual/tests/README.md @@ -0,0 +1,8 @@ +The purpose of these tests is to check the consistency between the manual and +the rest of the compiler sources: + +- `cross_reference_checker.ml` checks that references to the manual from the + compiler sources are still accurate. + +- `check-stdlib-modules` checks that all stdlib modules are linked from the + main entry of the stdlib in the manual: `manual/manual/library/stdlib.etex` diff --git a/manual/tests/check-stdlib-modules b/manual/tests/check-stdlib-modules new file mode 100755 index 00000000..6e983523 --- /dev/null +++ b/manual/tests/check-stdlib-modules @@ -0,0 +1,23 @@ +#!/bin/sh + +TMPDIR="${TMPDIR:-/tmp}" + +(cd $1/stdlib; ls -1 *.mli) | sed -e 's/\.mli//' >$TMPDIR/stdlib-$$-files +cut -c 1 $TMPDIR/stdlib-$$-files | tr a-z A-Z >$TMPDIR/stdlib-$$-initials +cut -c 2- $TMPDIR/stdlib-$$-files \ +| paste -d '\0' $TMPDIR/stdlib-$$-initials - >$TMPDIR/stdlib-$$-modules + +exitcode=0 +for i in `cat $TMPDIR/stdlib-$$-modules`; do + case $i in + Stdlib | Camlinternal* | *Labels | Obj | Pervasives) continue;; + esac + grep -q -e '"'$i'" & p\.~\\pageref{'$i'} &' $1/manual/manual/library/stdlib.etex || { + echo "Module $i is missing from stdlib.etex."
>&2 + exitcode=2 + } +done + +rm -f $TMPDIR/stdlib-$$-* + +exit $exitcode diff --git a/manual/tests/cross_reference_checker.ml b/manual/tests/cross_reference_checker.ml new file mode 100644 index 00000000..98eb598d --- /dev/null +++ b/manual/tests/cross_reference_checker.ml @@ -0,0 +1,243 @@ +(** Check reference to manual section in ml files + + [cross-reference-cheker -auxfile tex.aux src.ml ] + checks that all expression and let bindings in [src.ml] annotated + with [[@manual.ref "tex_label"]] are integer tuple literals, e.g + {[ + let[@manual.ref "sec:major"] ref = 1, 1 + (* or *) + let ref = (3 [@manual.ref "ch:pentatonic"]) + ]} + and that their values are consistent with the computed references for the + payload labels (e.g "sec:major", "ch:pentatonic") present in the TeX + auxiliary file [tex.aux] + +*) + + +(** {1 Error printing } *) +type error = + | Reference_mismatch of + {loc:Location.t; label:string; ocaml:int list; tex:int list} + | Unknown_label of Location.t * string + | Tuple_expected of Location.t + | No_aux_file + | Wrong_attribute_payload of Location.t + +let pp_ref ppf = Format.pp_print_list ~pp_sep:( fun ppf () -> + Format.pp_print_string ppf ".") Format.pp_print_int ppf + +let print_error error = + Location.report_error Format.std_formatter @@ match error with + | Tuple_expected loc -> + Location.errorf ~loc + "Integer tuple expected after manual reference annotation@." + | Unknown_label (loc,label) -> + Location.errorf ~loc + "@[Unknown manual label:@ %s@]@." label + | Reference_mismatch r -> + Location.errorf ~loc:r.loc + "@[References for label %S do not match:@,\ + OCaml side %a,@,\ + manual %a@]@." + r.label + pp_ref r.ocaml + pp_ref r.tex + | No_aux_file -> + Location.errorf "No aux file provided@." + | Wrong_attribute_payload loc -> + Location.errorf ~loc "Wrong payload for \"@manual.ref\"@." 
+ + +(** {1 Main types} *) + +(** Maps of ocaml reference to manual labels *) +module Refs = Map.Make(String) + +(** Reference extracted from TeX aux files *) +type tex_reference = + { label: string; + pos: int list; + level: string + } + +type status = Ok | Bad | Unknown + +(** Reference extracted from OCaml source files *) +type ml_reference = { loc: Location.t; pos: int list; status:status } + +(** {1 Consistency check } *) + +let check_consistency (ref:tex_reference) {loc; pos; _ } = + if ref.pos = pos then + { loc; pos; status = Ok } + else begin + print_error @@ Reference_mismatch {loc;label=ref.label;tex=ref.pos;ocaml=pos}; + {loc; pos; status = Bad } + end + +let rec check_final_status label error = function + | { status = Ok; _ } -> error + | { status = Bad; _ } -> true + | { status = Unknown; loc; _} -> + print_error (Unknown_label (loc,label)); + true + +(** {1 Data extraction from TeX side} *) + +module TeX = struct + + (** Read reference information from a line of the aux file *) + let scan s = + try + Scanf.sscanf s + "\\newlabel{%s@}{{%s@}{%_d}{%_s@}{%s@.%_s@}{%_s@}}" + (fun label position_string level -> + let pos = + List.map int_of_string (String.split_on_char '.' 
position_string) in + Some {label;level;pos} ) + with + | Scanf.Scan_failure _ -> None + | Failure _ -> None + + let check_line refs line = + match scan line with + | None -> refs + | Some ref -> + match Refs.find_opt ref.label refs with + | None -> refs + | Some l -> + Refs.add ref.label + (List.map (check_consistency ref) l) + refs + + let check_all aux refs = + let chan = open_in aux in + let rec lines refs = + let s = try Some (input_line chan) with End_of_file -> None in + match s with + | None -> refs + | Some line -> + lines @@ check_line refs line in + let refs = lines refs in + close_in chan; + let error = Refs.fold (fun label ocaml_refs error -> + List.fold_left (check_final_status label) error ocaml_refs) + refs false in + if error then exit 2 else exit 0 +end + +(** {1 Extract references from Ocaml source files} *) +module OCaml_refs = struct + + let parse ppf sourcefile = + Pparse.parse_implementation ppf ~tool_name:"manual_cross_reference_check" + sourcefile + + (** search for an attribute [[@manual.ref "tex_label_name"]] *) + let manual_reference_attribute (s, payload) = + if s.Location.txt = "manual.ref" then + match payload with + | Parsetree.( + PStr [{pstr_desc= Pstr_eval + ({ pexp_desc = Pexp_constant Pconst_string (s,_) },_) } ] ) -> + Some s + | _ -> print_error (Wrong_attribute_payload s.Location.loc); + Some "" (* triggers an error *) + else + None + + let rec label_from_attributes = function + | [] -> None + | a :: q -> match manual_reference_attribute a with + | Some _ as x -> x + | None -> label_from_attributes q + + let int e = + let open Parsetree in + match e.pexp_desc with + | Pexp_constant Pconst_integer (s, _ ) -> int_of_string s + | _ -> raise Exit + + let int_list l = + try Some (List.map int l) with + | Exit -> None + + (** We keep a list of OCaml-side references to the same label *) + let add_ref label ref refs = + let l = match Refs.find_opt label refs with + | None -> [ref] + | Some l -> ref :: l in + Refs.add label l refs + + 
let inner_expr loc e = + let tuple_expected () = print_error (Tuple_expected loc) in + match e.Parsetree.pexp_desc with + | Parsetree.Pexp_tuple l -> + begin match int_list l with + | None -> tuple_expected (); [] + | Some pos -> pos + end + | Parsetree.Pexp_constant Pconst_integer (n,_) -> + [int_of_string n] + | _ -> tuple_expected (); [] + + (** extract from [let[@manual.ref "label"] x= 1, 2] *) + let value_binding m iterator vb = + let open Parsetree in + begin match label_from_attributes vb.pvb_attributes with + | None -> () + | Some label -> + let pos = inner_expr vb.pvb_loc vb.pvb_expr in + m := add_ref label {loc = vb.pvb_loc; pos; status = Unknown } !m + end; + iterator.Ast_iterator.expr iterator vb.pvb_expr + + + (** extract from [ (1,2)[@manual.ref "label"]] *) + let expr m iterator e = + let open Parsetree in + begin match label_from_attributes e.pexp_attributes with + | None -> () + | Some label -> + let pos = inner_expr e.pexp_loc e in + m := add_ref label {loc = e.pexp_loc; pos; status = Unknown } !m + end; + Ast_iterator.default_iterator.expr iterator e + + let from_ast m ast = + let iterator = + let value_binding = value_binding m in + let expr = expr m in + Ast_iterator.{ default_iterator with value_binding; expr } in + iterator.structure iterator ast + + let from_file m f = + from_ast m @@ parse Format.std_formatter f +end + + +(** {1 Argument handling and main function } *) + +let usage = + "cross-reference-check -auxfile [file.aux] file_1 ... 
file_n checks that \ + the cross references annotated with [@manual.ref] are consistent \ + with the provided auxiliary TeX file" + +(** the auxiliary file containing reference to be checked *) +let aux_file = ref None + +let args = + [ + "-auxfile",Arg.String (fun s -> aux_file := Some s), + "set the reference file" + ] + +let () = + let m = ref Refs.empty in + Arg.parse args (OCaml_refs.from_file m) usage; + match !aux_file with + | None -> print_error No_aux_file; exit 2 + | Some aux -> + let error = TeX.check_all aux !m in + if error then exit 2 else exit 0 diff --git a/manual/tools/.gitignore b/manual/tools/.gitignore new file mode 100644 index 00000000..db7f8368 --- /dev/null +++ b/manual/tools/.gitignore @@ -0,0 +1,12 @@ +transf.ml +texquote2 +htmltransf.ml +transf +htmlgen +htmlquote +latexscan.ml +dvi2txt +caml-tex2 +*.dSYM +*.cm[io] +*.o diff --git a/manual/tools/.ignore b/manual/tools/.ignore new file mode 100644 index 00000000..12c72e4a --- /dev/null +++ b/manual/tools/.ignore @@ -0,0 +1,11 @@ +transf.ml +texquote2 +htmltransf.ml +transf +htmlgen +htmlquote +latexscan.ml +dvi2txt +caml-tex2 +*.dSYM +*.cm[io] diff --git a/manual/tools/Makefile b/manual/tools/Makefile new file mode 100644 index 00000000..b3500acb --- /dev/null +++ b/manual/tools/Makefile @@ -0,0 +1,54 @@ +TOPDIR=../..
+COMPFLAGS=-I $(OTOPDIR)/otherlibs/str -I $(OTOPDIR)/otherlibs/unix +include $(TOPDIR)/Makefile.tools + +CFLAGS=-g -O + +all: texquote2 transf htmlquote htmlgen dvi2txt caml-tex2 + +dvi2txt: + cd dvi_to_txt; ${MAKE} + +transf: transf.cmo htmltransf.cmo transfmain.cmo + $(OCAMLC) -o transf -g transf.cmo htmltransf.cmo transfmain.cmo + +transf.ml: transf.mll + $(OCAMLLEX) transf.mll + +htmltransf.ml: htmltransf.mll + $(OCAMLLEX) htmltransf.mll + +htmlgen: latexmacros.cmo latexscan.cmo latexmain.cmo + $(OCAMLC) -o htmlgen -g latexmacros.cmo latexscan.cmo latexmain.cmo + +latexscan.ml: latexscan.mll + ocamllex latexscan.mll + +caml-tex2: caml_tex2.ml + $(OCAMLC) $(TOPDIR)/compilerlibs/ocamlcommon.cma -I $(TOPDIR)/parsing \ + -o caml-tex2 str.cma unix.cma caml_tex2.ml + +.SUFFIXES: +.SUFFIXES: .ml .cmo .mli .cmi .c + +.ml.cmo: + $(OCAMLC) -c $< + +.mli.cmi: + $(OCAMLC) -c $< + +.c: + $(CC) $(CFLAGS) -o $@ $< + +clean: + rm -f transf.ml latexscan.ml htmltransf.ml + rm -f texquote2 transf htmlquote htmlgen dvi2txt + rm -f transf.ml latex.ml + rm -f *.o *.cm? 
*.cmxa + rm -f *~ #*# + cd dvi_to_txt; ${MAKE} clean + +latexmacros.cmo: latexmacros.cmi +latexmain.cmo: latexscan.cmo +latexscan.cmo: latexmacros.cmi +transfmain.cmo: transf.cmo htmltransf.cmo diff --git a/manual/tools/caml-tex b/manual/tools/caml-tex new file mode 100755 index 00000000..7eea11b5 --- /dev/null +++ b/manual/tools/caml-tex @@ -0,0 +1,131 @@ +#!/usr/bin/perl + +$camllight = "TERM=dumb ocaml"; +$camlbegin = "\\caml\n"; +$camlend = "\\endcaml\n"; +$camlin = "\\?"; +$camlout = "\\:"; +$camlblank = "\\;\n"; + +$linelen = 72; +$output = ""; +$cut_at_blanks = 0; + +while ($#ARGV >= 0) { + $_ = $ARGV[0]; + last unless (/^-/); + $linelen = $ARGV[1], shift, shift, next if (/^-n$/); + $output = $ARGV[1], shift, shift, next if (/^-o$/); + $camllight = $ARGV[1], shift, shift, next if (/^-caml$/); + $cut_at_blanks = 1, shift, next if (/^-w$/); + printf STDERR ("Unknown option '%s', ignored\n", $_); + shift; +} + +# First pass: extract the Caml phrases to evaluate + +open(ML, "> .input.ml") || die("Cannot create .input.ml : $!"); + +foreach $infile (@ARGV) { + open(IN, $infile) || die("Cannot open $infile : $!"); + while(<IN>) { + if (m/^\\begin{caml_(example|example\*|eval)}\s*$/) { + while(<IN>) { + last if m/^\\end{caml_(example|example\*|eval)}\s*$/; + print ML $_; + } + } + } + close(IN); +} + +close(ML); + +# Feed the phrases to a Caml toplevel + +open(TOPLEVEL, "$camllight 2>&1 < .input.ml |") || + die("Cannot start camllight : $!"); + +<TOPLEVEL>; <TOPLEVEL>; # skip the banner +$lastread = <TOPLEVEL>; +$lastread =~ s/^# //; + +# Second pass: shuffle the TeX source and the output of the toplevel + +if ($output) { + if ($output eq "-") { + open(OUT, ">&STDOUT"); + } else { + open(OUT, ">$output") || die("Cannot create $output: $!"); + } +} + +foreach $infile (@ARGV) { + open(IN, $infile) || die("Cannot open $infile: $!"); + if (! 
$output) { + $outfile = $infile; + $outfile =~ s/\.tex$//; + open(OUT, "> $outfile.ml.tex") || die("Cannot create $outfile.ml.tex: $!"); + } + while(<IN>) { + if (m/^\\begin{caml_example(\*?)}\s*$/) { + $omit_answer = $1; # true if caml_example*, false if caml_example + print OUT $camlbegin; + $severalphrases = 0; + while(<IN>) { + last if m/\\end{caml_example\*?}\s*$/; + print OUT $camlblank if ($severalphrases); + while(1) { + s/\\/\\\\/g; + print OUT $camlin, $_; + last if m/;; *$/; + $_ = <IN>; + } + while ($lastread =~ s/^ //) { } + while($lastread) { + last if $lastread =~ s/^# //; + print STDERR $lastread; + if (! $omit_answer) { + while (length($lastread) > $linelen) { + if ($cut_at_blanks) { + $cutpos = rindex($lastread, ' ', $linelen); + if ($cutpos == -1) { $cutpos = $linelen; } else { $cutpos++; } + } else { + $cutpos = $linelen; + } + $line = substr($lastread, 0, $cutpos); + $line =~ s/\\/\\\\/g; + print OUT $camlout, $line, "\n"; + $lastread = substr($lastread, $cutpos, + length($lastread) - $cutpos); + } + $lastread =~ s/\\/\\\\/g; + print OUT $camlout, $lastread; + } + $lastread = <TOPLEVEL>; + } + $severalphrases = 1; + } + print OUT $camlend; + } + elsif (m/^\\begin{caml_eval}\s*$/) { + while(<IN>) { + last if m/^\\end{caml_eval}\s*$/; + if (m/;; *$/) { + while ($lastread =~ s/^ //) { } + while($lastread) { + last if $lastread =~ s/^#//; + print STDERR $lastread; + $lastread = <TOPLEVEL>; + } + } + } + } + else { + print OUT $_; + } + } + close(IN); +} + +close(TOPLEVEL); diff --git a/manual/tools/caml_tex2.ml b/manual/tools/caml_tex2.ml new file mode 100644 index 00000000..d115b274 --- /dev/null +++ b/manual/tools/caml_tex2.ml @@ -0,0 +1,605 @@ +(* $Id$ *) + +open StdLabels +open Printf +open Str + +let camlbegin = "\\caml" +let camlend = "\\endcaml" +let camlin = {|\\?\1|} +let camlout = {|\\:\1|} +let camlbunderline = "\\<" +let camleunderline = "\\>" + +let start newline out s args = + Printf.fprintf out "%s%s" camlbegin s; + List.iter (Printf.fprintf out "{%s}") args; + if 
newline then Printf.fprintf out "\n" + +let stop newline out s = + Printf.fprintf out "%s%s" camlend s; + if newline then Printf.fprintf out "\n" + +let code_env ?(newline=true) env out s = + Printf.fprintf out "%a%s\n%a" + (fun ppf env -> start false ppf env []) env s (stop newline) env + +let main = "example" +type example_mode = Toplevel | Verbatim | Signature +let string_of_mode = function + | Toplevel -> "toplevel" + | Verbatim -> "verbatim" + | Signature -> "signature" + +let input_env = "input" +let ok_output ="output" +let error ="error" +let warning ="warn" +let phrase_env = "" + + +let camllight = ref "TERM=norepeat ocaml" +let verbose = ref true +let linelen = ref 72 +let outfile = ref "" +let cut_at_blanks = ref false +let files = ref [] + +let _ = + Arg.parse ["-n", Arg.Int (fun n -> linelen := n), "line length"; + "-o", Arg.String (fun s -> outfile := s), "output"; + "-caml", Arg.String (fun s -> camllight := s), "toplevel"; + "-w", Arg.Set cut_at_blanks, "cut at blanks"; + "-v", Arg.Bool (fun b -> verbose := b ), "output result on stderr" + ] + (fun s -> files := s :: !files) + "caml-tex2: " + +let (~!) = + let memo = ref [] in + fun key -> + try List.assq key !memo + with Not_found -> + let data = Str.regexp key in + memo := (key, data) :: !memo; + data + +(** The Output module deals with the analysis and classification + of the interpreter output and the parsing of status-related options + or annotations for the caml_example environment *) +module Output = struct + + (** Interpreter output status *) + type status = + | Ok + | Warning of int + | Error + + type kind = + | Annotation (** Local annotation: [ [@@expect (*annotation*) ] ]*) + | Option (** Global environment option: + [\begin{caml_example}[option[=value]] + ... 
+ \end{caml_example}] *) + + (** Pretty printer for status *) + let pp_status ppf = function + | Error -> Printf.fprintf ppf "error" + | Ok -> Printf.fprintf ppf "ok" + | Warning n -> Printf.fprintf ppf "warning %d" n + + (** Pretty printer for status preceded with an undefined determinant *) + let pp_a_status ppf = function + | Error -> Printf.fprintf ppf "an error" + | Ok -> Printf.fprintf ppf "an ok" + | Warning n -> Printf.fprintf ppf "a warning %d" n + + (** {1 Related latex environment } *) + let env = function + | Error -> error + | Warning _ -> warning + | Ok -> ok_output + + (** {1 Exceptions } *) + exception Parsing_error of kind * string + + type source = { file:string; lines:int * int; phrase:string; output:string } + type unexpected_report = {source:source; expected:status; got:status} + exception Unexpected_status of unexpected_report + + let print_source ppf {file; lines = (start, stop); phrase; output} = + Printf.fprintf ppf "%s, lines %d to %d:\n\"\n%s\n\"\n\"\n%s\n\"." + file start stop phrase output + + let print_unexpected {source; expected; got} = + if expected = Ok then + Printf.eprintf + "Error when evaluating a caml_example environment in %a\n\ + Unexpected %a status.\n\ + If %a status was expected, add an [@@expect %a] annotation.\n" + print_source source + pp_status got + pp_a_status got + pp_status got + else + Printf.eprintf + "Error when evaluating a guarded caml_example environment in %a\n\ + Unexpected %a status, %a status was expected.\n\ + If %a status was in fact expected, change the status annotation to \ + [@@expect %a].\n" + print_source source + pp_status got + pp_a_status expected + pp_a_status got + pp_status got; + flush stderr + + let print_parsing_error k s = + match k with + | Option -> + Printf.eprintf + "Unknown caml_example option: [%s].\n\ + Supported options are \"ok\",\"error\", or \"warning=n\" (with n \ + a warning number).\n" s + | Annotation -> + Printf.eprintf + "Unknown caml_example phrase annotation: 
[@@expect %s].\n\ + Supported annotations are [@@expect ok], [@@expect error],\n\ + and [@@expect warning n] (with n a warning number).\n" s + + (** {1 Output analysis} *) + let catch_error s = + if string_match ~!{|Error:|} s 0 then Some Error else None + + let catch_warning s = + if string_match ~!{|Warning \([0-9]+\):|} s 0 then + Some (Warning (int_of_string @@ matched_group 1 s)) + else + None + + let status s = match catch_warning s, catch_error s with + | Some w, _ -> w + | None, Some e -> e + | None, None -> Ok + + (** {1 Parsing caml_example options } *) + + (** Parse [warning=n] options for caml_example options *) + let parse_warning s = + if string_match ~!{|warning=\([0-9]+\)|} s 0 then + Some (Warning (int_of_string @@ matched_group 1 s)) + else + None + + (** Parse [warning n] annotations *) + let parse_local_warning s = + if string_match ~!{|warning \([0-9]+\)|} s 0 then + Some (Warning (int_of_string @@ matched_group 1 s)) + else + None + + let parse_error s = + if s="error" then Some Error else None + + let parse_ok s = + if s = "ok" then Some Ok else None + + (** Parse the environment-wide expected status output *) + let expected s = + match parse_warning s, parse_error s with + | Some w, _ -> w + | None, Some e -> e + | None, None -> raise (Parsing_error (Option,s)) + + (** Parse the local (i.e. 
phrase-wide) expected status output *) + let local_expected s = + match parse_local_warning s, parse_error s, parse_ok s with + | Some w, _, _ -> w + | None, Some e, _ -> e + | None, None, Some ok -> ok + | None, None, None -> raise (Parsing_error (Annotation,s)) + +end + +module Text_transform = struct + + type kind = + | Underline + | Ellipsis + + exception Intersection of + {line:int; file:string; left:kind; stop:int; start:int; right:kind} + + let pp ppf = function + | Underline -> Format.fprintf ppf "underline" + | Ellipsis -> Format.fprintf ppf "ellipsis" + + type t = { kind:kind; start:int; stop:int} + let escape_specials s = + let s1 = global_replace ~!"\\\\" "\\\\\\\\" s in + let s2 = global_replace ~!"'" "\\\\textquotesingle\\\\-" s1 in + let s3 = global_replace ~!"`" "\\\\textasciigrave\\\\-" s2 in + s3 + + let rec apply_transform input (pos,underline_stop,out) t = + if pos >= String.length input then pos, underline_stop, out + else match underline_stop with + | Some stop when stop <= t.start -> + let f = escape_specials (String.sub input ~pos ~len:(stop - pos)) in + let out = {|\>|} :: f :: out in + apply_transform input (stop,None,out) t + | _ -> + let out = + escape_specials (String.sub input ~pos ~len:(t.start - pos))::out in + match t.kind with + | Ellipsis -> t.stop, underline_stop, {|\ldots|} :: out + | Underline -> + t.start, Some t.stop, {|\<|} :: out + + (** Check that all ellipsis are strictly nested inside underline transform + and that otherwise no transform starts before the end of the previous + transform in a list of transforms *) + type partition = U of t * t list | E of t + let check_partition line file l = + let init = Ellipsis, 0 in + let rec partition = function + | [] -> [] + | {kind=Underline; _ } as t :: q -> underline t [] q + | {kind=Ellipsis; _ } as t :: q -> E t :: partition q + and underline u n = function + | [] -> end_underline u n [] + | {kind=Underline; _ } :: _ as q -> end_underline u n q + | {kind=Ellipsis; _ } as t :: 
q -> + if t.stop < u.stop then underline u (t::n) q + else end_underline u n (t::q) + and end_underline u n l = U(u,List.rev n) :: partition l in + let check_elt (left,stop) t = + if t.start < stop then + raise (Intersection{line;file;left;stop;start=t.start;right=t.kind}) + else + (t.kind,t.stop) in + let check acc = function + | E t -> check_elt acc t + | U(u,n) -> + let _ = check_elt acc u in + let _ = List.fold_left ~f:check_elt ~init n in + u.kind, u.stop in + List.fold_left ~f:check ~init (partition l) + |> ignore + + let apply ts file line s = + let ts = List.sort (fun x y -> compare x.start y.start) ts in + check_partition line file ts; + let last, underline, ls = + List.fold_left ~f:(apply_transform s) ~init:(0,None,[]) ts in + let last, ls = match underline with + | None -> last, ls + | Some stop -> + let f = escape_specials (String.sub s ~pos:last ~len:(stop - last)) in + stop, {|\>|} :: f :: ls in + let ls = + let n = String.length s in + if last = n then ls else + escape_specials (String.sub s last (n-last)) :: ls in + String.concat "" (List.rev ls) +end + + +let caml_input, caml_output = + let cmd = !camllight ^ " 2>&1" in + try Unix.open_process cmd with _ -> failwith "Cannot start toplevel" +let () = + at_exit (fun () -> ignore (Unix.close_process (caml_input, caml_output))); + ignore (input_line caml_input); + ignore (input_line caml_input) + +let read_output () = + let input = ref (input_line caml_input) in + input := replace_first ~!{|^#\( *\*\)* *|} "" !input; + (* the inner ( *\* )* group is here to clean the starting "*" + introduced for multiline comments *) + let underline = + if string_match ~!"Characters *\\([0-9]+\\)-\\([0-9]+\\):$" !input 0 + then + let start = int_of_string (matched_group 1 !input) + and stop = int_of_string (matched_group 2 !input) in + input := input_line caml_input; + Text_transform.[{kind=Underline; start; stop}] + else [] + in + let output = Buffer.create 256 in + let first_line = ref true in + while not 
(string_match ~!".*\"end_of_input\"$" !input 0) do + if !verbose then prerr_endline !input; + if not !first_line then Buffer.add_char output '\n' else first_line:=false; + Buffer.add_string output !input; + input := input_line caml_input; + done; + Buffer.contents output, underline + +exception Missing_double_semicolon of string * int + +exception Missing_mode of string * int + +type incompatibility = + | Signature_with_visible_answer of string * int +exception Incompatible_options of incompatibility + +exception Phrase_parsing of string + +module Ellipsis = struct + (** This module implements the extraction of ellipsis locations + from phrases. + + An ellipsis is either an [[@ellipsis]] attribute, or a pair + of [[@@@ellipsis.start]...[@@@ellipsis.stop]] attributes. *) + + exception Unmatched_ellipsis of {kind:string; start:int; stop:int} + (** raised when an [[@@@ellipsis.start]] or [[@@@ellipsis.stop]] is + not paired with another ellipsis attribute *) + + exception Nested_ellipses of {first:int ; second:int } + (** raised by [[@@@ellipsis.start][@@@ellipsis.start]] *) + + let extract f x = + let transforms = ref [] in + let last_loc = ref Location.none in + let left_mark = ref None (* stored position of [@@@ellipsis.start]*) in + let location _this loc = + (* we rely on the fact that the default iterator call first + the location subiterator, then the attribute subiterator *) + last_loc := loc in + let attribute _this (attr,_) = + let name = attr.Location.txt in + let loc = !last_loc in + let start = loc.Location.loc_start.Lexing.pos_cnum in + let attr_start = attr.Location.loc.loc_start.Lexing.pos_cnum in + let attr_stop = 1 + attr.Location.loc.loc_end.Lexing.pos_cnum in + let stop = loc.Location.loc_end.Lexing.pos_cnum in + let check_nested () = match !left_mark with + | Some (first,_) -> raise (Nested_ellipses {first; second=attr_start}) + | None -> () in + match name with + | "ellipsis" -> + check_nested (); + transforms := + {Text_transform.kind=Ellipsis; 
start; stop=max attr_stop stop } + :: !transforms + | "ellipsis.start" -> + check_nested (); + left_mark := Some (start, stop) + | "ellipsis.stop" -> + begin match !left_mark with + | None -> raise (Unmatched_ellipsis {kind="right"; start; stop}) + | Some (start, _ ) -> + transforms := {kind=Ellipsis; start ; stop } :: !transforms; + left_mark := None + end + | _ -> () + in + f {Ast_iterator.default_iterator with location; attribute} x; + (match !left_mark with + | None -> () + | Some (start,stop) -> + raise (Unmatched_ellipsis {kind="left"; start; stop }) + ); + !transforms + + let find fname mode s = + let lex = Lexing.from_string s in + Location.init lex fname; + Location.input_name := fname; + Location.input_lexbuf := Some lex; + try + match mode with + | Toplevel -> begin + match Parse.toplevel_phrase lex with + | Ptop_dir _ -> [] + | Ptop_def str -> extract (fun it -> it.structure it) str + end + | Verbatim -> + extract (fun it -> it.structure it) (Parse.implementation lex) + | Signature -> + extract (fun it -> it.signature it) (Parse.interface lex) + with Syntaxerr.Error _ -> raise (Phrase_parsing s) + +end + +let process_file file = + prerr_endline ("Processing " ^ file); + let ic = try open_in file with _ -> failwith "Cannot read input file" in + let phrase_start = ref 1 and phrase_stop = ref 1 in + let incr_phrase_start () = + incr phrase_start; + phrase_stop := !phrase_start in + let oc = + try if !outfile = "-" then + stdout + else if !outfile = "" then + open_out (replace_first ~!"\\.tex$" "" file ^ ".ml.tex") + else + open_out_gen [Open_wronly; Open_creat; Open_append; Open_text] + 0o666 !outfile + with _ -> failwith "Cannot open output file" in + let fatal fmt = + Format.kfprintf + (fun ppf -> Format.fprintf ppf "@]@."; close_in ic; close_out oc; exit 1) + Format.err_formatter ("@[ Error " ^^ fmt) in + let re_spaces = "[ \t]*" in + let re_start = ~!( + {|\\begin{caml_example\(\*?\)}|} ^ re_spaces + ^ {|\({toplevel}\|{verbatim}\|{signature}\)?|} ^ 
re_spaces + ^ {|\(\[\(.*\)\]\)?|} ^ re_spaces + ^ "$" + ) in + try while true do + let input = ref (input_line ic) in + incr_phrase_start(); + if string_match re_start !input 0 + then begin + let omit_answer = matched_group 1 !input = "*" in + let mode = + match matched_group 2 !input with + | exception Not_found -> raise (Missing_mode(file, !phrase_stop)) + | "{toplevel}" -> Toplevel + | "{verbatim}" -> Verbatim + | "{signature}" -> Signature + | _ -> assert false in + if mode = Signature && not omit_answer then raise + (Incompatible_options( + Signature_with_visible_answer(file,!phrase_stop)) + ); + let explicit_stop = match mode with + | Verbatim | Signature -> false + | Toplevel -> true in + let global_expected = try Output.expected @@ matched_group 4 !input + with Not_found -> Output.Ok in + start true oc main [string_of_mode mode]; + let first = ref true in + let read_phrase () = + let phrase = Buffer.create 256 in + let rec read () = + let input = incr phrase_stop; input_line ic in + let implicit_stop = + if string_match ~!"\\\\end{caml_example\\*?}[ \t]*$" + input 0 + then + begin + if !phrase_stop = 1 + !phrase_start then + raise End_of_file + else if explicit_stop then + raise @@ Missing_double_semicolon (file,!phrase_stop) + else + true + end + else false in + if Buffer.length phrase > 0 then Buffer.add_char phrase '\n'; + let stop = + implicit_stop || + ( not (mode = Signature) + && string_match ~!"\\(.*\\)[ \t]*;;[ \t]*$" input 0 ) + in + if not stop then ( + Buffer.add_string phrase input; read () + ) + else begin + decr phrase_stop; + let last_input = if implicit_stop then "" else matched_group 1 input in + let expected = + if string_match ~!{|\(.*\)\[@@expect \(.*\)\]|} last_input 0 then + ( Buffer.add_string phrase (matched_group 1 last_input); + Output.local_expected @@ matched_group 2 last_input ) + else + (Buffer.add_string phrase last_input; global_expected) + in + if not implicit_stop then Buffer.add_string phrase ";;"; + implicit_stop, 
Buffer.contents phrase, expected + end in + read () + in + try while true do + let implicit_stop, phrase, expected = read_phrase () in + let ellipses = Ellipsis.find file mode phrase in + if mode = Signature then fprintf caml_output "module type Wrap = sig\n"; + fprintf caml_output "%s%s%s" phrase + (if mode = Signature then "\nend" else "") + (if implicit_stop then ";;\n" else "\n"); + flush caml_output; + output_string caml_output "\"end_of_input\";;\n"; + flush caml_output; + let output, underline = read_output () in + let status = Output.status output in + if status <> expected then ( + let source = Output.{ + file; + lines = (!phrase_start, !phrase_stop); + phrase; + output + } in + raise (Output.Unexpected_status + {Output.got=status; expected; source} ) ) + else ( incr phrase_stop; phrase_start := !phrase_stop ); + let phrase = + Text_transform.apply (underline @ ellipses) + file !phrase_stop phrase in + (* Special characters may also appear in output strings -Didier *) + let output = Text_transform.escape_specials output in + let phrase = global_replace ~!{|^\(.\)|} camlin phrase + and output = global_replace ~!{|^\(.\)|} camlout output in + start false oc phrase_env []; + code_env ~newline:omit_answer input_env oc phrase; + if not omit_answer then + code_env ~newline:false (Output.env status) oc output; + stop true oc phrase_env; + flush oc; + first := false; + if implicit_stop then raise End_of_file + done + with End_of_file -> phrase_start:= !phrase_stop; stop true oc main + end + else if string_match ~!"\\\\begin{caml_eval}[ \t]*$" !input 0 + then begin + while input := input_line ic; + not (string_match ~!"\\\\end{caml_eval}[ \t]*$" !input 0) + do + fprintf caml_output "%s\n" !input; + if string_match ~!".*;;[ \t]*$" !input 0 then begin + flush caml_output; + output_string caml_output "\"end_of_input\";;\n"; + flush caml_output; + ignore (read_output ()) + end + done + end else begin + fprintf oc "%s\n" !input; + flush oc + end + done with + | 
End_of_file -> close_in ic; close_out oc + | Output.Unexpected_status r -> + ( Output.print_unexpected r; close_in ic; close_out oc; exit 1 ) + | Output.Parsing_error (k,s) -> + ( Output.print_parsing_error k s; + close_in ic; close_out oc; exit 1 ) + | Phrase_parsing s -> fatal "when parsing the following phrase:@ %s" s + | Missing_double_semicolon (file, line_number) -> + fatal + "when evaluating a caml_example environment in %s:@;\ + missing \";;\" at line %d@]@." file (line_number-2) + | Missing_mode (file, line_number) -> + fatal "when parsing a caml_example environment in %s:@;\ + missing mode argument at line %d,@ \ + available modes {toplevel,verbatim}@]@." + file (line_number-2) + | Incompatible_options Signature_with_visible_answer (file, line_number) -> + fatal + "when parsing a caml_example environment in@ \ + %s, line %d:@,\ + the signature mode is only compatible with \"caml_example*\"@ \ + Hint: did you forget to add \"*\"?@]@." + file (line_number-2); + | Text_transform.Intersection {line;file;left;stop;start;right} -> + fatal + "when evaluating a caml_example environment in %s, line %d:@ \ + Textual transforms must be well-separated.@ The \"%a\" transform \ + ended at %d,@ after the start at %d of another \"%a\" transform.@ \ + Hind: did you try to elide a code fragment which raised a warning?\ + @]@." 
+ file (line-2) + Text_transform.pp left stop start Text_transform.pp right + | Ellipsis.Unmatched_ellipsis {kind;start;stop} -> + fatal "when evaluating a caml_example environment,@ \ + the %s mark at position %d-%d was unmatched" + kind start stop + | Ellipsis.Nested_ellipses {first;second} -> + fatal "when evaluating a caml_example environment,@ \ + there were two nested ellipsis attribute.@ The first one \ + started at position %d,@ the second one at %d" + first second + +let _ = + if !outfile <> "-" && !outfile <> "" then begin + try close_out (open_out !outfile) + with _ -> failwith "Cannot open output file" + end; + List.iter process_file (List.rev !files) diff --git a/manual/tools/dvi_to_txt/Makefile b/manual/tools/dvi_to_txt/Makefile new file mode 100644 index 00000000..852996a9 --- /dev/null +++ b/manual/tools/dvi_to_txt/Makefile @@ -0,0 +1,8 @@ +OBJS=io.o interp.o output.o main.o print.o print_rtf.o print_styl.o +CFLAGS=-g + +../dvi2txt: $(OBJS) + $(CC) $(CFLAGS) -o ../dvi2txt $(OBJS) + +clean: + rm -f ../dvi2txt *.o *~ #*# diff --git a/manual/tools/dvi_to_txt/dvi.h b/manual/tools/dvi_to_txt/dvi.h new file mode 100644 index 00000000..8dfb25dc --- /dev/null +++ b/manual/tools/dvi_to_txt/dvi.h @@ -0,0 +1,8 @@ +enum { + SET_CHAR_0=0, SET_CHAR_127=127, SET1=128, SET2, SET3, SET4, SET_RULE, + PUT1, PUT2, PUT3, PUT4, PUT_RULE, NOP, BOP, EOP, PUSH, POP, RIGHT1, + RIGHT2, RIGHT3, RIGHT4, W0, W1, W2, W3, W4, X0, X1, X2, X3, X4, DOWN1, + DOWN2, DOWN3, DOWN4, Y0, Y1, Y2, Y3, Y4, Z0, Z1, Z2, Z3, Z4, + FNT_NUM_0=171, FNT_NUM_63=234, FNT1=235, FNT2, FNT3, FNT4, XXX1, XXX2, + XXX3, XXX4, FNT_DEF1, FNT_DEF2, FNT_DEF3, FNT_DEF4, PRE, POST, POST_POST +}; diff --git a/manual/tools/dvi_to_txt/interp.c b/manual/tools/dvi_to_txt/interp.c new file mode 100644 index 00000000..e50aed36 --- /dev/null +++ b/manual/tools/dvi_to_txt/interp.c @@ -0,0 +1,305 @@ +#include +#include +#include +#include +#include "io.h" +#include "dvi.h" +#include "output.h" + +#define SEEK_CUR 1 + +int 
h, v, w, x, y, z, sp; +int currfont; +int encoding; +int style; + +#define FONT_NAME_SIZE 31 +#define NUM_FONTS 256 + +struct { + char name[FONT_NAME_SIZE+1]; + int encoding; + int style; +} font[NUM_FONTS]; + +#define TYPEWRITER 0 +#define ROMAN 1 +#define MATH_ITALIC 2 +#define MATH_SYMBOL 3 +#define MATH_EXTENSION 4 +#define LINE_SEGMENTS 5 +#define CIRCLE_SEGMENTS 6 +#define LATEX_SYMBOLS 7 + +char * transcode[] = { +/* 0.......+.......1.......+.......2.......+.......3.......+.......4.......+.......5.......+.......6.......+.......7.......+....... */ +/* TYPEWRITER */ + "GDTLXPSUPYO##################### !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~#", +/* ROMAN */ + "GDTLXPSUPYO***** 0'!\"#$%&'()*+,-./0123456789:;!=??@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\"]^.`abcdefghijklmnopqrstuvwxyz--\"~#", +/* MATH_ITALIC */ + "GDTLXPSUPYOabgdezhtiklmnxpystupxyoeuorsp----`'><0123456789.,*dABCDEFGHIJKLMNOPQRSTUVWXYZ#####labcdefghijklmnopqrstuvwxyzij###", +/* MATH_SYMBOL */ + "-.x*/###+-x/.ooo==##<><>==##<><><>||####<>||####'#####/|###0RIT##ABCDEFGHIJKLMNOPQRSTUVWXYZ###########{}<>||||\\|################", +/* MATH_EXTENSION */ + "()[]||||{}<>||##()[]||||{}<>||##()[]||||{}<>||##########################################################[]||||{}################", +/* LINE_SEGMENTS */ + "/||||| _ / / _/ // _ / / __// / _ / \\||||| \\ \\ \\ _\\ \\\\ _ \\ \\ __\\\\ \\ _ \\ ", +/* CIRCLE_SEGMENTS */ + " ", +/* LATEX_SYMBOLS */ + " <<>> U#O0 ~~[] " +}; + +#define STACK_SIZE 100 + +struct { int sh, sv, sw, sx, sy, sz; } stack[STACK_SIZE]; + +struct known_font_struct { + char * prefix; + int encoding, style; +} known_fonts[] = { + "docrm", ROMAN, PLAIN, + "doctt", TYPEWRITER, MONOSPACED, + "docit", ROMAN, ITALICS, + "docbf", ROMAN, BOLD, + "docmi", MATH_ITALIC, PLAIN, + "cmsy", MATH_SYMBOL, PLAIN, + "cmex", MATH_EXTENSION, PLAIN, + "line", LINE_SEGMENTS, PLAIN, + "lcircle", CIRCLE_SEGMENTS, PLAIN, + "lasy", LATEX_SYMBOLS, PLAIN +}; + +void 
fontdef(input, fontnum) + FILE * input; + int fontnum; +{ + int a, l, i; + + assert(fontnum >= 0 && fontnum < NUM_FONTS); + fseek(input, 12, SEEK_CUR); /* skip c, s and d parameters */ + a = get8u(input); + l = get8u(input); + assert(l < FONT_NAME_SIZE); + fseek(input, a, SEEK_CUR); /* skip the "area" part */ + fread(font[fontnum].name, 1, l, input); /* read the font name */ + font[fontnum].name[l] = 0; + for (i = 0; + i < sizeof(known_fonts) / sizeof(struct known_font_struct); + i++) { + if (strncmp(font[fontnum].name, known_fonts[i].prefix, + strlen(known_fonts[i].prefix)) == 0) { + font[fontnum].encoding = known_fonts[i].encoding; + font[fontnum].style = known_fonts[i].style; + return; + } + } + fprintf(stderr, "Warning: unknown font `%s'\n", font[fontnum].name); + font[fontnum].encoding = ROMAN; + font[fontnum].style = PLAIN; +} + +void setfont(fontnum) + int fontnum; +{ + currfont = fontnum; + encoding = font[fontnum].encoding; + style = font[fontnum].style; +} + +int outchar(c) + int c; +{ + if (c < 0 || c > 127) + out(h, v, '#', PLAIN); + else + out(h, v, transcode[encoding][c], style); + return scalex; +} + +void outrule(height, width) + int height, width; +{ + char c; + int dx, dy; + + if (height <= 0 || width <= 0) return; + c = height >= width ? 
'|' : '-'; + dy = 0; + do { + dx = 0; + do { + out(h + dx, v - dy, c, PLAIN); + dx += scalex; + } while (dx <= width); + dy += scaley; + } while (dy < height); +} + +void interprete(input) + FILE * input; +{ + int c, n, height, width, mag; + + sp = 0; + c = get8u(input); + n = get8u(input); + if (c != PRE || n != 2) { + fprintf(stderr, "File does not start with DVI preamble.\n"); + exit(2); + } + (void) get32s(input); + (void) get32s(input); + mag = get32s(input); + scalex = SCALEX * mag / 1000; + scaley = SCALEY * mag / 1000; + n = get8u(input); + fseek(input, n, SEEK_CUR); /* skip comment */ + + begin_document(); + + while (1) { + c = get8u(input); + if (c >= SET_CHAR_0 && c <= SET_CHAR_127) + h += outchar(c); + else if (c >= FNT_NUM_0 && c <= FNT_NUM_63) + setfont(c - FNT_NUM_0); + else switch(c) { + case SET1: + h += outchar(get8u(input)); break; + case SET2: + h += outchar(get16u(input)); break; + case SET3: + h += outchar(get24u(input)); break; + case SET4: + h += outchar(get32s(input)); break; + case SET_RULE: + height = get32s(input); + width = get32s(input); + outrule(height, width); + h += width; + break; + case PUT1: + (void) outchar(get8u(input)); break; + case PUT2: + (void) outchar(get16u(input)); break; + case PUT3: + (void) outchar(get24u(input)); break; + case PUT4: + (void) outchar(get32s(input)); break; + case PUT_RULE: + height = get32s(input); + width = get32s(input); + outrule(height, width); + break; + case NOP: + break; + case BOP: + clear_page(); + h = v = w = x = y = z = 0; + sp = 0; + fseek(input, 44, SEEK_CUR); /* skip c0...c9 and ptr to previous page */ + break; + case EOP: + output_page(); + break; + case PUSH: + assert(sp < STACK_SIZE); + stack[sp].sh = h; stack[sp].sv = v; stack[sp].sw = w; + stack[sp].sx = x; stack[sp].sy = y; stack[sp].sz = z; + sp++; + break; + case POP: + assert(sp > 0); + sp--; + h = stack[sp].sh; v = stack[sp].sv; w = stack[sp].sw; + x = stack[sp].sx; y = stack[sp].sy; z = stack[sp].sz; + break; + case RIGHT1: 
+ h += get8s(input); break; + case RIGHT2: + h += get16s(input); break; + case RIGHT3: + h += get24s(input); break; + case RIGHT4: + h += get32s(input); break; + case W0: + h += w; break; + case W1: + w = get8s(input); h += w; break; + case W2: + w = get16s(input); h += w; break; + case W3: + w = get24s(input); h += w; break; + case W4: + w = get32s(input); h += w; break; + case X0: + h += x; break; + case X1: + x = get8s(input); h += x; break; + case X2: + x = get16s(input); h += x; break; + case X3: + x = get24s(input); h += x; break; + case X4: + x = get32s(input); h += x; break; + case DOWN1: + v += get8s(input); break; + case DOWN2: + v += get16s(input); break; + case DOWN3: + v += get24s(input); break; + case DOWN4: + v += get32s(input); break; + case Y0: + v += y; break; + case Y1: + y = get8s(input); v += y; break; + case Y2: + y = get16s(input); v += y; break; + case Y3: + y = get24s(input); v += y; break; + case Y4: + y = get32s(input); v += y; break; + case Z0: + v += z; break; + case Z1: + z = get8s(input); v += z; break; + case Z2: + z = get16s(input); v += z; break; + case Z3: + z = get24s(input); v += z; break; + case Z4: + z = get32s(input); v += z; break; + case FNT1: + setfont(get8u(input)); break; + case FNT2: + setfont(get16u(input)); break; + case FNT3: + setfont(get24u(input)); break; + case FNT4: + setfont(get32s(input)); break; + case XXX1: + n = get8u(input); fseek(input, n, SEEK_CUR); break; + case XXX2: + n = get16u(input); fseek(input, n, SEEK_CUR); break; + case XXX3: + n = get24u(input); fseek(input, n, SEEK_CUR); break; + case XXX4: + n = get32s(input); fseek(input, n, SEEK_CUR); break; + case FNT_DEF1: + fontdef(input, get8u(input)); break; + case FNT_DEF2: + fontdef(input, get16u(input)); break; + case FNT_DEF3: + fontdef(input, get24u(input)); break; + case FNT_DEF4: + fontdef(input, get32s(input)); break; + case POST: + end_document(); return; + default: + assert(0); + } + } +} diff --git a/manual/tools/dvi_to_txt/io.c 
b/manual/tools/dvi_to_txt/io.c new file mode 100644 index 00000000..5dfe1ccc --- /dev/null +++ b/manual/tools/dvi_to_txt/io.c @@ -0,0 +1,43 @@ +#include +#include "io.h" + +int get16u(input) + FILE * input; +{ + int b1 = getc(input); + int b2 = getc(input); + return (b1 << 8) + b2; +} +int get16s(input) + FILE * input; +{ + int b1 = (schar) getc(input); + int b2 = getc(input); + return (b1 << 8) + b2; +} +int get24u(input) + FILE * input; +{ + int b1 = getc(input); + int b2 = getc(input); + int b3 = getc(input); + return (b1 << 16) + (b2 << 8) + b3; +} +int get24s(input) + FILE * input; +{ + int b1 = (schar) getc(input); + int b2 = getc(input); + int b3 = getc(input); + return (b1 << 16) + (b2 << 8) + b3; +} +int get32s(input) + FILE * input; +{ + int b1 = (schar) getc(input); + int b2 = getc(input); + int b3 = getc(input); + int b4 = getc(input); + return (b1 << 24) + (b2 << 16) + (b3 << 8) + b4; +} + diff --git a/manual/tools/dvi_to_txt/io.h b/manual/tools/dvi_to_txt/io.h new file mode 100644 index 00000000..f9a800d4 --- /dev/null +++ b/manual/tools/dvi_to_txt/io.h @@ -0,0 +1,10 @@ +#ifdef __STDC__ +typedef signed char schar; +#else +typedef char schar; +#endif + +#define get8u(input) getc(input) +#define get8s(input) (schar) getc(input) + +int get16u(), get16s(), get24u(), get24s(), get32u(), get32s(); diff --git a/manual/tools/dvi_to_txt/main.c b/manual/tools/dvi_to_txt/main.c new file mode 100644 index 00000000..0fb03c43 --- /dev/null +++ b/manual/tools/dvi_to_txt/main.c @@ -0,0 +1,47 @@ +#include +#include "output.h" + +void interprete(FILE *input); + +char * input_name; + +int main(argc, argv) + int argc; + char ** argv; +{ + FILE * f; + int i; + + output_device = OUTPUT_PLAIN; + standout_tt = 0; + for (i = 1; i < argc && argv[i][0] == '-'; i++) { + switch(argv[i][1]) { + case 'p': + output_device = OUTPUT_PRINTER; break; + case 'r': + output_device = OUTPUT_RTF; break; + case 's': + output_device = OUTPUT_STYL; break; + case 't': + standout_tt = 1; break; + 
default: + fprintf(stderr, "Unknown option `%s', ignored\n", argv[i]); + } + } + if (i >= argc) { + input_name = "unknown.dvi"; + interprete(stdin); + } else { + for (/*nothing*/; i < argc; i++) { + f = fopen(argv[i], "r"); + if (f == NULL) { + perror(argv[i]); + continue; + } + input_name = argv[i]; + interprete(f); + fclose(f); + } + } + return 0; +} diff --git a/manual/tools/dvi_to_txt/output.c b/manual/tools/dvi_to_txt/output.c new file mode 100644 index 00000000..fc79ed80 --- /dev/null +++ b/manual/tools/dvi_to_txt/output.c @@ -0,0 +1,209 @@ +#include +#include +#include +#include "output.h" + +void null(), print_FF(), plain_line(), printer_line(); +void begin_rtf_document(), end_rtf_document(), end_rtf_page(), rtf_line(); +void begin_styl_page(), end_styl_page(), styl_line(); + +struct output_device { + void (*begin_document)(); + void (*end_document)(); + void (*begin_page)(); + void (*end_page)(); + void (*line)(); +} device[] = { + null, null, null, print_FF, plain_line, + null, null, null, print_FF, printer_line, + begin_rtf_document, end_rtf_document, null, end_rtf_page, rtf_line, + null, null, begin_styl_page, end_styl_page, styl_line +}; + +#define SIZEX 160 + +struct line { + int ypos; + int len; + char * contents; + char * styles; + struct line * next_in_bucket; +}; + +#define NBUCKETS 101 + +struct line * screenlines[NBUCKETS]; + +int numlines; + +char * xmalloc(size) + int size; +{ + char * res = (char *) malloc(size); + if (res == NULL) { + fprintf(stderr, "Out of memory\n"); + exit(2); + } + return res; +} + +char * xrealloc(ptr, size) + char * ptr; + int size; +{ + char * res = (char *) realloc(ptr, size); + if (res == NULL) { + fprintf(stderr, "Out of memory\n"); + exit(2); + } + return res; +} + +void begin_document() +{ + device[output_device].begin_document(); +} + +void end_document() +{ + device[output_device].end_document(); +} + +void clear_page() +{ + int i; + + for (i = 0; i < NBUCKETS; i++) screenlines[i] = NULL; + numlines = 0; +} + 
+void out(x, y, c, style) + int x, y; + char c; + char style; +{ + unsigned int h; + struct line * line; + + h = ((unsigned int) y) % NBUCKETS; + line = screenlines[h]; + while (line != NULL && line->ypos != y) line = line->next_in_bucket; + if (line == NULL) { + line = (struct line *) xmalloc(sizeof(struct line)); + line->ypos = y; + line->len = 80; + line->contents = (char *) xmalloc(line->len); + memset(line->contents, ' ', line->len); + line->styles = (char *) xmalloc(line->len); + memset(line->styles, PLAIN, line->len); + line->next_in_bucket = screenlines[h]; + screenlines[h] = line; + numlines++; + } + x = x / scalex; + if (x < 0) return; + while (x >= line->len) { + int newlen = 2 * line->len; + line->contents = (char *) xrealloc(line->contents, newlen); + memset(line->contents + line->len, ' ', newlen - line->len); + line->styles = (char *) xrealloc(line->styles, newlen); + memset(line->styles + line->len, PLAIN, newlen - line->len); + line->len = newlen; + } + line->contents[x] = c; + line->styles[x] = style; +} + +static void free_bucket(l) + struct line * l; +{ + if (l != NULL) { + free(l->contents); + free(l->styles); + free_bucket(l->next_in_bucket); + free(l); + } +} + +static void free_buckets() +{ + int i; + for (i = 0; i < NBUCKETS; i++) free_bucket(screenlines[i]); +} + +static int compare_lines(l1, l2) + struct line ** l1, ** l2; +{ + return (**l1).ypos - (**l2).ypos; +} + +void output_page() +{ + struct line ** lines; + struct line * l; + int i, j, k, y; + char * p, * q, * style_p, * style_q, * s; + + device[output_device].begin_page(); + + /* First, sort the lines by y coordinate */ + lines = (struct line **) malloc(numlines * sizeof(struct line *)); + if (lines == NULL) { + printf("*** Out of memory ***\n\014"); + free_buckets(); + return; + } + j = 0; + for (i = 0; i < NBUCKETS; i++) + for (l = screenlines[i]; l != NULL; l = l->next_in_bucket) + lines[j++] = l; + qsort(lines, numlines, sizeof(struct line *), compare_lines); + + /* Output the 
lines */ + + y = 0; + for (i = 0; i < numlines; i++) { + /* Emit blank lines to reach the current line ypos */ + while (lines[i]->ypos - y >= 3 * scaley / 2) { + device[output_device].line(NULL, NULL, 0); + y += scaley; + } + /* If next line is close to current line, attempt to merge them */ + while (i + 1 < numlines && + lines[i+1]->ypos - lines[i]->ypos < scaley) { + p = lines[i]->contents; + q = lines[i+1]->contents; + style_p = lines[i]->styles; + style_q = lines[i+1]->styles; + for (j = lines[i]->len; j < lines[i+1]->len; j++) + if (q[j] != ' ') goto cannot_merge; + for (j = lines[i+1]->len; j < lines[i]->len; j++) + if (p[j] != ' ') goto cannot_merge; + k = lines[i]->len; + if (k > lines[i+1]->len) k = lines[i+1]->len; + for (j = 0; j < k; j++) + if (p[j] != ' ' && q[j] != ' ') goto cannot_merge; + /* Seems OK, do the merging */ + for (j = 0; j < k; j++) + if (p[j] != ' ') { + q[j] = p[j]; + style_q[j] = style_p[j]; + } + /* Now consider next line */ + i++; + } + cannot_merge: + /* Now print the current line */ + p = lines[i]->contents; + q = p + lines[i]->len; + while (q >= p && *--q == ' ') /*nothing*/; + device[output_device].line(p, lines[i]->styles, q-p+1); + /* Go on with next line */ + y = lines[i]->ypos; + } + + device[output_device].end_page(); + free(lines); + free_buckets(); +} + diff --git a/manual/tools/dvi_to_txt/output.h b/manual/tools/dvi_to_txt/output.h new file mode 100644 index 00000000..43ce317b --- /dev/null +++ b/manual/tools/dvi_to_txt/output.h @@ -0,0 +1,24 @@ +#define SCALEX 404685 +#define SCALEY 786432 + +int scalex; +int scaley; + +#define PLAIN 0 +#define ITALICS 1 +#define BOLD 2 +#define MONOSPACED 3 + +void begin_document(); +void end_document(); +void clear_page(); +void output_page(); +void out(); + +int output_device; +int standout_tt; + +#define OUTPUT_PLAIN 0 +#define OUTPUT_PRINTER 1 +#define OUTPUT_RTF 2 +#define OUTPUT_STYL 3 diff --git a/manual/tools/dvi_to_txt/print.c b/manual/tools/dvi_to_txt/print.c new file mode 
100644 index 00000000..d0f02cc3 --- /dev/null +++ b/manual/tools/dvi_to_txt/print.c @@ -0,0 +1,41 @@ +#include +#include "output.h" + +/* Low-level output functions */ + +void null() +{ +} + +void print_FF() +{ + putchar('\014'); +} + +void plain_line(txt, style, len) + char * txt, * style; + int len; +{ + fwrite(txt, 1, len, stdout); + putchar('\n'); +} + +void printer_line(txt, style, len) + char * txt, * style; + int len; +{ + for (/*nothing*/; len > 0; len--, txt++, style++) { + putchar(*txt); + switch(*style) { + case ITALICS: + putchar('\b'); putchar('_'); break; + case BOLD: + putchar('\b'); putchar(*txt); break; + case MONOSPACED: + if (standout_tt) { putchar('\b'); putchar(*txt); } + break; + } + } + putchar('\n'); +} + diff --git a/manual/tools/dvi_to_txt/print_rtf.c b/manual/tools/dvi_to_txt/print_rtf.c new file mode 100644 index 00000000..c12c67a3 --- /dev/null +++ b/manual/tools/dvi_to_txt/print_rtf.c @@ -0,0 +1,80 @@ +#include +#include "output.h" + +/* Rich Text Format */ + +void begin_rtf_document() +{ + printf("{\\rtf1\\ansi\\deff0\n"); + printf("{\\fonttbl{\\f0\\fmodern Courier;}}\n"); + printf("\\f0\\fs20\n"); +} + +void end_rtf_document() +{ + printf("}\n"); +} + +void end_rtf_page() +{ + printf("\\page\n"); +} + +void rtf_line(txt, style, len) + char * txt, * style; + int len; +{ + int currstyle; + + for (currstyle = PLAIN; len > 0; len--, txt++, style++) { + if (*txt != ' ') { + switch(*style) { + case PLAIN: + if (currstyle != PLAIN) { + putchar('}'); + currstyle = PLAIN; + } + break; + case ITALICS: + if (currstyle != ITALICS) { + if (currstyle != PLAIN) putchar('}'); + printf("{\\i "); + currstyle = ITALICS; + } + break; + case BOLD: + if (currstyle != BOLD) { + if (currstyle != PLAIN) putchar('}'); + printf("{\\b "); + currstyle = BOLD; + } + break; + case MONOSPACED: + if (standout_tt) { + if (currstyle != BOLD) { + if (currstyle != PLAIN) putchar('}'); + printf("{\\b "); + currstyle = BOLD; + } + } else { + if (currstyle != PLAIN) { + 
putchar('}'); + currstyle = PLAIN; + } + } + break; + } + } + switch(*txt) { + case '\\': + case '{': + case '}': + putchar('\\'); putchar(*txt); break; + default: + putchar(*txt); break; + } + } + if (currstyle != PLAIN) putchar('}'); + printf("\\par\n"); +} + diff --git a/manual/tools/dvi_to_txt/print_styl.c b/manual/tools/dvi_to_txt/print_styl.c new file mode 100644 index 00000000..39135f23 --- /dev/null +++ b/manual/tools/dvi_to_txt/print_styl.c @@ -0,0 +1,90 @@ +#include +#include +#include +#include "output.h" + +/* Macintosh STYL tables */ + +extern char * input_name; + +static FILE * text; +static FILE * styl; +static int partnum = 0; +static int currstyle; +static int currstart; +static int currpos; + +static void output_current_style() +{ + int style_code; + + switch(currstyle) { + case PLAIN: + style_code = 0; break; + case ITALICS: + style_code = 2; break; + case BOLD: + style_code = 1 + 32; break; /* bold condensed */ + case MONOSPACED: + style_code = standout_tt ? 1 + 32 : 0; break; + } + fprintf(styl, "%d %d Monaco %d 9 0 0 0\n", currstart, currpos, style_code); +} + + +static void output_style_change(newstyle) + int newstyle; +{ + if (!standout_tt && (newstyle == PLAIN && currstyle == MONOSPACED || + newstyle == MONOSPACED && currstyle == PLAIN)) { + currstyle = newstyle; + return; + } + output_current_style(); + currstyle = newstyle; + currstart = currpos; +} + +void begin_styl_page() +{ + char name[1024], buffer[1024]; + int n; + + strcpy(name, input_name); + n = strlen(name); + if (n >= 4 && strcmp(name + n - 4, ".dvi") == 0) name[n - 4] = 0; + partnum++; + sprintf(buffer, "%s.%03d.txt", name, partnum); + text = fopen(buffer, "w"); + if (text == NULL) { perror(buffer); exit(2); } + sprintf(buffer, "%s.%03d.stl", name, partnum); + styl = fopen(buffer, "w"); + if (styl == NULL) { perror(buffer); exit(2); } + currstyle = PLAIN; + currstart = 0; + currpos = 0; +} + +void end_styl_page() +{ + output_current_style(); + fclose(text); + fclose(styl); +} 
+ +void styl_line(txt, style, len) + char * txt, * style; + int len; +{ + for (/*nothing*/; len > 0; len--, txt++, style++, currpos++) { + putc(*txt, text); + if (*txt != ' ' && *style != currstyle) { + output_style_change(*style); + } + } + putc('\n', text); + currpos++; +} + + + diff --git a/manual/tools/fix_index.sh b/manual/tools/fix_index.sh new file mode 100755 index 00000000..d2402b40 --- /dev/null +++ b/manual/tools/fix_index.sh @@ -0,0 +1,52 @@ +#!/bin/sh + +# usage: fix_index.sh .idx + +# This script works around a hyperref bug: hyperref does not handle +# quotes in \index arguments properly. +# +# Symptom: +# When \index{-pipe-pipe@\verb`("|"|)`} appears in your .tex, the hyperref +# package mangles it and produces this line in your .idx: +# \indexentry{(-pipe-pipe)@\verb`("|hyperindexformat{\"}}{292} +# instead of the expected: +# \indexentry{(-pipe-pipe)@\verb`("|"|)`|hyperpage}{292} +# +# This is because it fails to handle quoted characters correctly. +# +# The workaround: +# Look for the buggy line in the given .idx file and change it. + +# Note: this bug will happen every time you have a | (pipe) character +# in an index entry (properly quoted with a " (double-quote) before it). +# We fix only the one case that appears in the OCaml documentation. +# We do not attempt a general solution because hyperref erases part +# of the argument, so we cannot recover the correct string from its +# output. + +# Note 2013-06-19: +# The above was for the || operator in the stdlib's Pervasives module. +# Now we have the same problem with the |> operator that was added +# to the same module in commit 13739, hence the second special case. + +usage(){ + echo "usage: fix_index.sh .idx" >&2 + exit 2 +} + +case $# in + 1) ;; + *) usage;; +esac + +ed "$1" <<'EOF' +/-pipe-pipe/s/verb`("|hyperindexformat{\\"}/verb`("|"|)`|hyperpage/ +/-pipe-gt/s/verb`("|hyperindexformat{\\>)`}/verb`("|>)`|hyperpage/ +w +q +EOF + +case $? 
in + 0) echo "fix_index.sh: fixed $1 successfully.";; + *) echo "fix_index.sh: some error occurred."; exit 0;; +esac diff --git a/manual/tools/format-intf b/manual/tools/format-intf new file mode 100755 index 00000000..0228ecc1 --- /dev/null +++ b/manual/tools/format-intf @@ -0,0 +1,153 @@ +#!/usr/bin/perl + +$sep = "\246"; + +$html = 0; +if ($ARGV[0] eq "-html") { + $html = 1; + shift; +} + +# Skip initial junk + +while(($_ = <>) && ! m/^\(\* Module \[(.*)\]:/) { } +m/^\(\* Module \[(.*)\]:/; +$modname = $1; +chop; +s/^\(\* *//; +s/ *\*\) *$//; +s/\[/{\\tt /g; +s/\]/}/g; +print "\\section{$_}\n\n"; +$label = $modname; $label =~ s/[^A-Za-z0-9]//g; +print "\\label{s:$label}\n"; +print "\\index{$modname (module)@\\verb~$modname~ (module)}%\n\n"; +s/{\\tt //g; +s/}//g; +s/_//g; +print "\\pdfsection{$_}\n\n"; + +$incomment = 0; +$inverbatim = 0; + +line: +while(<>) { + chop; + last line if /^\s*\(\*--/; + if (s/^\(\*- //) { + s/ *\*\)$//; + } + if (m/^\s*\(\*\*\*\s*(.*)\*\)\s*$/) { + if ($inverbatim) { + do end_verbatim(); + } + print "\\subsection*{", $1, "}\n"; + next line; + } + if (m/^\s*\(\*\*\s*(.*)\*\)\s*$/) { + if ($inverbatim) { + do end_verbatim(); + } + print "\\subsubsection*{", $1, "}\n"; + next line; + } + if (s/^\s*\(\*//) { + if ($inverbatim) { + do end_verbatim(); + } + print "\\begin{comment}\n"; + $incomment = 1; + } + if ($incomment) { + $endcomment = s/\*\)\s*$//; + if (m/^\s*\[\s*$/) { + print "\\begin{restoreindent}\n" unless $html; + print "\\begin{verbatim}\n"; + while (($_ = <>) && ! m/^\s*\]\s*$/) { + print $_; + } + print "\\end{verbatim}\n"; + print "\\end{restoreindent}\n" unless $html; + } else { + if (s/^-//) { + print "\\\\"; + print "[\\smallskipamount]" unless $html; + } + s/^\s*//; + $count = 0; + foreach $part (split(/(\\?[\[\]])/, $_)) { + if ($part eq "[") { + print ($count == 0 ? "\\verb$sep" : "["); + $count++; + } elsif ($part eq "]") { + $count--; + print ($count == 0 ? 
"$sep" : "]"); + } elsif ($part =~ m/^\\([\[\]])$/) { + print $1; + } else { + print $part; + } + } + } + if ($endcomment) { + print "\n\\end{comment}"; + $incomment = 0; + $inverbatim = 0; + } + } else { + next line if /^$/; + if (! $inverbatim) { + print "\\begin{verbatim}\n"; + $inverbatim = 1; + } + s/^external /val /; + s/ = ("[^"]*"\s*)+$//; + next line if /^\s*$/; + s/^val \( ([^ )]+) \)/val (\1)/; + { + do indexentry($1, " (operator)"), last + if (m/^val \(([^)]*)\)/); + do indexentry($1, ""), last + if (m/^val ([a-zA-Z0-9_']*)/); + do indexentry($1, " (type)"), last + if (m/^type\s.*([a-zA-Z0-9_']*)\s*=/); + do indexentry($1, " (exception)"), last + if (m/^exception ([a-zA-Z0-9_']*)/); + do indexentry($1, " (module type)"), last + if (m/^module type ([a-zA-Z0-9_']*)/); + do indexentry($1, " (functor)"), last + if (m/^module ([a-zA-Z0-9_']*)\s*\(/); + do indexentry($1, " (module)"), last + if (m/^module ([a-zA-Z0-9_']*)/); + } + print $_; + } + print "\n"; +} +do end_verbatim() if $inverbatim; +print "\\end{comment}\n" if $incomment; + +sub indexentry { + local ($_, $comment) = @_; + return if m/^$/ || m/^[a-zA-Z]$/; + s/([@|!])/"$1/g; + if (! m|`|) { + $s = "`"; + } elsif (! m|~|) { + $s = "~"; + } elsif (! m/\|/) { + $s = "|"; + } else { + die("Can't find quote character for $_"); + } + push (@index, "\\index{$_$comment@\\verb$s$_$s$comment}"); +} + +sub end_verbatim { + print "\\end{verbatim}\n"; + foreach $idx (@index) { + print $idx, "%\n"; + } + undef(@index); + $inverbatim = 0; +} diff --git a/manual/tools/htmlcut b/manual/tools/htmlcut new file mode 100755 index 00000000..be079abb --- /dev/null +++ b/manual/tools/htmlcut @@ -0,0 +1,111 @@ +#!/usr/local/bin/perl +# Split an HTML file into smaller nodes. +# Split at

    headers and also at some

    headers. + +$h0 = "H0"; +$h1 = "H1"; +$h2 = "H2"; + +# Parse options + +option: +while(1) { + $_ = $ARGV[0]; + if (/^-([0-9]+)$/) { + $split2[$1] = 1; + } + elsif (/^-article/) { + $h0 = "H1"; + $h1 = "H2"; + $h2 = "H3"; + } + else { + last option; + } + shift(@ARGV); +} + +$infile = $ARGV[0]; + +# Find URL's for the links + +$level0 = 0; +$level1 = 0; +$uselabel = 1; +open(INPUT, $infile); +while() { + if (m|^<$h0>(.*)|o) { + $level0++; + $currfile = "node" . ($level1 + 1) . ".html"; + $lblnum = $level0; + $uselabel = 0; + } + if (m|^<$h1>(.*)|o) { + $level1++; + $level2 = 0; + $currfile = "node$level1.html"; + $lblnum = $level1; + $uselabel = 1; + } + if (m|^<$h2>(.*)|o) { + $level2++; + if ($split2[$level1]) { $currfile = "node$level1.$level2.html"; } + $lblnum = "$level1.$level2"; + } + s||do set_url($1)|ige; +} + +sub set_url { + local ($lbl) = @_; + if ($uselabel) { + $url{$lbl} = "$currfile#$lbl"; + } else { + $url{$lbl} = $currfile; + } + $label{$lbl} = $lblnum; +} + +# Cut the file + +$level1 = 0; +open(INPUT, $infile); +while() { + if (m|^<$h0>(.*)|o) { + if ($level2 > 0) { print FILE1 "\n"; } + select(STDOUT); + if ($level1 >= 1) { print ""; } + print "<$h2>$1\n"; + if ($level1 >= 1) { print "
      "; } + next; + } + if (m|^<$h1>(.*)|o) { + if ($level2 > 0) { print FILE1 "
    \n"; } + $level1++; + $level2 = 0; + select(STDOUT); + if ($level1 == 1) { print "