From: Timo Röhling Date: Sat, 23 Oct 2021 17:17:50 +0000 (+0200) Subject: Migrate to astc-encoder API from Debian package X-Git-Tag: archive/raspbian/1.9.25+dfsg2-8+rpi1~1^2~11 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=93467fb508fc41fb3e548197c0033392859fa1e0;p=filament.git Migrate to astc-encoder API from Debian package Gbp-Pq: Name 0008-Migrate-to-astc-encoder-API-from-Debian-package.patch --- diff --git a/libs/imageio/src/BlockCompression.cpp b/libs/imageio/src/BlockCompression.cpp index 57b607e..16cc0d0 100644 --- a/libs/imageio/src/BlockCompression.cpp +++ b/libs/imageio/src/BlockCompression.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -29,22 +30,17 @@ #define STB_DXT_IMPLEMENTATION #include +extern "C" +{ + uint16_t float_to_sf16(float val); +} + namespace image { static LinearImage extendToFourChannels(LinearImage source); CompressedTexture astcCompress(const LinearImage& original, AstcConfig config) { - // If this is the first time, initialize the ARM encoder tables. - - static bool first = true; - if (first) { - test_inappropriate_extended_precision(); - prepare_angular_tables(); - build_quantization_mode_table(); - first = false; - } - // Check the validity of the given block size. using Format = CompressedFormat; @@ -81,231 +77,94 @@ CompressedTexture astcCompress(const LinearImage& original, AstcConfig config) { return {}; } - // Create an input image for the ARM encoder in a format that it can consume. - // It expects four-channel data, so we extend or curtail the channel count in a reasonable way. - // The encoder can take half-floats or bytes, but we always give it half-floats. - - LinearImage source = extendToFourChannels(original); - const uint32_t width = source.getWidth(); - const uint32_t height = source.getHeight(); - astc_codec_image* input_image = allocate_image(16, width, height, 1, 0); - for (int y = 0; y < height; y++) { - auto imagedata16 = input_image->imagedata16[0][y]; - float const* src = source.getPixelRef(0, y); - for (int x = 0; x < width; x++) { - imagedata16[4 * x] = float_to_sf16(src[4 * x], SF_NEARESTEVEN); - imagedata16[4 * x + 1] = float_to_sf16(src[4 * x + 1], SF_NEARESTEVEN); - imagedata16[4 * x + 2] = float_to_sf16(src[4 * x + 2], SF_NEARESTEVEN); - imagedata16[4 * x + 3] = float_to_sf16(src[4 * x + 3], SF_NEARESTEVEN); - } - } - - // Determine the bitrate based on the specified block size. - - int xdim_2d = config.blocksize.x, ydim_2d = config.blocksize.y; - const float log10_texels_2d = std::log((float)(xdim_2d * ydim_2d)) / std::log(10.0f); - const float bitrate = 128.0 / (xdim_2d * ydim_2d); - - // We do not fully support 3D textures yet, but we include some of the 3D config params anyway. - - int xdim_3d, ydim_3d, zdim_3d; - find_closest_blockdim_3d(bitrate, &xdim_3d, &ydim_3d, &zdim_3d, 0); - const float log10_texels_3d = std::log((float)(xdim_3d * ydim_3d * zdim_3d)) / log(10.0f); - - // Set up presets. - - int plimit_autoset; - float oplimit_autoset; - float dblimit_autoset_2d; - float dblimit_autoset_3d; - float bmc_autoset; - float mincorrel_autoset; - int maxiters_autoset; - int pcdiv; - + // Configure encoder + astcenc_config my_config; + float quality; switch (config.quality) { case AstcPreset::VERYFAST: - plimit_autoset = 2; - oplimit_autoset = 1.0; - dblimit_autoset_2d = fmax(70 - 35 * log10_texels_2d, 53 - 19 * log10_texels_2d); - dblimit_autoset_3d = fmax(70 - 35 * log10_texels_3d, 53 - 19 * log10_texels_3d); - bmc_autoset = 25; - mincorrel_autoset = 0.5; - maxiters_autoset = 1; - switch (ydim_2d) { - case 4: pcdiv = 240; break; - case 5: pcdiv = 56; break; - case 6: pcdiv = 64; break; - case 8: pcdiv = 47; break; - case 10: pcdiv = 36; break; - case 12: pcdiv = 30; break; - default: pcdiv = 30; break; - } + quality = ASTCENC_PRE_FASTEST; break; case AstcPreset::FAST: - plimit_autoset = 4; - oplimit_autoset = 1.0; - dblimit_autoset_2d = fmax(85 - 35 * log10_texels_2d, 63 - 19 * log10_texels_2d); - dblimit_autoset_3d = fmax(85 - 35 * log10_texels_3d, 63 - 19 * log10_texels_3d); - bmc_autoset = 50; - mincorrel_autoset = 0.5; - maxiters_autoset = 1; - switch (ydim_2d) { - case 4: pcdiv = 60; break; - case 5: pcdiv = 27; break; - case 6: pcdiv = 30; break; - case 8: pcdiv = 24; break; - case 10: pcdiv = 16; break; - case 12: pcdiv = 20; break; - default: pcdiv = 20; break; - } + quality = ASTCENC_PRE_FAST; break; case AstcPreset::MEDIUM: - plimit_autoset = 25; - oplimit_autoset = 1.2; - dblimit_autoset_2d = fmax(95 - 35 * log10_texels_2d, 70 - 19 * log10_texels_2d); - dblimit_autoset_3d = fmax(95 - 35 * log10_texels_3d, 70 - 19 * log10_texels_3d); - bmc_autoset = 75; - mincorrel_autoset = 0.75; - maxiters_autoset = 2; - switch (ydim_2d) { - case 4: pcdiv = 25; break; - case 5: pcdiv = 15; break; - case 6: pcdiv = 15; break; - case 8: pcdiv = 10; break; - case 10: pcdiv = 8; break; - case 12: pcdiv = 6; break; - default: pcdiv = 6; break; - } + quality = ASTCENC_PRE_MEDIUM; break; case AstcPreset::THOROUGH: - plimit_autoset = 100; - oplimit_autoset = 2.5; - dblimit_autoset_2d = fmax(105 - 35 * log10_texels_2d, 77 - 19 * log10_texels_2d); - dblimit_autoset_3d = fmax(105 - 35 * log10_texels_3d, 77 - 19 * log10_texels_3d); - bmc_autoset = 95; - mincorrel_autoset = 0.95f; - maxiters_autoset = 4; - switch (ydim_2d) { - case 4: pcdiv = 12; break; - case 5: pcdiv = 7; break; - case 6: pcdiv = 7; break; - case 8: pcdiv = 5; break; - case 10: pcdiv = 4; break; - case 12: pcdiv = 3; break; - default: pcdiv = 3; break; - } + quality = ASTCENC_PRE_THOROUGH; break; case AstcPreset::EXHAUSTIVE: - plimit_autoset = 1 << 10; - oplimit_autoset = 1000.0; - dblimit_autoset_2d = 999.0f; - dblimit_autoset_3d = 999.0f; - bmc_autoset = 100; - mincorrel_autoset = 0.99; - maxiters_autoset = 4; - switch (ydim_2d) { - case 4: pcdiv = 3; break; - case 5: pcdiv = 1; break; - case 6: pcdiv = 1; break; - case 8: pcdiv = 1; break; - case 10: pcdiv = 1; break; - case 12: pcdiv = 1; break; - default: pcdiv = 1; break; - } + quality = ASTCENC_PRE_EXHAUSTIVE; break; } + astcenc_config_init( + config.semantic == AstcSemantic::COLORS_HDR ? + ASTCENC_PRF_HDR : + (config.srgb ? ASTCENC_PRF_LDR_SRGB : ASTCENC_PRF_LDR), + config.blocksize.x, + config.blocksize.y, + 1, + quality, + config.semantic == AstcSemantic::NORMALS ? ASTCENC_FLG_MAP_NORMAL : 0, + &my_config + ); + + // Create encoding context + const int threadcount = 1; // std::thread::hardware_concurrency(); + astcenc_context *context; + astcenc_context_alloc(&my_config, threadcount, &context); - if (plimit_autoset < 1) { - plimit_autoset = 1; - } else if (plimit_autoset > PARTITION_COUNT) { - plimit_autoset = PARTITION_COUNT; - } + // Create an input image for the ARM encoder in a format that it can consume. + // It expects four-channel data, so we extend or curtail the channel count in a reasonable way. + // The encoder can take half-floats or bytes, but we always give it half-floats. - error_weighting_params ewp; - ewp.rgb_power = 1.0f; - ewp.alpha_power = 1.0f; - ewp.rgb_base_weight = 1.0f; - ewp.alpha_base_weight = 1.0f; - ewp.rgb_mean_weight = 0.0f; - ewp.rgb_stdev_weight = 0.0f; - ewp.alpha_mean_weight = 0.0f; - ewp.alpha_stdev_weight = 0.0f; - ewp.rgb_mean_and_stdev_mixing = 0.0f; - ewp.mean_stdev_radius = 0; - ewp.enable_rgb_scale_with_alpha = 0; - ewp.alpha_radius = 0; - ewp.block_artifact_suppression = 0.0f; - ewp.rgba_weights[0] = 1.0f; - ewp.rgba_weights[1] = 1.0f; - ewp.rgba_weights[2] = 1.0f; - ewp.rgba_weights[3] = 1.0f; - ewp.ra_normal_angular_scale = 0; - ewp.max_refinement_iters = maxiters_autoset; - ewp.block_mode_cutoff = bmc_autoset / 100.0f; - ewp.texel_avg_error_limit = pow(0.1f, dblimit_autoset_2d * 0.1f) * 65535.0f * 65535.0f; - ewp.partition_1_to_2_limit = oplimit_autoset; - ewp.lowest_correlation_cutoff = mincorrel_autoset; - ewp.partition_search_limit = plimit_autoset; - - // For now we do not support 3D textures but we keep the variable names consistent - // with what's found in the ARM standalone tool. - int xdim = xdim_2d, ydim = ydim_2d, zdim = 1; - expand_block_artifact_suppression(xdim, ydim, zdim, &ewp); - - // Perform compression. - - swizzlepattern swz_encode = { 0, 1, 2, 3 }; - swizzlepattern swz_decode = { 0, 1, 2, 3 }; - astc_decode_mode decode_mode; - switch (config.semantic) { - case AstcSemantic::COLORS_LDR: - decode_mode = config.srgb ? DECODE_LDR_SRGB : DECODE_LDR; - break; - case AstcSemantic::COLORS_HDR: - decode_mode = DECODE_HDR; - break; - case AstcSemantic::NORMALS: - decode_mode = config.srgb ? DECODE_LDR_SRGB : DECODE_LDR; - ewp.rgba_weights[0] = 1.0f; - ewp.rgba_weights[1] = 0.0f; - ewp.rgba_weights[2] = 0.0f; - ewp.rgba_weights[3] = 1.0f; - ewp.ra_normal_angular_scale = 1; - swz_encode.r = 0; - swz_encode.g = 0; - swz_encode.b = 0; - swz_encode.a = 1; - swz_decode.r = 0; - swz_decode.g = 3; - swz_decode.b = 6; - swz_decode.a = 5; - ewp.block_artifact_suppression = 1.8f; - ewp.mean_stdev_radius = 3; - ewp.rgb_mean_weight = 0; - ewp.rgb_stdev_weight = 50; - ewp.rgb_mean_and_stdev_mixing = 0.0; - ewp.alpha_mean_weight = 0; - ewp.alpha_stdev_weight = 50; - break; + LinearImage source = extendToFourChannels(original); + const uint32_t width = source.getWidth(); + const uint32_t height = source.getHeight(); + astcenc_image input_image; + uint16_t *imagedata = new uint16_t[4 * width * height]; + input_image.dim_x = width; + input_image.dim_y = height; + input_image.dim_z = 1; + input_image.data_type = ASTCENC_TYPE_F16; + input_image.data = reinterpret_cast(&imagedata); + for (int y = 0; y < height; y++) { + auto imagedata16 = &imagedata[4 * width * y]; + float const* src = source.getPixelRef(0, y); + for (int x = 0; x < width; x++) { + imagedata16[4 * x] = float_to_sf16(src[4 * x]); + imagedata16[4 * x + 1] = float_to_sf16(src[4 * x + 1]); + imagedata16[4 * x + 2] = float_to_sf16(src[4 * x + 2]); + imagedata16[4 * x + 3] = float_to_sf16(src[4 * x + 3]); + } } - const int threadcount = std::thread::hardware_concurrency(); - - const int xsize = input_image->xsize; - const int ysize = input_image->ysize; - const int zsize = input_image->zsize; - const int xblocks = (xsize + xdim - 1) / xdim; - const int yblocks = (ysize + ydim - 1) / ydim; - const int zblocks = (zsize + zdim - 1) / zdim; + const int xblocks = (width + my_config.block_x - 1) / my_config.block_x; + const int yblocks = (height + my_config.block_y - 1) / my_config.block_y; + const astcenc_swizzle swz = { ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A }; - uint32_t size = xblocks * yblocks * zblocks * 16; + uint32_t size = xblocks * yblocks * 16; uint8_t* buffer = new uint8_t[size]; - encode_astc_image(input_image, nullptr, xdim, ydim, zdim, &ewp, decode_mode, - swz_encode, swz_decode, buffer, 0, threadcount); - - destroy_image(input_image); + // TODO Run this threadcount times in parallel + astcenc_error err = astcenc_compress_image( + context, + &input_image, + &swz, + buffer, + size, + 0 + ); + + delete[] imagedata; + astcenc_context_free(context); + + if (err != 0) + { + std::cerr << "astcCompress: error: " << astcenc_get_error_string(err) << "\n"; + free(buffer); + return {}; + } return { .format = format,