#include <algorithm>
#include <cmath>
#include <cstring>
+#include <iostream>
#include <thread>
#include <astcenc.h>
#define STB_DXT_IMPLEMENTATION
#include <stb_dxt.h>
+extern "C" // The declaration below uses C linkage; the definition is not in this file (presumably supplied by the astcenc library -- TODO confirm).
+{
+ uint16_t float_to_sf16(float val); // Takes a float and returns a 16-bit value; astcCompress uses it to fill the ASTCENC_TYPE_F16 input buffer.
+}
+
namespace image {
static LinearImage extendToFourChannels(LinearImage source); // Forward declaration (defined elsewhere in this file); astcCompress reads its result as 4 floats per pixel -- presumably pads/truncates the channel count to four, TODO confirm.
CompressedTexture astcCompress(const LinearImage& original, AstcConfig config) {
- // If this is the first time, initialize the ARM encoder tables.
-
- static bool first = true;
- if (first) {
- test_inappropriate_extended_precision();
- prepare_angular_tables();
- build_quantization_mode_table();
- first = false;
- }
-
// Check the validity of the given block size.
using Format = CompressedFormat;
return {};
}
- // Create an input image for the ARM encoder in a format that it can consume.
- // It expects four-channel data, so we extend or curtail the channel count in a reasonable way.
- // The encoder can take half-floats or bytes, but we always give it half-floats.
-
- LinearImage source = extendToFourChannels(original);
- const uint32_t width = source.getWidth();
- const uint32_t height = source.getHeight();
- astc_codec_image* input_image = allocate_image(16, width, height, 1, 0);
- for (int y = 0; y < height; y++) {
- auto imagedata16 = input_image->imagedata16[0][y];
- float const* src = source.getPixelRef(0, y);
- for (int x = 0; x < width; x++) {
- imagedata16[4 * x] = float_to_sf16(src[4 * x], SF_NEARESTEVEN);
- imagedata16[4 * x + 1] = float_to_sf16(src[4 * x + 1], SF_NEARESTEVEN);
- imagedata16[4 * x + 2] = float_to_sf16(src[4 * x + 2], SF_NEARESTEVEN);
- imagedata16[4 * x + 3] = float_to_sf16(src[4 * x + 3], SF_NEARESTEVEN);
- }
- }
-
- // Determine the bitrate based on the specified block size.
-
- int xdim_2d = config.blocksize.x, ydim_2d = config.blocksize.y;
- const float log10_texels_2d = std::log((float)(xdim_2d * ydim_2d)) / std::log(10.0f);
- const float bitrate = 128.0 / (xdim_2d * ydim_2d);
-
- // We do not fully support 3D textures yet, but we include some of the 3D config params anyway.
-
- int xdim_3d, ydim_3d, zdim_3d;
- find_closest_blockdim_3d(bitrate, &xdim_3d, &ydim_3d, &zdim_3d, 0);
- const float log10_texels_3d = std::log((float)(xdim_3d * ydim_3d * zdim_3d)) / log(10.0f);
-
- // Set up presets.
-
- int plimit_autoset;
- float oplimit_autoset;
- float dblimit_autoset_2d;
- float dblimit_autoset_3d;
- float bmc_autoset;
- float mincorrel_autoset;
- int maxiters_autoset;
- int pcdiv;
-
+ // Configure encoder
+ astcenc_config my_config;
+ float quality;
switch (config.quality) {
case AstcPreset::VERYFAST:
- plimit_autoset = 2;
- oplimit_autoset = 1.0;
- dblimit_autoset_2d = fmax(70 - 35 * log10_texels_2d, 53 - 19 * log10_texels_2d);
- dblimit_autoset_3d = fmax(70 - 35 * log10_texels_3d, 53 - 19 * log10_texels_3d);
- bmc_autoset = 25;
- mincorrel_autoset = 0.5;
- maxiters_autoset = 1;
- switch (ydim_2d) {
- case 4: pcdiv = 240; break;
- case 5: pcdiv = 56; break;
- case 6: pcdiv = 64; break;
- case 8: pcdiv = 47; break;
- case 10: pcdiv = 36; break;
- case 12: pcdiv = 30; break;
- default: pcdiv = 30; break;
- }
+ quality = ASTCENC_PRE_FASTEST;
break;
case AstcPreset::FAST:
- plimit_autoset = 4;
- oplimit_autoset = 1.0;
- dblimit_autoset_2d = fmax(85 - 35 * log10_texels_2d, 63 - 19 * log10_texels_2d);
- dblimit_autoset_3d = fmax(85 - 35 * log10_texels_3d, 63 - 19 * log10_texels_3d);
- bmc_autoset = 50;
- mincorrel_autoset = 0.5;
- maxiters_autoset = 1;
- switch (ydim_2d) {
- case 4: pcdiv = 60; break;
- case 5: pcdiv = 27; break;
- case 6: pcdiv = 30; break;
- case 8: pcdiv = 24; break;
- case 10: pcdiv = 16; break;
- case 12: pcdiv = 20; break;
- default: pcdiv = 20; break;
- }
+ quality = ASTCENC_PRE_FAST;
break;
case AstcPreset::MEDIUM:
- plimit_autoset = 25;
- oplimit_autoset = 1.2;
- dblimit_autoset_2d = fmax(95 - 35 * log10_texels_2d, 70 - 19 * log10_texels_2d);
- dblimit_autoset_3d = fmax(95 - 35 * log10_texels_3d, 70 - 19 * log10_texels_3d);
- bmc_autoset = 75;
- mincorrel_autoset = 0.75;
- maxiters_autoset = 2;
- switch (ydim_2d) {
- case 4: pcdiv = 25; break;
- case 5: pcdiv = 15; break;
- case 6: pcdiv = 15; break;
- case 8: pcdiv = 10; break;
- case 10: pcdiv = 8; break;
- case 12: pcdiv = 6; break;
- default: pcdiv = 6; break;
- }
+ quality = ASTCENC_PRE_MEDIUM;
break;
case AstcPreset::THOROUGH:
- plimit_autoset = 100;
- oplimit_autoset = 2.5;
- dblimit_autoset_2d = fmax(105 - 35 * log10_texels_2d, 77 - 19 * log10_texels_2d);
- dblimit_autoset_3d = fmax(105 - 35 * log10_texels_3d, 77 - 19 * log10_texels_3d);
- bmc_autoset = 95;
- mincorrel_autoset = 0.95f;
- maxiters_autoset = 4;
- switch (ydim_2d) {
- case 4: pcdiv = 12; break;
- case 5: pcdiv = 7; break;
- case 6: pcdiv = 7; break;
- case 8: pcdiv = 5; break;
- case 10: pcdiv = 4; break;
- case 12: pcdiv = 3; break;
- default: pcdiv = 3; break;
- }
+ quality = ASTCENC_PRE_THOROUGH;
break;
case AstcPreset::EXHAUSTIVE:
- plimit_autoset = 1 << 10;
- oplimit_autoset = 1000.0;
- dblimit_autoset_2d = 999.0f;
- dblimit_autoset_3d = 999.0f;
- bmc_autoset = 100;
- mincorrel_autoset = 0.99;
- maxiters_autoset = 4;
- switch (ydim_2d) {
- case 4: pcdiv = 3; break;
- case 5: pcdiv = 1; break;
- case 6: pcdiv = 1; break;
- case 8: pcdiv = 1; break;
- case 10: pcdiv = 1; break;
- case 12: pcdiv = 1; break;
- default: pcdiv = 1; break;
- }
+ quality = ASTCENC_PRE_EXHAUSTIVE;
break;
}
+ // Fill in the encoder configuration from the requested profile, 2D block size (block_z = 1),
+ // quality preset and flags. The call reports failure through its return value, so check it
+ // before the configuration is used. Nothing has been heap-allocated yet, so an early return
+ // of an empty texture is safe here.
+ astcenc_error status = astcenc_config_init(
+ config.semantic == AstcSemantic::COLORS_HDR ?
+ ASTCENC_PRF_HDR :
+ (config.srgb ? ASTCENC_PRF_LDR_SRGB : ASTCENC_PRF_LDR),
+ config.blocksize.x,
+ config.blocksize.y,
+ 1,
+ quality,
+ config.semantic == AstcSemantic::NORMALS ? ASTCENC_FLG_MAP_NORMAL : 0,
+ &my_config
+ );
+ if (status != ASTCENC_SUCCESS) {
+     std::cerr << "astcCompress: error: " << astcenc_get_error_string(status) << "\n";
+     return {};
+ }
+
+ // Create encoding context
+ const int threadcount = 1; // std::thread::hardware_concurrency();
+ astcenc_context *context = nullptr;
+ status = astcenc_context_alloc(&my_config, threadcount, &context);
+ if (status != ASTCENC_SUCCESS) {
+     // No context was created, so there is nothing to free on this path.
+     std::cerr << "astcCompress: error: " << astcenc_get_error_string(status) << "\n";
+     return {};
+ }
- if (plimit_autoset < 1) {
- plimit_autoset = 1;
- } else if (plimit_autoset > PARTITION_COUNT) {
- plimit_autoset = PARTITION_COUNT;
- }
+ // Create an input image for the ARM encoder in a format that it can consume.
+ // It expects four-channel data, so we extend or curtail the channel count in a reasonable way.
+ // The encoder can take half-floats or bytes, but we always give it half-floats.
- error_weighting_params ewp;
- ewp.rgb_power = 1.0f;
- ewp.alpha_power = 1.0f;
- ewp.rgb_base_weight = 1.0f;
- ewp.alpha_base_weight = 1.0f;
- ewp.rgb_mean_weight = 0.0f;
- ewp.rgb_stdev_weight = 0.0f;
- ewp.alpha_mean_weight = 0.0f;
- ewp.alpha_stdev_weight = 0.0f;
- ewp.rgb_mean_and_stdev_mixing = 0.0f;
- ewp.mean_stdev_radius = 0;
- ewp.enable_rgb_scale_with_alpha = 0;
- ewp.alpha_radius = 0;
- ewp.block_artifact_suppression = 0.0f;
- ewp.rgba_weights[0] = 1.0f;
- ewp.rgba_weights[1] = 1.0f;
- ewp.rgba_weights[2] = 1.0f;
- ewp.rgba_weights[3] = 1.0f;
- ewp.ra_normal_angular_scale = 0;
- ewp.max_refinement_iters = maxiters_autoset;
- ewp.block_mode_cutoff = bmc_autoset / 100.0f;
- ewp.texel_avg_error_limit = pow(0.1f, dblimit_autoset_2d * 0.1f) * 65535.0f * 65535.0f;
- ewp.partition_1_to_2_limit = oplimit_autoset;
- ewp.lowest_correlation_cutoff = mincorrel_autoset;
- ewp.partition_search_limit = plimit_autoset;
-
- // For now we do not support 3D textures but we keep the variable names consistent
- // with what's found in the ARM standalone tool.
- int xdim = xdim_2d, ydim = ydim_2d, zdim = 1;
- expand_block_artifact_suppression(xdim, ydim, zdim, &ewp);
-
- // Perform compression.
-
- swizzlepattern swz_encode = { 0, 1, 2, 3 };
- swizzlepattern swz_decode = { 0, 1, 2, 3 };
- astc_decode_mode decode_mode;
- switch (config.semantic) {
- case AstcSemantic::COLORS_LDR:
- decode_mode = config.srgb ? DECODE_LDR_SRGB : DECODE_LDR;
- break;
- case AstcSemantic::COLORS_HDR:
- decode_mode = DECODE_HDR;
- break;
- case AstcSemantic::NORMALS:
- decode_mode = config.srgb ? DECODE_LDR_SRGB : DECODE_LDR;
- ewp.rgba_weights[0] = 1.0f;
- ewp.rgba_weights[1] = 0.0f;
- ewp.rgba_weights[2] = 0.0f;
- ewp.rgba_weights[3] = 1.0f;
- ewp.ra_normal_angular_scale = 1;
- swz_encode.r = 0;
- swz_encode.g = 0;
- swz_encode.b = 0;
- swz_encode.a = 1;
- swz_decode.r = 0;
- swz_decode.g = 3;
- swz_decode.b = 6;
- swz_decode.a = 5;
- ewp.block_artifact_suppression = 1.8f;
- ewp.mean_stdev_radius = 3;
- ewp.rgb_mean_weight = 0;
- ewp.rgb_stdev_weight = 50;
- ewp.rgb_mean_and_stdev_mixing = 0.0;
- ewp.alpha_mean_weight = 0;
- ewp.alpha_stdev_weight = 50;
- break;
+ LinearImage source = extendToFourChannels(original);
+ const uint32_t width = source.getWidth();
+ const uint32_t height = source.getHeight();
+ astcenc_image input_image;
+ uint16_t *imagedata = new uint16_t[4 * width * height];
+ input_image.dim_x = width;
+ input_image.dim_y = height;
+ input_image.dim_z = 1;
+ input_image.data_type = ASTCENC_TYPE_F16;
+ input_image.data = reinterpret_cast<void**>(&imagedata);
+ for (int y = 0; y < height; y++) {
+ auto imagedata16 = &imagedata[4 * width * y];
+ float const* src = source.getPixelRef(0, y);
+ for (int x = 0; x < width; x++) {
+ imagedata16[4 * x] = float_to_sf16(src[4 * x]);
+ imagedata16[4 * x + 1] = float_to_sf16(src[4 * x + 1]);
+ imagedata16[4 * x + 2] = float_to_sf16(src[4 * x + 2]);
+ imagedata16[4 * x + 3] = float_to_sf16(src[4 * x + 3]);
+ }
}
- const int threadcount = std::thread::hardware_concurrency();
-
- const int xsize = input_image->xsize;
- const int ysize = input_image->ysize;
- const int zsize = input_image->zsize;
- const int xblocks = (xsize + xdim - 1) / xdim;
- const int yblocks = (ysize + ydim - 1) / ydim;
- const int zblocks = (zsize + zdim - 1) / zdim;
+ const int xblocks = (width + my_config.block_x - 1) / my_config.block_x;
+ const int yblocks = (height + my_config.block_y - 1) / my_config.block_y;
+ const astcenc_swizzle swz = { ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A };
- uint32_t size = xblocks * yblocks * zblocks * 16;
+ uint32_t size = xblocks * yblocks * 16;
uint8_t* buffer = new uint8_t[size];
- encode_astc_image(input_image, nullptr, xdim, ydim, zdim, &ewp, decode_mode,
- swz_encode, swz_decode, buffer, 0, threadcount);
-
- destroy_image(input_image);
+ // TODO Run this threadcount times in parallel
+ astcenc_error err = astcenc_compress_image(
+ context,
+ &input_image,
+ &swz,
+ buffer,
+ size,
+ 0
+ );
+
+ delete[] imagedata;
+ astcenc_context_free(context);
+
+ if (err != 0)
+ {
+ std::cerr << "astcCompress: error: " << astcenc_get_error_string(err) << "\n";
+ free(buffer);
+ return {};
+ }
return {
.format = format,