1
votes

I want to render characters whose UTF-8 encoding is 2 bytes or longer. I already have almost everything working, but there is one problem: when such a character is drawn, something extra is also drawn right after it (see image).

To get the glyph data I use FreeType. This is a very minimal implementation; the actual code also contains kerning, SDF, etc.

What I think needs explanation is the atlas. The method "TextureAtlas::PackTexture(data, w, h)" packs the texture data and returns its position (the origin, i.e. the top-left corner) within the atlas's w and h range. So the first char has origin = [0, 0], and the next char, with a width of, let's say, 50, will have its origin at [50, 0]. Roughly speaking.

    // Constants shared by the FreeType setup and glyph-loading code.
    enum
    {
        // Horizontal oversampling factor: multiplied into the char size and the horizontal
        // resolution given to FT_Set_Char_Size, and divided back out of the glyph advance.
        HIGHRES = 64,
        // Resolution handed to FT_Set_Char_Size.
        DPI = 72
    };

    // Cached description of one rasterized glyph: its bitmap size, placement metrics and
    // where its pixels live inside the texture atlas.
    struct Glyph
    {
        // Decoded Unicode code point this glyph belongs to; all bits set means "not assigned".
        uint32 codepoint = static_cast<uint32>(-1);
        // Size of the glyph bitmap as stored in the atlas.
        uint32 width = 0;
        uint32 height = 0;

        // Left/top bearing as reported by FreeType (bitmap_left / bitmap_top).
        Vector2<int> bearing = 0;
        // Pen advance after drawing this glyph (FreeType advance divided by HIGHRES).
        Vector2<float> advance = 0.0f;
        // Normalized atlas coordinates of the top-left (s0, t0) and bottom-right (s1, t1)
        // corners. Zero-initialized so a default-constructed Glyph never carries garbage UVs.
        float s0 = 0.0f;
        float t0 = 0.0f;
        float s1 = 0.0f;
        float t1 = 0.0f;
    };

    // A font loaded through FreeType whose glyphs are rasterized on demand and packed into a
    // texture atlas.
    class TextureFont
    {
    public:
        TextureFont() = default;

        // Opens the font once to read the global metrics (ascender, descender, height).
        // Returns false when FreeType could not be set up.
        bool Initialize();
        // Remembers the font path and size, creates the atlas and calls Initialize().
        void LoadFromFile(const std::string& filePath, float fontSize);

        // Returns the cached glyph for the UTF-8 sequence starting at `codepoint`, loading it
        // first if needed; nullptr when it cannot be loaded.
        Glyph* getGlyph(const char8_t* codepoint);
        // Cache lookup only; nullptr when the glyph has not been loaded yet.
        Glyph* FindGlyph(const char8_t* codepoint);

        // Rasterizes the glyph and stores it in the atlas. Returns 1 on success, 0 on failure.
        uint32 LoadGlyph(const char8_t* codepoint);

        // Creates `library` and `face` for the given size. Returns 1 on success, 0 on failure.
        int InitFreeType(float size);

        // Path of the font file. Assigned by LoadFromFile via strdup(); starts out null so it
        // is never read as an indeterminate pointer.
        // NOTE(review): nothing in the visible code frees this buffer.
        char* filename = nullptr;

        vector<Glyph> glyphs;
        TextureAtlas atlas;

        // FreeType handles; only valid between a successful InitFreeType() and the matching
        // FT_Done_Face / FT_Done_FreeType calls. Null-initialized for the same reason as above.
        FT_Library library = nullptr;
        FT_Face face = nullptr;

        float fontSize = 0.0f;
        float ascender = 0.0f;
        float descender = 0.0f;
        float height = 0.0f;
    };  
// Decodes one UTF-8 encoded code point.
//
//   out_char     receives the decoded code point. It is set to U+FFFD when the bytes at
//                in_text do not form a well-formed sequence, and to 0 when there is no input.
//   in_text      first byte to decode.
//   in_text_end  one past the last readable byte, or NULL when in_text is NUL-terminated.
//
// Returns the number of bytes the caller should advance by. The result is at least 1 whenever
// a byte was available, so a loop that advances by the return value always terminates; 0 is
// returned only for an empty bounded range. For a truncated or malformed sequence only the
// bytes that were actually part of it are consumed, so the decoder never steps over a NUL
// terminator or swallows the valid byte that follows a broken lead byte.
int CharFromUtf8(unsigned int* out_char, const char* in_text, const char* in_text_end)
    {
        const unsigned int replacement = 0xFFFD;

        // Nothing to read: do not touch *in_text at all.
        if (in_text_end && in_text >= in_text_end) {
            *out_char = 0;
            return 0;
        }

        const unsigned char* str = (const unsigned char*)in_text;
        const unsigned char lead = str[0];

        // Single byte (ASCII).
        if (!(lead & 0x80)) {
            *out_char = lead;
            return 1;
        }

        // Classify the lead byte: expected sequence length, payload bits carried by the lead
        // byte, and the smallest code point that legitimately needs that many bytes.
        int length;
        unsigned int c;
        unsigned int smallest;
        if ((lead & 0xe0) == 0xc0) {
            length = 2;
            c = lead & 0x1f;
            smallest = 0x80;
        } else if ((lead & 0xf0) == 0xe0) {
            length = 3;
            c = lead & 0x0f;
            smallest = 0x800;
        } else if ((lead & 0xf8) == 0xf0) {
            length = 4;
            c = lead & 0x07;
            smallest = 0x10000;
        } else {
            // Stray continuation byte (0x80..0xBF) or invalid lead (0xF8..0xFF): skip just it.
            *out_char = replacement;
            return 1;
        }

        *out_char = replacement;
        for (int i = 1; i < length; ++i) {
            // Sequence cut off by the end of the buffer.
            if (in_text_end && (const char*)(str + i) >= in_text_end) return i;
            // Not a continuation byte (this also catches the NUL terminator): stop before it.
            if ((str[i] & 0xc0) != 0x80) return i;
            c = (c << 6) | (unsigned int)(str[i] & 0x3f);
        }

        // Structurally complete; reject overlong encodings, UTF-16 surrogates and values
        // beyond the Unicode range.
        const bool overlong = c < smallest;
        const bool surrogate = c >= 0xD800 && c <= 0xDFFF;
        if (overlong || surrogate || c > 0x10FFFF) return length;

        *out_char = c;
        return length;
    }

    bool TextureFont::Initialize()
    {
        FT_Size_Metrics metrics;

        if (!InitFreeType(fontSize * 100.0f)) {
            return false;
        }

        metrics = face->size->metrics;
        ascender = (metrics.ascender >> 6) / 100.0f;
        descender = (metrics.descender >> 6) / 100.0f;
        height = (metrics.height >> 6) / 100.0f;

        FT_Done_Face(face);
        FT_Done_FreeType(library);

        return true;
    }

    int TextureFont::InitFreeType(float size)
    {
        FT_Matrix matrix = {
            static_cast<int>((1.0 / HIGHRES) * 0x10000L),
            static_cast<int>((0.0)           * 0x10000L),
            static_cast<int>((0.0)           * 0x10000L),
            static_cast<int>((1.0)           * 0x10000L)};
        FT_Error error;
        error = FT_Init_FreeType(&library);
        if (error) {
            EngineLogError("FREE_TYPE_ERROR: Could not Init FreeType!\n");
            FT_Done_FreeType(library);
            return 0;
        }

        error = FT_New_Face(library, filename, 0, &face);

        if (error) {
            EngineLogError("FREE_TYPE_ERROR: Could not create a new face!\n");
            FT_Done_FreeType(library);
            return 0;
        }

        error = FT_Select_Charmap(face, FT_ENCODING_UNICODE);
        if (error) {
            EngineLogError("FREE_TYPE_ERROR: Could not select charmap!\n");
            FT_Done_Face(face);
            return 0;
        }

        error = FT_Set_Char_Size(face, static_cast<ulong>(size * HIGHRES), 0, DPI * HIGHRES, DPI);
        if (error) {
            EngineLogError("FREE_TYPE_ERROR: Could not set char size!\n");
            FT_Done_Face(face);
            return 0;
        }

        FT_Set_Transform(face, &matrix, NULL);

        return 1;
    }

    // Records which font file and size to use, prepares an empty atlas and reads the font's
    // global metrics through Initialize().
    //
    //   filePath  path of the font file; a private copy is stored in `filename`.
    //   fontSize  requested size; stored in the member of the same name.
    //
    // NOTE(review): the strdup() copy is not freed anywhere in the visible code, and the
    // result of Initialize() is not reported to the caller.
    void TextureFont::LoadFromFile(const std::string& filePath, float fontSize)
    {
        // The parameter shadows the member, hence the explicit this->.
        this->filename = strdup(filePath.c_str());
        this->fontSize = fontSize;

        // Presumably a 512-pixel atlas with 1 byte per pixel; confirm against
        // TextureAtlas::Create.
        atlas.Create(512, 1);
        auto& pixels = atlas.buffer;
        std::fill(pixels.begin(), pixels.end(), 0);

        Initialize();
    }

    // Returns the glyph for the UTF-8 sequence starting at `codepoint`. A cached glyph is
    // returned directly; otherwise the glyph is loaded and looked up again.
    // Returns nullptr when the glyph is not cached and loading it fails.
    Glyph* TextureFont::getGlyph(const char8_t* codepoint)
    {
        Glyph* result = FindGlyph(codepoint);

        // Cache miss: try to rasterize it, then fetch the freshly stored entry.
        if (result == nullptr && LoadGlyph(codepoint)) {
            result = FindGlyph(codepoint);
        }

        return result;
    }

    // Decodes the UTF-8 sequence starting at `codepoint` and searches the loaded glyphs for
    // that code point. Returns a pointer into `glyphs`, or nullptr when it is not cached.
    Glyph* TextureFont::FindGlyph(const char8_t* codepoint)
    {
        // The input is treated as NUL-terminated (no end pointer is passed to the decoder).
        uint32 wanted;
        CharFromUtf8(&wanted, (char*)codepoint, NULL);

        for (Glyph& candidate : glyphs) {
            if (candidate.codepoint == wanted) {
                return &candidate;
            }
        }

        return nullptr;
    }

    // Rasterizes the glyph for the UTF-8 sequence starting at `codepoint`, packs its bitmap
    // into the atlas and appends its metrics to `glyphs`.
    //
    // Returns 1 when the glyph is available afterwards (already cached or newly loaded) and
    // 0 when FreeType could not be initialized or the glyph could not be loaded.
    uint32 TextureFont::LoadGlyph(const char8_t* codepoint)
    {
        // Already cached: nothing to rasterize, so do not set FreeType up and tear it down.
        if (FindGlyph(codepoint)) {
            return 1;
        }

        if (!InitFreeType(fontSize)) {
            return 0;
        }

        unsigned int cp;
        CharFromUtf8(&cp, (char*)codepoint, NULL);
        const uint32 glyphIndex = FT_Get_Char_Index(face, cp);

        const int flags = FT_LOAD_RENDER | FT_LOAD_FORCE_AUTOHINT;
        const FT_Error error = FT_Load_Glyph(face, glyphIndex, flags);
        if (error) {
            EngineLogError("FREE_TYPE_ERROR: Could not load the glyph (line {})!\n", __LINE__);
            FT_Done_Face(face);
            FT_Done_FreeType(library);
            return 0;
        }

        // The slot and its bitmap belong to the face; they stay valid until FT_Done_Face below.
        const FT_GlyphSlot slot = face->glyph;
        const FT_Bitmap& bitmap = slot->bitmap;
        const int glyphTop = slot->bitmap_top;
        const int glyphLeft = slot->bitmap_left;

        const uint32 srcWidth = bitmap.width / atlas.bytesPerPixel;
        const uint32 srcHeight = bitmap.rows;

        const uint32 tgtWidth = srcWidth;
        const uint32 tgtHeight = srcHeight;

        // Bytes per destination row. Copying exactly this many bytes (rather than
        // bitmap.width) keeps each row inside the buffer even if bitmap.width is not a
        // multiple of bytesPerPixel.
        const auto rowBytes = tgtWidth * atlas.bytesPerPixel;
        auto buffer = std::make_unique<uchar[]>(rowBytes * tgtHeight);

        // Repack the bitmap tightly: source rows are `pitch` bytes apart.
        uchar* destPointer = buffer.get();
        const uchar* srcPointer = bitmap.buffer;
        for (uint32 row = 0; row < srcHeight; ++row) {
            memcpy(destPointer, srcPointer, rowBytes);
            destPointer += rowBytes;
            srcPointer += bitmap.pitch;
        }

        auto origin = atlas.PackTexture(buffer.get(), { tgtWidth, tgtHeight });

        const float x = origin.x;
        const float y = origin.y;

        Glyph current;
        current.codepoint = cp;
        current.width = tgtWidth;
        current.height = tgtHeight;
        current.bearing.x = glyphLeft;
        current.bearing.y = glyphTop;
        // Normalized atlas coordinates of the packed rectangle.
        current.s0 = x / (float)atlas.textureSize.w;
        current.t0 = y / (float)atlas.textureSize.h;
        current.s1 = (x + tgtWidth) / (float)atlas.textureSize.w;
        current.t1 = (y + tgtHeight) / (float)atlas.textureSize.h;

        current.advance.x = slot->advance.x / (float)HIGHRES;
        current.advance.y = slot->advance.y / (float)HIGHRES;

        glyphs.push_back(current);

        FT_Done_Face(face);
        FT_Done_FreeType(library);

        return 1;
    } 

To render a string (a single char in this case), I loop through the string, get a glyph, update the atlas and set up the render data.

The text is a simple quad with a texture on it and proper UVs. I do not think it is necessary to explain what is inside AddVertexData, because it does not cause the problem.

void DrawString(const std::u8string& string, float x, float y)
    {
        for (const auto& c : string) {
            auto glyph = textureFont.getGlyph(&c);

            auto& t = *(Texture2D*)texture.get();
            t.UpdateData(textureFont.atlas.buffer.data());

            float x0 = x + static_cast<float>(glyph->bearing.x);
            float y0 = y + (textureFont.ascender + textureFont.descender - static_cast<float>(glyph->bearing.y));
            float x1 = x0 + static_cast<float>(glyph->width);
            float y1 = y0 + static_cast<float>(glyph->height);

            float u0 = glyph->s0;
            float v0 = glyph->t0;
            float u1 = glyph->s1;
            float v1 = glyph->t1;

            //            position                uv                      color
            AddVertexData(Vector2<float>(x0, y0), Vector2<float>(u0, v0), 0xff0000ff);
            AddVertexData(Vector2<float>(x0, y1), Vector2<float>(u0, v1), 0xff0000ff);
            AddVertexData(Vector2<float>(x1, y1), Vector2<float>(u1, v1), 0xff0000ff);
            AddVertexData(Vector2<float>(x1, y0), Vector2<float>(u1, v0), 0xff0000ff);

            // indices for DrawElements() call
            // 0, 1, 2, 2, 3, 0
            AddRectElements();

            x += glyph->advance.x;
        }
    }

ę has a UTF-8 size of 2 bytes, so the loop runs twice, but it renders only 1 character and does not know the second character (because there is no second character), so it renders an empty quad.

How to get rid of the quad that follows the character I want to render?

4
Using const auto& c : string to access several characters is surprising. – Jarod42
Note: there are two problems: a Unicode code point requires 1 to 4 bytes in UTF-8, but rendering one glyph/character may take up to 8 code points (or more) because of modifiers, and several characters can be merged into a ligature by the font (so the glyphs actually displayed differ). [A cursive font uses many ligatures ("by definition of cursive").] This is "the reference" on best practices: unicode.org/reports/tr29/tr29-35.html – Giacomo Catenazzi

4 Answers

2
votes

In your DrawString function you have the loop

for (const auto& c : string)

That loop will iterate byte by byte over the string. So if the string contains the two-byte "ę" character, then the first iteration will get the first byte, and the second iteration will get the second byte.

You can't use a range-based for loop here, since you need to skip bytes in the string. Either use an iterator-based loop or an index-based loop.

For example

for (size_t i = 0; i < string.size(); /* nothing */) {
    // Here you need to get the number of bytes for the current character
    // Then you should increment the index by that amount
    i += byte_count_for_current_character;

    // ... rest of code
}
1
votes

Your issue is in DrawString with for (const auto& c : string)

You should skip extra characters used to encode previous glyph, those matching with 0b10......:

for (const auto& c : string) {
    if ((c & 0b1100'0000) == 0b1000'0000) {
        continue;
    }
// ...
}

or advance to the number of byte read by last glyph.

1
votes

Both calls to your actual UTF-8 decoding function CharFromUtf8 ignore its return value, which is the number of bytes the string pointer should be advanced. Instead of the for (const auto& c : string) you should have a pointer which you advance by the return value on each iteration.

Additionally, since you will be using the CharFromUtf8 function inside that loop, you will know both the Unicode codepoint and the number of bytes to advance. Then you can refactor your TextureFont to take unsigned int (i.e. codepoints) as arguments rather than letting it do the UTF-8 decoding. This would be a better separation of concerns.

1
votes

Other answers have already identified the problem with using a range-based-for loop directly with a std::u8string variable. Assuming that code point based enumeration is what you want (it probably isn't since, in general, correct glyph selection depends on surrounding code points; you probably want to iterate over extended grapheme clusters), you can use a library like text_view to provide range-based-for support for iteration of code points. That loop would then look like:

auto tv = make_text_view<utf8_encoding>(string);
for (const auto& cp : tv) {
  ...
}