Fix bug when splitting text containing CJK chars

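Segment.Split did not take cell width into account when calculating the split offset, so text containing wide characters (such as CJK) could be split at the wrong position. This adds a new Segment.Split overload that takes a RenderContext and measures the offset in cells, and marks the old overload as obsolete. Closes #150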
2026-02-02 10:59:58 +08:00 · 2020-12-16 23:30:03 +01:00
parent ee305702e8
commit 6932c95731
5 changed files with 125 additions and 29 deletions
--- a/src/Spectre.Console.Tests/Expectations/PanelTests.Should_Wrap_Table_With_CJK_Tables_In_Panel_Correctly.verified.txt
+++ b/src/Spectre.Console.Tests/Expectations/PanelTests.Should_Wrap_Table_With_CJK_Tables_In_Panel_Correctly.verified.txt
@@ -0,0 +1,7 @@
+┌──────────┐
+│ ┌──────┐ │
+│ │ 测试 │ │
+│ ├──────┤ │
+│ │ 测试 │ │
+│ └──────┘ │
+└──────────┘
--- a/src/Spectre.Console.Tests/Unit/PanelTests.cs
+++ b/src/Spectre.Console.Tests/Unit/PanelTests.cs
@@ -1,5 +1,7 @@
 using System.Collections.Generic;
+using System.Text;
 using System.Threading.Tasks;
+using Shouldly;
 using Spectre.Console.Rendering;
 using VerifyXunit;
 using Xunit;
@@ -267,5 +269,23 @@ namespace Spectre.Console.Tests.Unit
            // Then
            return Verifier.Verify(console.Output);
        }
+
+        [Fact]
+        public Task Should_Wrap_Table_With_CJK_Tables_In_Panel_Correctly()
+        {
+            // Given: a plain console and a panel wrapping a one-column table whose header and row are CJK text
+            var console = new PlainConsole(width: 80);
+
+            var table = new Table();
+            table.AddColumn("测试");
+            table.AddRow("测试");
+            var panel = new Panel(table);
+
+            // When: the panel is rendered to the console
+            console.Render(panel);
+
+            // Then: the console output is handed to Verifier.Verify
+            return Verifier.Verify(console.Output);
+        }
    }
 }
--- a/src/Spectre.Console.Tests/Unit/SegmentTests.cs
+++ b/src/Spectre.Console.Tests/Unit/SegmentTests.cs
@@ -22,18 +22,43 @@ namespace Spectre.Console.Tests.Unit
        [UsesVerify]
        public sealed class TheSplitMethod
        {
-            [Fact]
-            public Task Should_Split_Segment_Correctly()
+            [Theory]
+            [InlineData("Foo Bar", 0, "", "Foo Bar")]
+            [InlineData("Foo Bar", 1, "F", "oo Bar")]
+            [InlineData("Foo Bar", 2, "Fo", "o Bar")]
+            [InlineData("Foo Bar", 3, "Foo", " Bar")]
+            [InlineData("Foo Bar", 4, "Foo ", "Bar")]
+            [InlineData("Foo Bar", 5, "Foo B", "ar")]
+            [InlineData("Foo Bar", 6, "Foo Ba", "r")]
+            [InlineData("Foo Bar", 7, "Foo Bar", null)]
+            [InlineData("Foo 测试 Bar", 0, "", "Foo 测试 Bar")]
+            [InlineData("Foo 测试 Bar", 1, "F", "oo 测试 Bar")]
+            [InlineData("Foo 测试 Bar", 2, "Fo", "o 测试 Bar")]
+            [InlineData("Foo 测试 Bar", 3, "Foo", " 测试 Bar")]
+            [InlineData("Foo 测试 Bar", 4, "Foo ", "测试 Bar")]
+            [InlineData("Foo 测试 Bar", 5, "Foo 测", "试 Bar")]
+            [InlineData("Foo 测试 Bar", 6, "Foo 测", "试 Bar")]
+            [InlineData("Foo 测试 Bar", 7, "Foo 测试", " Bar")]
+            [InlineData("Foo 测试 Bar", 8, "Foo 测试", " Bar")]
+            [InlineData("Foo 测试 Bar", 9, "Foo 测试 ", "Bar")]
+            [InlineData("Foo 测试 Bar", 10, "Foo 测试 B", "ar")]
+            [InlineData("Foo 测试 Bar", 11, "Foo 测试 Ba", "r")]
+            [InlineData("Foo 测试 Bar", 12, "Foo 测试 Bar", null)]
+            public void Should_Split_Segment_Correctly(string text, int offset, string expectedFirst, string expectedSecond)
            {
                // Given
                var style = new Style(Color.Red, Color.Green, Decoration.Bold);
-                var segment = new Segment("Foo Bar", style);
+                var context = new RenderContext(Encoding.UTF8, false);
+                var segment = new Segment(text, style);

                // When
-                var result = segment.Split(3);
+                var (first, second) = segment.Split(context, offset);

                // Then
-                return Verifier.Verify(result);
+                first.Text.ShouldBe(expectedFirst);
+                first.Style.ShouldBe(style);
+                second?.Text?.ShouldBe(expectedSecond);
+                second?.Style?.ShouldBe(style);
            }
        }

--- a/src/Spectre.Console/Internal/Text/Cell.cs
+++ b/src/Spectre.Console/Internal/Text/Cell.cs
@@ -8,32 +8,34 @@ namespace Spectre.Console.Internal
    {
        public static int GetCellLength(RenderContext context, string text)
        {
-            return text.Sum(rune =>
-            {
-                if (context.LegacyConsole)
-                {
-                    // Is it represented by a single byte?
-                    // In that case we don't have to calculate the
-                    // actual cell width.
-                    if (context.Encoding.GetByteCount(new[] { rune }) == 1)
-                    {
-                        return 1;
-                    }
-                }
+            return text.Sum(rune => GetCellLength(context, rune));
+        }

-                // TODO: We need to figure out why Segment.SplitLines fails
-                // if we let wcwidth (which returns -1 instead of 1)
-                // calculate the size for new line characters.
-                // That is correct from a Unicode perspective, but the
-                // algorithm was written before wcwidth was added and used
-                // to work with string length and not cell length.
-                if (rune == '\n')
+        public static int GetCellLength(RenderContext context, char rune)
+        {
+            if (context.LegacyConsole)
+            {
+                // Is it represented by a single byte?
+                // In that case we don't have to calculate the
+                // actual cell width.
+                if (context.Encoding.GetByteCount(new[] { rune }) == 1)
                {
                    return 1;
                }
+            }

-                return UnicodeCalculator.GetWidth(rune);
-            });
+            // TODO: We need to figure out why Segment.SplitLines fails
+            // if we let wcwidth (which returns -1 instead of 1)
+            // calculate the size for new line characters.
+            // That is correct from a Unicode perspective, but the
+            // algorithm was written before wcwidth was added and used
+            // to work with string length and not cell length.
+            if (rune == '\n')
+            {
+                return 1;
+            }
+
+            return UnicodeCalculator.GetWidth(rune);
        }
    }
 }
--- a/src/Spectre.Console/Rendering/Segment.cs
+++ b/src/Spectre.Console/Rendering/Segment.cs
@@ -3,6 +3,7 @@ using System.Collections.Generic;
 using System.Diagnostics;
 using System.Linq;
 using System.Text;
+using Spectre.Console.Internal;

 namespace Spectre.Console.Rendering
 {
@@ -145,6 +146,7 @@ namespace Spectre.Console.Rendering
        /// </summary>
        /// <param name="offset">The offset where to split the segment.</param>
        /// <returns>One or two new segments representing the split.</returns>
+        [Obsolete("Use Split(RenderContext, Int32) instead")]
        public (Segment First, Segment? Second) Split(int offset)
        {
            if (offset < 0)
@@ -162,6 +164,44 @@ namespace Spectre.Console.Rendering
                new Segment(Text.Substring(offset, Text.Length - offset), Style));
        }

+        /// <summary>
+        /// Splits the segment at the given cell offset, measuring each character with <c>Cell.GetCellLength</c>.
+        /// </summary>
+        /// <param name="context">The render context used to measure the cell width of the text.</param>
+        /// <param name="offset">The offset, in cells, where to split the segment. A character that straddles the offset is kept in the first segment.</param>
+        /// <returns>This segment and <c>null</c> if the offset is negative or not less than <c>CellCount(context)</c>; otherwise two new segments that share this segment's style.</returns>
+        public (Segment First, Segment? Second) Split(RenderContext context, int offset)
+        {
+            if (offset < 0)
+            {
+                return (this, null);
+            }
+
+            if (offset >= CellCount(context))
+            {
+                return (this, null);
+            }
+
+            var index = 0; // Number of characters that go into the first segment; stays 0 when offset is 0.
+            if (offset > 0)
+            {
+                var accumulated = 0; // Total cell width of the characters consumed so far.
+                foreach (var character in Text)
+                {
+                    index++;
+                    accumulated += Cell.GetCellLength(context, character);
+                    if (accumulated >= offset) // Stop once the consumed width reaches or passes the offset.
+                    {
+                        break;
+                    }
+                }
+            }
+
+            return (
+                new Segment(Text.Substring(0, index), Style),
+                new Segment(Text.Substring(index, Text.Length - index), Style));
+        }
+
        /// <summary>
        /// Clones the segment.
        /// </summary>
@@ -219,14 +259,16 @@ namespace Spectre.Console.Rendering
            while (stack.Count > 0)
            {
                var segment = stack.Pop();
+                var segmentLength = segment.CellCount(context);

                // Does this segment make the line exceed the max width?
-                if (line.CellCount(context) + segment.CellCount(context) > maxWidth)
+                var lineLength = line.CellCount(context);
+                if (lineLength + segmentLength > maxWidth)
                {
-                    var diff = -(maxWidth - (line.Length + segment.Text.Length));
+                    var diff = -(maxWidth - (lineLength + segmentLength));
                    var offset = segment.Text.Length - diff;

-                    var (first, second) = segment.Split(offset);
+                    var (first, second) = segment.Split(context, offset);

                    line.Add(first);
                    lines.Add(line);