Skip to content

Commit 671f8ed

Browse files
stackunderflow111 and stack_underflow authored
fix the surrogate utf8 feature when custom characterEscapes is used (#1399)
Co-authored-by: stack_underflow <[email protected]>
1 parent 0aa97a7 commit 671f8ed

File tree

4 files changed

+47
-0
lines changed

4 files changed

+47
-0
lines changed

release-notes/CREDITS-2.x

+6
Original file line numberDiff line numberDiff line change
@@ -461,3 +461,9 @@ Justin Gosselin (@jgosselin-accesso)
461461
* Reported #1359: Non-surrogate characters being incorrectly combined when
462462
`JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8` is enabled
463463
(2.18.2)
464+
465+
Haruki (@stackunderflow111)
466+
* Reported, fixed #1398: feature `JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8`
467+
does not work when custom `CharacterEscapes` are used
468+
(2.18.2)
469+

release-notes/VERSION-2.x

+4
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ a pure JSON library.
2424
(reported by @Rodenstock)
2525
(fix contributed by @pjfanning)
2626

27+
#1398: Fix feature `JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8` not working
28+
when custom `CharacterEscapes` are used
29+
(reported and fixed by @stackunderflow111)
30+
2731
2.18.2 (27-Nov-2024)
2832

2933
#1359: Non-surrogate characters being incorrectly combined when

src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java

+20
Original file line numberDiff line numberDiff line change
@@ -1732,6 +1732,16 @@ private final void _writeCustomStringSegment2(final char[] cbuf, int offset, fin
17321732
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
17331733
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
17341734
} else {
1735+
// 3- or 4-byte character
1736+
if (_isStartOfSurrogatePair(ch)) {
1737+
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
1738+
if (combineSurrogates && offset < end) {
1739+
char highSurrogate = (char) ch;
1740+
char lowSurrogate = cbuf[offset++];
1741+
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1742+
continue;
1743+
}
1744+
}
17351745
outputPtr = _outputMultiByteChar(ch, outputPtr);
17361746
}
17371747
}
@@ -1789,6 +1799,16 @@ private final void _writeCustomStringSegment2(final String text, int offset, fin
17891799
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
17901800
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
17911801
} else {
1802+
// 3- or 4-byte character
1803+
if (_isStartOfSurrogatePair(ch)) {
1804+
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
1805+
if (combineSurrogates && offset < end) {
1806+
char highSurrogate = (char) ch;
1807+
char lowSurrogate = text.charAt(offset++);
1808+
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1809+
continue;
1810+
}
1811+
}
17921812
outputPtr = _outputMultiByteChar(ch, outputPtr);
17931813
}
17941814
}

src/test/java/com/fasterxml/jackson/core/write/SurrogateWrite223Test.java

+17
Original file line numberDiff line numberDiff line change
@@ -123,4 +123,21 @@ void checkNonSurrogates() throws Exception {
123123
assertTrue(json.contains("foo\u3042bar"));
124124
assertTrue(json.contains("\"test_emoji\":\"\uD83D\uDE0A\""));
125125
}
126+
127+
@Test
128+
void checkSurrogateWithCharacterEscapes() throws Exception {
129+
JsonFactory f = JsonFactory.builder()
130+
.enable(JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8)
131+
.build();
132+
f.setCharacterEscapes(JsonpCharacterEscapes.instance());
133+
ByteArrayOutputStream out = new ByteArrayOutputStream();
134+
try (JsonGenerator gen = f.createGenerator(out)) {
135+
gen.writeStartObject();
136+
// Outside the BMP; 0x1F60A - emoji
137+
gen.writeStringField("test_emoji", new String(Character.toChars(0x1F60A)));
138+
gen.writeEndObject();
139+
}
140+
String json = out.toString("UTF-8");
141+
assertEquals("{\"test_emoji\":\"\uD83D\uDE0A\"}", json);
142+
}
126143
}

0 commit comments

Comments
 (0)