Skip to content

Commit 37a29d9

Browse files
authored
Add logical type annotation for UnknownType (#3154)
1 parent 05dc538 commit 37a29d9

File tree

4 files changed

+68
-2
lines changed

4 files changed

+68
-2
lines changed

parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java

+41
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,12 @@ protected LogicalTypeAnnotation fromString(List<String> params) {
146146
protected LogicalTypeAnnotation fromString(List<String> params) {
147147
return float16Type();
148148
}
149+
},
150+
UNKNOWN {
151+
@Override
152+
protected LogicalTypeAnnotation fromString(List<String> params) {
153+
return unknownType();
154+
}
149155
};
150156

151157
protected abstract LogicalTypeAnnotation fromString(List<String> params);
@@ -316,6 +322,10 @@ public static Float16LogicalTypeAnnotation float16Type() {
316322
return Float16LogicalTypeAnnotation.INSTANCE;
317323
}
318324

325+
public static UnknownLogicalTypeAnnotation unknownType() {
326+
return UnknownLogicalTypeAnnotation.INSTANCE;
327+
}
328+
319329
public static class StringLogicalTypeAnnotation extends LogicalTypeAnnotation {
320330
private static final StringLogicalTypeAnnotation INSTANCE = new StringLogicalTypeAnnotation();
321331

@@ -989,6 +999,33 @@ PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
989999
}
9901000
}
9911001

1002+
public static class UnknownLogicalTypeAnnotation extends LogicalTypeAnnotation {
1003+
private static final UnknownLogicalTypeAnnotation INSTANCE = new UnknownLogicalTypeAnnotation();
1004+
1005+
private UnknownLogicalTypeAnnotation() {}
1006+
1007+
@Override
1008+
public OriginalType toOriginalType() {
1009+
// No OriginalType for UnknownType
1010+
return null;
1011+
}
1012+
1013+
@Override
1014+
public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T> logicalTypeAnnotationVisitor) {
1015+
return logicalTypeAnnotationVisitor.visit(this);
1016+
}
1017+
1018+
@Override
1019+
LogicalTypeToken getType() {
1020+
return LogicalTypeToken.UNKNOWN;
1021+
}
1022+
1023+
@Override
1024+
PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
1025+
return PrimitiveStringifier.UNKNOWN_STRINGIFIER;
1026+
}
1027+
}
1028+
9921029
// This logical type annotation is implemented to support backward compatibility with ConvertedType.
9931030
// The new logical type representation in parquet-format doesn't have any interval type,
9941031
// thus this annotation is mapped to UNKNOWN.
@@ -1162,5 +1199,9 @@ default Optional<T> visit(MapKeyValueTypeAnnotation mapKeyValueLogicalType) {
11621199
default Optional<T> visit(Float16LogicalTypeAnnotation float16LogicalType) {
11631200
return empty();
11641201
}
1202+
1203+
default Optional<T> visit(UnknownLogicalTypeAnnotation unknownLogicalTypeAnnotation) {
1204+
return empty();
1205+
}
11651206
}
11661207
}

parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java

+7
Original file line numberDiff line numberDiff line change
@@ -449,4 +449,11 @@ String stringifyNotNull(Binary value) {
449449
return Float16.toFloatString(value);
450450
}
451451
};
452+
453+
static final PrimitiveStringifier UNKNOWN_STRINGIFIER = new PrimitiveStringifier("UNKNOWN_STRINGIFIER") {
454+
455+
public String stringify(Binary ignored) {
456+
return "UNKNOWN";
457+
}
458+
};
452459
}

parquet-column/src/main/java/org/apache/parquet/schema/Types.java

+6
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,12 @@ public Optional<Boolean> visit(
472472
LogicalTypeAnnotation.Float16LogicalTypeAnnotation.BYTES, float16LogicalType);
473473
}
474474

475+
@Override
476+
public Optional<Boolean> visit(
477+
LogicalTypeAnnotation.UnknownLogicalTypeAnnotation unknownLogicalType) {
478+
return Optional.of(true);
479+
}
480+
475481
@Override
476482
public Optional<Boolean> visit(
477483
LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {

parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java

+14-2
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,11 @@ public Optional<LogicalType> visit(LogicalTypeAnnotation.Float16LogicalTypeAnnot
515515
return of(LogicalType.FLOAT16(new Float16Type()));
516516
}
517517

518+
@Override
519+
public Optional<LogicalType> visit(LogicalTypeAnnotation.UnknownLogicalTypeAnnotation intervalLogicalType) {
520+
return of(LogicalType.UNKNOWN(new NullType()));
521+
}
522+
518523
@Override
519524
public Optional<LogicalType> visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) {
520525
return of(LogicalType.UNKNOWN(new NullType()));
@@ -894,7 +899,8 @@ enum SortOrder {
894899
LogicalTypeAnnotation.StringLogicalTypeAnnotation.class,
895900
LogicalTypeAnnotation.EnumLogicalTypeAnnotation.class,
896901
LogicalTypeAnnotation.JsonLogicalTypeAnnotation.class,
897-
LogicalTypeAnnotation.Float16LogicalTypeAnnotation.class)));
902+
LogicalTypeAnnotation.Float16LogicalTypeAnnotation.class,
903+
LogicalTypeAnnotation.UnknownLogicalTypeAnnotation.class)));
898904

899905
/**
900906
* Returns whether to use signed order min and max with a type. It is safe to
@@ -997,6 +1003,12 @@ public Optional<SortOrder> visit(
9971003
return of(SortOrder.SIGNED);
9981004
}
9991005

1006+
@Override
1007+
public Optional<SortOrder> visit(
1008+
LogicalTypeAnnotation.UnknownLogicalTypeAnnotation unknownLogicalTypeAnnotation) {
1009+
return of(SortOrder.UNKNOWN);
1010+
}
1011+
10001012
@Override
10011013
public Optional<SortOrder> visit(
10021014
LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
@@ -1167,7 +1179,7 @@ LogicalTypeAnnotation getLogicalTypeAnnotation(LogicalType type) {
11671179
IntType integer = type.getINTEGER();
11681180
return LogicalTypeAnnotation.intType(integer.bitWidth, integer.isSigned);
11691181
case UNKNOWN:
1170-
return null;
1182+
return LogicalTypeAnnotation.unknownType();
11711183
case TIMESTAMP:
11721184
TimestampType timestamp = type.getTIMESTAMP();
11731185
return LogicalTypeAnnotation.timestampType(timestamp.isAdjustedToUTC, convertTimeUnit(timestamp.unit));

0 commit comments

Comments
 (0)