asn1: Fix long-standing IMPLICIT tagging brokenness
This commit _mostly_ fixes the Heimdal ASN.1 compiler to properly support IMPLICIT tagging in most if not all the many cases where it didn't already, as you could see in lib/asn1/canthandle.asn1 prior to this commit. This fix is a bit of a hack in that a proper fix would change the function prototypes of the encode/decode/length functions generated by the compiler to take an optional IMPLICIT tag to tag with instead of the type they code. That fix would not be localized to lib/asn1/ however, and would change the API and ABI of generated code (which is mostly not an ABI for Heimdal, but still, some external projects would have to make changes). Instead, for IMPLICIT tags we currently depend on the IMPLICIT tag and the sub-type's tag having the same size -- this can be fixed with extra allocation on the encoder side as we do on the decoder side, but we might leave it for later. The issue we're fixing manifested as: -- The [CONTEXT 0] tag in Bar below was turned into an EXPLICIT tag -- instead of an IMPLICIT one, netting the DER encoding for the `foo` -- member as: -- [CONTEXT 0] [UNIVERSAL Seq] [UNIVERSAL Int] <encoding of i> -- instead of the correct: -- [CONTEXT 0] [UNIVERSAL Int] <encoding of i> Foo ::= SEQUENCE { i INTEGER } Bar ::= SEQUENCE { foo [0] IMPLICIT Foo } or Foo ::= INTEGER Bar ::= SEQUENCE { foo [0] IMPLICIT Foo } -- tag context 0 marked -- constructed! I've reviewed this in part by reviewing the output of the compiler before and after this change using this procedure: - Run an earlier version of the ASN.1 compiler output for all modules in lib/asn1/. Save these in a different location. - Run this (or later) version of the ASN.1 compiler output for the same modules, adding --original-order for modules that have been manually sorted already (e.g., rfc2459.asn1). - Run clang-format on the saved and newest generated C source and header files. - Diff the generated output. Substantial differences will relate to handling of IMPLICIT tagging. 
These are particularly evident in the tcg.asn1 module, which uses a lot of those. Later commits add test data (certificates with extensions that use IMPLICIT tagging) taken from external specifications as well, which exercise this fix. Non-urgent brokenness yet to be fixed: - When the IMPLICIT tag and the tag of the underlying type require differing numbers of bytes to encode, the encoding and decoding will fail. The prototypes of generated length_*() functions make it impossible to do much better. - SET OF <primitive> still crashes the compiler (not a new bug). Futures: - Unwind hackery in cms.asn1 that worked around our lack of proper IMPLICIT tagging support. Here are some of the generated code deltas one expects to see around this commit: $ git checkout $earlier_version $ ./autogen.sh $ mkdir build $ cd build $ ../configure ... $ make -j4 $ make check $ cd lib/asn1 $ for i in *.c; do [[ $i = asn1parse.? || $i = lex.? || $i = *.h ]] && continue clang-format -i $i $i cmp /tmp/save/$i $i && echo NO DIFFS: $i && continue; echo DIFF: $i done NO DIFFS: asn1_cms_asn1.c NO DIFFS: asn1_digest_asn1.c NO DIFFS: asn1_err.c NO DIFFS: asn1_krb5_asn1.c /tmp/save/asn1_kx509_asn1.c asn1_kx509_asn1.c differ: byte 6433, line 264 DIFF: asn1_kx509_asn1.c NO DIFFS: asn1_ocsp_asn1.c NO DIFFS: asn1_pkcs10_asn1.c /tmp/save/asn1_pkcs12_asn1.c asn1_pkcs12_asn1.c differ: byte 12934, line 455 DIFF: asn1_pkcs12_asn1.c NO DIFFS: asn1_pkcs8_asn1.c NO DIFFS: asn1_pkcs9_asn1.c NO DIFFS: asn1_pkinit_asn1.c /tmp/save/asn1_rfc2459_asn1.c asn1_rfc2459_asn1.c differ: byte 20193, line 532 DIFF: asn1_rfc2459_asn1.c NO DIFFS: asn1_rfc4043_asn1.c /tmp/save/asn1_rfc4108_asn1.c asn1_rfc4108_asn1.c differ: byte 595, line 26 DIFF: asn1_rfc4108_asn1.c /tmp/save/asn1_tcg_asn1.c asn1_tcg_asn1.c differ: byte 31835, line 1229 DIFF: asn1_tcg_asn1.c /tmp/save/asn1_test_asn1.c asn1_test_asn1.c differ: byte 384, line 21 DIFF: asn1_test_asn1.c /tmp/save/test_template_asn1-template.c test_template_asn1-template.c 
differ: byte 650, line 20 DIFF: test_template_asn1-template.c $ $ cd ../.. $ git checkout $newer_version $ make -j4 && make check $ cd lib/asn1 $ for i in *.[ch]; do [[ $i = asn1parse.? || $i = lex.? || $i = *.h ]] && continue clang-format -i $i $i cmp /tmp/save/$i $i && echo NO DIFFS: $i && continue diff -ubw /tmp/save/$i $i done | $PAGER and one should see deltas such as the following: - a small enhancement to handling of OPTIONAL members: (data)->macData = calloc(1, sizeof(*(data)->macData)); if ((data)->macData == NULL) goto fail; e = decode_PKCS12_MacData(p, len, (data)->macData, &l); - if (e) { + if (e == ASN1_MISSING_FIELD) { free((data)->macData); (data)->macData = NULL; + } else if (e) { + goto fail; } else { p += l; len -= l; ret += l; - more complete handling of DEFAULTed members: e = decode_FWReceiptVersion(p, len, &(data)->version, &l); - if (e) + if (e == ASN1_MISSING_FIELD) { + (data)->version = 1; + } else if (e) { goto fail; - p += l; - len -= l; - ret += l; + } else { + p += l; + len -= l; + ret += l; + } { - replacement of tags with implicit tags (encode side): /* targetUri */ if ((data)->targetUri) { size_t Top_tag_oldret HEIMDAL_UNUSED_ATTRIBUTE = ret; ret = 0; e = encode_URIReference(p, len, (data)->targetUri, &l); if (e) return e; p -= l; len -= l; ret += l; - e = der_put_length_and_tag(p, len, ret, ASN1_C_CONTEXT, PRIM, 4, &l); + e = der_replace_tag(p, len, ASN1_C_CONTEXT, CONS, 4); if (e) return e; p -= l; len -= l; ret += l; ret += Top_tag_oldret; } - replacement of tags with implicit tags (decode side): strengthOfFunction_oldlen = len; if (strengthOfFunction_datalen > len) { e = ASN1_OVERRUN; goto fail; } len = strengthOfFunction_datalen; - e = decode_StrengthOfFunction(p, len, (data)->strengthOfFunction, &l); - if (e) - goto fail; - p += l; - len -= l; - ret += l; + { + unsigned char *pcopy; + pcopy = calloc(1, len); + if (pcopy == 0) { + e = ENOMEM; + goto fail; + } + memcpy(pcopy, p, len); + e = der_replace_tag(pcopy, len, ASN1_C_UNIV, 
PRIM, 0); + if (e) + goto fail; + e = decode_StrengthOfFunction(p, len, (data)->strengthOfFunction, &l); + if (e) + goto fail; + p += l; + len -= l; + ret += l; + free(pcopy); + } len = strengthOfFunction_oldlen - strengthOfFunction_datalen; } } { size_t profileOid_datalen, profileOid_oldlen; - correct determination of implicit tag constructed vs no for IMPLICIT-tagged named primitive types: { size_t profileUri_datalen, profileUri_oldlen; Der_type profileUri_type; e = der_match_tag_and_length(p, len, ASN1_C_CONTEXT, &profileUri_type, 2, &profileUri_datalen, &l); - if (e == 0 && profileUri_type != PRIM) { + if (e == 0 && profileUri_type != CONS) { e = ASN1_BAD_ID; } if (e) { (data)->profileUri = NULL; } else { (data)->profileUri = calloc(1, sizeof(*(data)->profileUri)); if ((data)->profileUri == NULL) { e = ENOMEM; goto fail; } - correct determination of length of IMPLICIT-tagged OIDs: if ((data)->profileOid) { size_t Top_tag_oldret = ret; ret = 0; ret += der_length_oid((data)->profileOid); + ret += 1 + der_length_len(ret); ret += Top_tag_oldret; } These deltas should be examined with the corresponding ASN.1 module at hand, cross-referencing the source code to the ASN.1 type definitions and manually applying X.690 rules to double-check the choices of primitive vs. constructed tag, and the choices of when to replace tags and when not.
This commit is contained in:
@@ -116,6 +116,7 @@ encode_type (const char *name, const Type *t, const char *tmpstr)
|
||||
"e = encode_%s(p, len, %s, &l);\n"
|
||||
"if (e) return e;\np -= l; len -= l; ret += l;\n\n",
|
||||
t->symbol->gen_name, name);
|
||||
constructed = !is_primitive_type(t);
|
||||
break;
|
||||
case TInteger:
|
||||
if(t->members) {
|
||||
@@ -387,17 +388,89 @@ encode_type (const char *name, const Type *t, const char *tmpstr)
|
||||
break;
|
||||
case TTag: {
|
||||
char *tname = NULL;
|
||||
int replace_tag = 0;
|
||||
int prim = !(t->tag.tagclass != ASN1_C_UNIV &&
|
||||
t->tag.tagenv == TE_EXPLICIT) &&
|
||||
is_primitive_type(t->subtype);
|
||||
int c;
|
||||
if (asprintf (&tname, "%s_tag", tmpstr) < 0 || tname == NULL)
|
||||
errx(1, "malloc");
|
||||
c = encode_type (name, t->subtype, tname);
|
||||
fprintf (codefile,
|
||||
"e = der_put_length_and_tag (p, len, ret, %s, %s, %s, &l);\n"
|
||||
"if (e) return e;\np -= l; len -= l; ret += l;\n\n",
|
||||
classname(t->tag.tagclass),
|
||||
c ? "CONS" : "PRIM",
|
||||
valuename(t->tag.tagclass, t->tag.tagvalue));
|
||||
free (tname);
|
||||
/* Explicit tags are always constructed */
|
||||
if (!c && t->tag.tagclass != ASN1_C_UNIV && t->tag.tagenv == TE_EXPLICIT)
|
||||
c = 1;
|
||||
/*
|
||||
* HACK HACK HACK
|
||||
*
|
||||
* This is part of the fix to the bug where we treat IMPLICIT tags of
|
||||
* named types as EXPLICIT. I.e.
|
||||
*
|
||||
* Foo ::= SEQUENCE { ... }
|
||||
* Bar ::= SEQUENCE { foo [0] IMPLICIT Foo }
|
||||
*
|
||||
* would get a context [0] constructed tag *and* a universal sequence
|
||||
* constructed tag when it should get only the first tag.
|
||||
*
|
||||
* Properly fixing this would require changing the signatures of the
|
||||
* encode, length, and decode functions we generate to take an optional
|
||||
* tag to replace the one the encoder would generate / decoder would
|
||||
* expect. That would change the ABI, which... isn't stable, but it's
|
||||
* a bit soon to make that change.
|
||||
*
|
||||
* So, we're looking for IMPLICIT tags of named SEQUENCE/SET types, and
|
||||
* if we see any, we generate code to replace the tag.
|
||||
*
|
||||
* NOTE WELL: We're assuming that the length of the encoding of the tag
|
||||
* of the subtype and the length of the encoding of the
|
||||
* IMPLICIT tag are the same.
|
||||
*
|
||||
* To avoid this we'll need to generate new length_tag_*
|
||||
* functions or else we'll need to add a boolean argument to
|
||||
* the length_* functions we generate to count only the
|
||||
* length of the tag of the type. The latter is an ABI
|
||||
* change. Or we'll need to enhance asn1_compile to be able
|
||||
* to load multiple modules so that we use the AST of the
|
||||
* modules to internally compute the length of types and
|
||||
* tags. The latter would be great anyways as it would
|
||||
* allow the computation of tag lengths for tagged types to
|
||||
* be constant.
|
||||
*
|
||||
* NOTE WELL: We *do* "replace" the tags of IMPLICIT-tagged primitive
|
||||
* types, but our primitive codec functions leave those tags
|
||||
* out, which is why we don't have to der_replace_tag() them
|
||||
* here.
|
||||
*/
|
||||
if (t->tag.tagenv == TE_IMPLICIT && !prim &&
|
||||
t->subtype->type != TSequenceOf && t->subtype->type != TSetOf &&
|
||||
t->subtype->type != TChoice) {
|
||||
if (t->subtype->symbol &&
|
||||
(t->subtype->type == TSequence ||
|
||||
t->subtype->type == TSet))
|
||||
replace_tag = 1;
|
||||
else if (t->subtype->symbol && strcmp(t->subtype->symbol->name, "heim_any"))
|
||||
replace_tag = 1;
|
||||
} else if (t->tag.tagenv == TE_IMPLICIT && prim && t->subtype->symbol)
|
||||
/*
|
||||
* Because the subtype is named we are generating its codec
|
||||
* functions, and those will be adding their UNIVERSAL or whatever
|
||||
* tags unlike our raw primitive codec library.
|
||||
*/
|
||||
replace_tag = 1;
|
||||
if (replace_tag)
|
||||
fprintf(codefile,
|
||||
"e = der_replace_tag (p, len, %s, %s, %s);\n"
|
||||
"if (e) return e;\np -= l; len -= l; ret += l;\n\n",
|
||||
classname(t->tag.tagclass),
|
||||
c ? "CONS" : "PRIM",
|
||||
valuename(t->tag.tagclass, t->tag.tagvalue));
|
||||
else
|
||||
fprintf(codefile,
|
||||
"e = der_put_length_and_tag (p, len, ret, %s, %s, %s, &l);\n"
|
||||
"if (e) return e;\np -= l; len -= l; ret += l;\n\n",
|
||||
classname(t->tag.tagclass),
|
||||
c ? "CONS" : "PRIM",
|
||||
valuename(t->tag.tagclass, t->tag.tagvalue));
|
||||
free(tname);
|
||||
break;
|
||||
}
|
||||
case TChoice:{
|
||||
|
Reference in New Issue
Block a user