asn1: Fix long-standing IMPLICIT tagging brokenness

This commit _mostly_ fixes the Heimdal ASN.1 compiler so that it properly
supports IMPLICIT tagging in most, if not all, of the many cases where it
previously did not, as could be seen in lib/asn1/canthandle.asn1 prior to
this commit.

This fix is a bit of a hack in that a proper fix would change the
function prototypes of the encode/decode/length functions generated by
the compiler to take an optional IMPLICIT tag to tag with instead of the
type they code.  That fix would not be localized to lib/asn1/ however,
and would change the API and ABI of generated code (which is mostly not
an ABI for Heimdal, but still, some external projects would have to make
changes).

Instead, for IMPLICIT tags we currently depend on the IMPLICIT tag and
the sub-type's tag having the same size -- this can be fixed with extra
allocation on the encoder side as we do on the decoder side, but we
might leave it for later.

The issue we're fixing manifested as:

  -- The [CONTEXT 0] tag in Bar below was turned into an EXPLICIT tag
  -- instead of an IMPLICIT one, netting the DER encoding for the `foo`
  -- member as:
  --    [CONTEXT 0] [UNIVERSAL Seq] [UNIVERSAL Int] <encoding of i>
  -- instead of the correct:
  --    [CONTEXT 0] [UNIVERSAL Int] <encoding of i>
  Foo ::= SEQUENCE { i INTEGER }
  Bar ::= SEQUENCE { foo [0] IMPLICIT Foo }

or

  Foo ::= INTEGER
  Bar ::= SEQUENCE { foo [0] IMPLICIT Foo } -- tag context 0 marked
                                            -- constructed!

I've reviewed this in part by reviewing the output of the compiler
before and after this change using this procedure:

 - Run an earlier version of the ASN.1 compiler on all modules in
   lib/asn1/.  Save the output in a different location.

 - Run this (or a later) version of the ASN.1 compiler on the same
   modules, adding --original-order for modules that have been
   manually sorted already (e.g., rfc2459.asn1).

 - Run clang-format on the saved and newest generated C source
   and header files.

 - Diff the generated output.  Substantial differences will
   relate to handling of IMPLICIT tagging.  These are
   particularly evident in the tcg.asn1 module, which uses a lot
   of those.

Later commits add test data (certificates with extensions that use
IMPLICIT tagging) taken from external specifications as well, which
exercise this fix.

Non-urgent brokenness yet to be fixed:

 - When the IMPLICIT tag and the tag of the underlying type require
   differing numbers of bytes to encode, the encoding and decoding will
   fail.  The prototypes of generated length_*() functions make it
   impossible to do much better.

 - SET OF <primitive> still crashes the compiler (not a new bug).

Futures:

 - Unwind hackery in cms.asn1 that worked around our lack of proper
   IMPLICIT tagging support.

Here are some of the generated code deltas one expects to see around
this commit:

$ git checkout $earlier_version
$ ./autogen.sh
$ mkdir build
$ cd build
$ ../configure ...
$ make -j4
$ make check
$ cd lib/asn1
$ for i in *.c; do
      [[ $i = asn1parse.? || $i = lex.? || $i = *.h ]] && continue
      clang-format -i $i $i
      cmp /tmp/save/$i $i && echo NO DIFFS: $i && continue; echo DIFF: $i
  done
NO DIFFS: asn1_cms_asn1.c
NO DIFFS: asn1_digest_asn1.c
NO DIFFS: asn1_err.c
NO DIFFS: asn1_krb5_asn1.c
/tmp/save/asn1_kx509_asn1.c asn1_kx509_asn1.c differ: byte 6433, line 264
DIFF: asn1_kx509_asn1.c
NO DIFFS: asn1_ocsp_asn1.c
NO DIFFS: asn1_pkcs10_asn1.c
/tmp/save/asn1_pkcs12_asn1.c asn1_pkcs12_asn1.c differ: byte 12934, line 455
DIFF: asn1_pkcs12_asn1.c
NO DIFFS: asn1_pkcs8_asn1.c
NO DIFFS: asn1_pkcs9_asn1.c
NO DIFFS: asn1_pkinit_asn1.c
/tmp/save/asn1_rfc2459_asn1.c asn1_rfc2459_asn1.c differ: byte 20193, line 532
DIFF: asn1_rfc2459_asn1.c
NO DIFFS: asn1_rfc4043_asn1.c
/tmp/save/asn1_rfc4108_asn1.c asn1_rfc4108_asn1.c differ: byte 595, line 26
DIFF: asn1_rfc4108_asn1.c
/tmp/save/asn1_tcg_asn1.c asn1_tcg_asn1.c differ: byte 31835, line 1229
DIFF: asn1_tcg_asn1.c
/tmp/save/asn1_test_asn1.c asn1_test_asn1.c differ: byte 384, line 21
DIFF: asn1_test_asn1.c
/tmp/save/test_template_asn1-template.c test_template_asn1-template.c differ: byte 650, line 20
DIFF: test_template_asn1-template.c
$
$ cd ../..
$ git checkout $newer_version
$ make -j4 && make check
$ cd lib/asn1
$ for i in *.[ch]; do
    [[ $i = asn1parse.? || $i = lex.? || $i = *.h ]] && continue
    clang-format -i $i $i
    cmp /tmp/save/$i $i && echo NO DIFFS: $i && continue
    diff -ubw /tmp/save/$i $i
  done | $PAGER

and one should see deltas such as the following:

 - a small enhancement to handling of OPTIONAL members:

     (data)->macData = calloc(1, sizeof(*(data)->macData));
     if ((data)->macData == NULL)
       goto fail;
     e = decode_PKCS12_MacData(p, len, (data)->macData, &l);
-    if (e) {
+    if (e == ASN1_MISSING_FIELD) {
       free((data)->macData);
       (data)->macData = NULL;
+    } else if (e) {
+      goto fail;
     } else {
       p += l;
       len -= l;
       ret += l;

 - more complete handling of DEFAULTed members:

     e = decode_FWReceiptVersion(p, len, &(data)->version, &l);
-    if (e)
+    if (e == ASN1_MISSING_FIELD) {
+      (data)->version = 1;
+    } else if (e) {
       goto fail;
-    p += l;
-    len -= l;
-    ret += l;
+    } else {
+      p += l;
+      len -= l;
+      ret += l;
+    }
     {

 - replacement of tags with implicit tags (encode side):

   /* targetUri */
   if ((data)->targetUri) {
     size_t Top_tag_oldret HEIMDAL_UNUSED_ATTRIBUTE = ret;
     ret = 0;
     e = encode_URIReference(p, len, (data)->targetUri, &l);
     if (e)
       return e;
     p -= l;
     len -= l;
     ret += l;

-    e = der_put_length_and_tag(p, len, ret, ASN1_C_CONTEXT, PRIM, 4, &l);
+    e = der_replace_tag(p, len, ASN1_C_CONTEXT, CONS, 4);
     if (e)
       return e;
     p -= l;
     len -= l;
     ret += l;

     ret += Top_tag_oldret;
   }

 - replacement of tags with implicit tags (decode side):

         strengthOfFunction_oldlen = len;
         if (strengthOfFunction_datalen > len) {
           e = ASN1_OVERRUN;
           goto fail;
         }
         len = strengthOfFunction_datalen;
-        e = decode_StrengthOfFunction(p, len, (data)->strengthOfFunction, &l);
-        if (e)
-          goto fail;
-        p += l;
-        len -= l;
-        ret += l;
+        {
+          unsigned char *pcopy;
+          pcopy = calloc(1, len);
+          if (pcopy == 0) {
+            e = ENOMEM;
+            goto fail;
+          }
+          memcpy(pcopy, p, len);
+          e = der_replace_tag(pcopy, len, ASN1_C_UNIV, PRIM, 0);
+          if (e)
+            goto fail;
+          e = decode_StrengthOfFunction(p, len, (data)->strengthOfFunction, &l);
+          if (e)
+            goto fail;
+          p += l;
+          len -= l;
+          ret += l;
+          free(pcopy);
+        }
         len = strengthOfFunction_oldlen - strengthOfFunction_datalen;
       }
     }
     {
       size_t profileOid_datalen, profileOid_oldlen;

 - correct determination of whether the implicit tag is constructed or not
   for IMPLICIT-tagged named primitive types:

     {
       size_t profileUri_datalen, profileUri_oldlen;
       Der_type profileUri_type;
       e = der_match_tag_and_length(p, len, ASN1_C_CONTEXT, &profileUri_type, 2,
                                    &profileUri_datalen, &l);
-      if (e == 0 && profileUri_type != PRIM) {
+      if (e == 0 && profileUri_type != CONS) {
         e = ASN1_BAD_ID;
       }
       if (e) {
         (data)->profileUri = NULL;
       } else {
         (data)->profileUri = calloc(1, sizeof(*(data)->profileUri));
         if ((data)->profileUri == NULL) {
           e = ENOMEM;
           goto fail;
         }

 - correct determination of length of IMPLICIT-tagged OIDs:

   if ((data)->profileOid) {
     size_t Top_tag_oldret = ret;
     ret = 0;
     ret += der_length_oid((data)->profileOid);
+    ret += 1 + der_length_len(ret);
     ret += Top_tag_oldret;
   }

These deltas should be examined with the corresponding ASN.1 module at
hand, cross-referencing the source code to the ASN.1 type definitions
and manually applying X.690 rules to double-check the choices of
primitive vs. constructed tag, and the choices of when to replace tags
and when not.
This commit is contained in:
Nicolas Williams
2020-12-28 17:46:15 -06:00
parent 24543c4a31
commit 89389bc7a0
11 changed files with 249 additions and 60 deletions

View File

@@ -116,6 +116,7 @@ encode_type (const char *name, const Type *t, const char *tmpstr)
"e = encode_%s(p, len, %s, &l);\n"
"if (e) return e;\np -= l; len -= l; ret += l;\n\n",
t->symbol->gen_name, name);
constructed = !is_primitive_type(t);
break;
case TInteger:
if(t->members) {
@@ -387,17 +388,89 @@ encode_type (const char *name, const Type *t, const char *tmpstr)
break;
case TTag: {
char *tname = NULL;
int replace_tag = 0;
int prim = !(t->tag.tagclass != ASN1_C_UNIV &&
t->tag.tagenv == TE_EXPLICIT) &&
is_primitive_type(t->subtype);
int c;
if (asprintf (&tname, "%s_tag", tmpstr) < 0 || tname == NULL)
errx(1, "malloc");
c = encode_type (name, t->subtype, tname);
fprintf (codefile,
"e = der_put_length_and_tag (p, len, ret, %s, %s, %s, &l);\n"
"if (e) return e;\np -= l; len -= l; ret += l;\n\n",
classname(t->tag.tagclass),
c ? "CONS" : "PRIM",
valuename(t->tag.tagclass, t->tag.tagvalue));
free (tname);
/* Explicit tags are always constructed */
if (!c && t->tag.tagclass != ASN1_C_UNIV && t->tag.tagenv == TE_EXPLICIT)
c = 1;
/*
* HACK HACK HACK
*
* This is part of the fix to the bug where we treat IMPLICIT tags of
* named types as EXPLICIT. I.e.
*
* Foo ::= SEQUENCE { ... }
* Bar ::= SEQUENCE { foo [0] IMPLICIT Foo }
*
* would get a context [0] constructed tag *and* a universal sequence
* constructed tag when it should get only the first tag.
*
* Properly fixing this would require changing the signatures of the
* encode, length, and decode functions we generate to take an optional
* tag to replace the one the encoder would generate / decoder would
* expect. That would change the ABI, which... isn't stable, but it's
* a bit soon to make that change.
*
* So, we're looking for IMPLICIT tags of named SEQUENCE/SET types, and
* if we see any, we generate code to replace the tag.
*
* NOTE WELL: We're assuming that the length of the encoding of the tag
* of the subtype and the length of the encoding of the
* IMPLICIT tag are the same.
*
* To avoid this we'll need to generate new length_tag_*
* functions or else we'll need to add a boolean argument to
* the length_* functions we generate to count only the
* length of the tag of the type. The latter is an ABI
* change. Or we'll need to enhance asn1_compile to be able
* to load multiple modules so that we use the AST of the
* modules to internally compute the length of types and
* tags. The latter would be great anyways as it would
* allow the computation of tag lengths for tagged types to
* be constant.
*
* NOTE WELL: We *do* "replace" the tags of IMPLICIT-tagged primitive
* types, but our primitive codec functions leave those tags
* out, which is why we don't have to der_replace_tag() them
* here.
*/
if (t->tag.tagenv == TE_IMPLICIT && !prim &&
t->subtype->type != TSequenceOf && t->subtype->type != TSetOf &&
t->subtype->type != TChoice) {
if (t->subtype->symbol &&
(t->subtype->type == TSequence ||
t->subtype->type == TSet))
replace_tag = 1;
else if (t->subtype->symbol && strcmp(t->subtype->symbol->name, "heim_any"))
replace_tag = 1;
} else if (t->tag.tagenv == TE_IMPLICIT && prim && t->subtype->symbol)
/*
* Because the subtype is named we are generating its codec
* functions, and those will be adding their UNIVERSAL or whatever
* tags unlike our raw primitive codec library.
*/
replace_tag = 1;
if (replace_tag)
fprintf(codefile,
"e = der_replace_tag (p, len, %s, %s, %s);\n"
"if (e) return e;\np -= l; len -= l; ret += l;\n\n",
classname(t->tag.tagclass),
c ? "CONS" : "PRIM",
valuename(t->tag.tagclass, t->tag.tagvalue));
else
fprintf(codefile,
"e = der_put_length_and_tag (p, len, ret, %s, %s, %s, &l);\n"
"if (e) return e;\np -= l; len -= l; ret += l;\n\n",
classname(t->tag.tagclass),
c ? "CONS" : "PRIM",
valuename(t->tag.tagclass, t->tag.tagvalue));
free(tname);
break;
}
case TChoice:{