Add decompilation logic for more constructs

* Function calls decompiled correctly
    * Add boolean type and boolean constants in the grammar
    * Allow angle brackets in identifier names
    * Overload `transpile` to also accept input streams
    * Update the generate script accordingly
This commit is contained in:
Dhruv Maroo
2023-04-23 17:08:31 +05:30
parent 6debceb8b5
commit 425c633efe
12 changed files with 2087 additions and 1719 deletions

View File

@@ -4,6 +4,6 @@ import ghidrust.decompiler.parser.c.gen.CParser;
public class Run {
public static void main(String[] args) {
System.out.println(CParser.transpile("int main(int a, int b) {\n int a = 5; int b = 3; a = 3; int c; return a + b;\n}"));
System.out.println(CParser.transpile(System.in));
}
}

View File

@@ -94,8 +94,12 @@ public class CVisitor implements CParserVisitor {
rust_code.append("fn ");
rust_code.append(node.jjtGetChild(1).jjtAccept(this, data));
rust_code.append("-> ");
rust_code.append(node.jjtGetChild(0).jjtAccept(this, data));
String ret_type = (String) node.jjtGetChild(0).jjtAccept(this, data);
if (!ret_type.equals("")) {
rust_code.append("-> ");
}
rust_code.append(ret_type);
rust_code.append(" {\n");
indent_level++;
rust_code.append(node.jjtGetChild(2).jjtAccept(this, data));
@@ -220,8 +224,11 @@ public class CVisitor implements CParserVisitor {
public Object visit(ASTParameterDeclaration node, Object data) {
StringBuilder sb = new StringBuilder("");
sb.append(node.jjtGetChild(1).jjtAccept(this, data));
sb.append(": ");
if (node.jjtGetNumChildren() > 1) {
sb.append(node.jjtGetChild(1).jjtAccept(this, data));
sb.append(": ");
}
sb.append(node.jjtGetChild(0).jjtAccept(this, data));
return sb.toString();
}
@@ -355,7 +362,14 @@ public class CVisitor implements CParserVisitor {
}
public Object visit(ASTCastExpression node, Object data) {
return defaultSpacedVisit(node, data, " ", false);
StringBuilder sb = new StringBuilder("");
if (node.jjtGetNumChildren() > 1) {
sb.append(node.jjtGetChild(1).jjtAccept(this, data));
sb.append(" as ");
}
sb.append(node.jjtGetChild(0).jjtAccept(this, data));
return sb.toString();
}
public Object visit(ASTUnaryExpression node, Object data) {
@@ -367,6 +381,21 @@ public class CVisitor implements CParserVisitor {
}
/*
 * Renders a postfix expression.
 *
 * When the node's `choice` is 2 (function call) the result is child 0,
 * followed by the remaining children wrapped in parentheses. For every
 * other value the node is rendered by defaultSpacedVisit with a single
 * space as the separator.
 */
public Object visit(ASTPostfixExpression node, Object data) {
    // Child 0 is always visited first, before the kind of postfix is examined.
    final Object head = node.jjtGetChild(0).jjtAccept(this, data);

    if (node.choice != 2) {
        return defaultSpacedVisit(node, data, " ", false);
    }

    /* Function call */
    final StringBuilder call = new StringBuilder("");
    call.append(head);
    call.append("(");
    int idx = 1;
    while (idx < node.jjtGetNumChildren()) {
        call.append(node.jjtGetChild(idx).jjtAccept(this, data));
        idx++;
    }
    call.append(")");
    return call.toString();
}
@@ -375,7 +404,7 @@ public class CVisitor implements CParserVisitor {
}
public Object visit(ASTArgumentExpressionList node, Object data) {
return defaultVisit(node, data);
return defaultSpacedVisit(node, data, ", ", false);
}
}
/*

View File

@@ -4,8 +4,8 @@ package ghidrust.decompiler.parser.c.gen;
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
public
class ASTGhostStringToken extends SimpleNode {
String str_value;
String str_val;
public ASTGhostStringToken(int id) {
super(id);
}
@@ -22,12 +22,12 @@ class ASTGhostStringToken extends SimpleNode {
visitor.visit(this, data);
}
public void setValue(String s) {
value = s;
public String getValue() {
return str_val;
}
public String getValue() {
return str_value;
public void setValue(String value) {
this.str_val = value;
}
}
/* JavaCC - OriginalChecksum=7d91f560265b12b4f437803bcd66b7ba (do not edit this line) */

View File

@@ -4,6 +4,8 @@ package ghidrust.decompiler.parser.c.gen;
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
public
class ASTPostfixExpression extends SimpleNode {
public int choice;
public ASTPostfixExpression(int id) {
super(id);
}

View File

@@ -4,8 +4,8 @@ package ghidrust.decompiler.parser.c.gen;
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
public
class ASTStringToken extends SimpleNode {
String str_value;
String str_val;
public ASTStringToken(int id) {
super(id);
}
@@ -22,12 +22,12 @@ class ASTStringToken extends SimpleNode {
visitor.visit(this, data);
}
public void setValue(String s) {
str_value = s;
public String getValue() {
return str_val;
}
public String getValue() {
return str_value;
public void setValue(String value) {
this.str_val = value;
}
}
/* JavaCC - OriginalChecksum=c289df07a5b51163b866d4bfab28fb00 (do not edit this line) */

View File

@@ -4,7 +4,7 @@ package ghidrust.decompiler.parser.c.gen;
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
public
class ASTTypeStringToken extends SimpleNode {
String str_value;
String str_val;
public ASTTypeStringToken(int id) {
super(id);
@@ -22,12 +22,12 @@ class ASTTypeStringToken extends SimpleNode {
visitor.visit(this, data);
}
public void setValue(String s) {
str_value = s;
public String getValue() {
return str_val;
}
public String getValue() {
return str_value;
public void setValue(String value) {
this.str_val = value;
}
}
/* JavaCC - OriginalChecksum=726f5c0cafc2cba1f5ca608dfd54d3e2 (do not edit this line) */

File diff suppressed because it is too large Load Diff

View File

@@ -27,81 +27,85 @@ public interface CParserConstants {
/** RegularExpression Id. */
int STRING_LITERAL = 19;
/** RegularExpression Id. */
int UNDEFINED_TYPE = 20;
int BOOLEAN = 20;
/** RegularExpression Id. */
int CONTINUE = 21;
int UNDEFINED_TYPE = 21;
/** RegularExpression Id. */
int VOLATILE = 22;
int CONTINUE = 22;
/** RegularExpression Id. */
int REGISTER = 23;
int VOLATILE = 23;
/** RegularExpression Id. */
int UNSIGNED = 24;
int REGISTER = 24;
/** RegularExpression Id. */
int TYPEDEF = 25;
int UNSIGNED = 25;
/** RegularExpression Id. */
int DFLT = 26;
int TYPEDEF = 26;
/** RegularExpression Id. */
int DOUBLE = 27;
int DFLT = 27;
/** RegularExpression Id. */
int SIZEOF = 28;
int DOUBLE = 28;
/** RegularExpression Id. */
int SWITCH = 29;
int SIZEOF = 29;
/** RegularExpression Id. */
int RETURN = 30;
int SWITCH = 30;
/** RegularExpression Id. */
int EXTERN = 31;
int RETURN = 31;
/** RegularExpression Id. */
int STRUCT = 32;
int EXTERN = 32;
/** RegularExpression Id. */
int STATIC = 33;
int STRUCT = 33;
/** RegularExpression Id. */
int SIGNED = 34;
int STATIC = 34;
/** RegularExpression Id. */
int WHILE = 35;
int SIGNED = 35;
/** RegularExpression Id. */
int BREAK = 36;
int WHILE = 36;
/** RegularExpression Id. */
int UNION = 37;
int BREAK = 37;
/** RegularExpression Id. */
int CONST = 38;
int UNION = 38;
/** RegularExpression Id. */
int FLOAT = 39;
int CONST = 39;
/** RegularExpression Id. */
int SHORT = 40;
int FLOAT = 40;
/** RegularExpression Id. */
int ELSE = 41;
int SHORT = 41;
/** RegularExpression Id. */
int CASE = 42;
int ELSE = 42;
/** RegularExpression Id. */
int LONG = 43;
int CASE = 43;
/** RegularExpression Id. */
int ENUM = 44;
int LONG = 44;
/** RegularExpression Id. */
int AUTO = 45;
int ENUM = 45;
/** RegularExpression Id. */
int VOID = 46;
int AUTO = 46;
/** RegularExpression Id. */
int CHAR = 47;
int VOID = 47;
/** RegularExpression Id. */
int GOTO = 48;
int CHAR = 48;
/** RegularExpression Id. */
int FOR = 49;
int GOTO = 49;
/** RegularExpression Id. */
int INT = 50;
int FOR = 50;
/** RegularExpression Id. */
int IF = 51;
int INT = 51;
/** RegularExpression Id. */
int DO = 52;
int IF = 52;
/** RegularExpression Id. */
int CODE = 53;
int DO = 53;
/** RegularExpression Id. */
int IDENTIFIER = 54;
int BOOL_TYPE = 54;
/** RegularExpression Id. */
int LETTER = 55;
int CODE = 55;
/** RegularExpression Id. */
int DIGIT = 56;
int IDENTIFIER = 56;
/** RegularExpression Id. */
int SEPARATOR = 57;
int LETTER = 57;
/** RegularExpression Id. */
int DIGIT = 58;
/** RegularExpression Id. */
int SEPARATOR = 59;
/** Lexical state. */
int DEFAULT = 0;
@@ -130,6 +134,7 @@ public interface CParserConstants {
"<EXPONENT>",
"<CHARACTER_LITERAL>",
"<STRING_LITERAL>",
"<BOOLEAN>",
"<UNDEFINED_TYPE>",
"\"continue\"",
"\"volatile\"",
@@ -163,6 +168,7 @@ public interface CParserConstants {
"\"int\"",
"\"if\"",
"\"do\"",
"\"bool\"",
"\"code\"",
"<IDENTIFIER>",
"<LETTER>",

View File

@@ -31,6 +31,10 @@ public class CParser/*@bgen(jjtree)*/implements CParserTreeConstants/*@egen*/ {/
// Run the parser
/* Convenience overload: encodes the C source as UTF-8 and hands it to transpile(InputStream). */
public static String transpile(String c_code) {
    final byte[] utf8Source = c_code.getBytes(StandardCharsets.UTF_8);
    return transpile(new ByteArrayInputStream(utf8Source));
}
public static String transpile(InputStream stream) {
c_parser = new CParser(stream);
try {
@@ -84,7 +88,8 @@ TOKEN : {
| <#EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+>
| <CHARACTER_LITERAL: "\'" (~["\'","\\","\n","\r"] | "\\" (["n","t","b","r","f","\\","\'","\""] | ["0"-"7"] (["0"-"7"])? | ["0"-"3"] ["0"-"7"] ["0"-"7"])) "\'">
| <STRING_LITERAL: "\"" ( ~["\"","\\","\n","\r"] | "\\" ( ["n","t","b","r","f","\\","\'","\""] | ["0"-"7"] (["0"-"7"])? | ["0"-"3"] ["0"-"7"] ["0"-"7"] | ( ["\n","\r"] | "\r\n")))* "\"">
| <UNDEFINED_TYPE: "undefined" (<DECIMAL_LITERAL>)?> // Ghidra specific unknown type
| <BOOLEAN: "true" | "false">
| <UNDEFINED_TYPE: "undefined" (<DECIMAL_LITERAL>)?> // Ghidra specific unknown type
}
TOKEN : {
@@ -120,6 +125,7 @@ TOKEN : {
<INT: "int"> |
<IF: "if"> |
<DO: "do"> |
<BOOL_TYPE: "bool"> |
<CODE: "code"> // code ptr, Ghidra specific
}
@@ -309,7 +315,7 @@ void TypeSpecifier() :
try {
/*@egen*/
( t = <VOID> | t = <CHAR> | t = <SHORT> | t = <INT> | t = <LONG> | t = <FLOAT> | t = <DOUBLE> | t = <SIGNED> |
t = <UNSIGNED> | t = <CODE> | t = <UNDEFINED_TYPE> )/*@bgen(jjtree)*/
t = <UNSIGNED> | t = <BOOL_TYPE> | t = <CODE> | t = <UNDEFINED_TYPE> )/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
@@ -871,7 +877,7 @@ void Statement() : {/*@bgen(jjtree) Statement */
{/*@bgen(jjtree) Statement */
try {
/*@egen*/
( LOOKAHEAD(2) LabeledStatement() |
( LOOKAHEAD(Identifier() ":") LabeledStatement() |
ExpressionStatement() |
CompoundStatement() |
SelectionStatement() |
@@ -1702,19 +1708,29 @@ void UnaryOperator() : {/*@bgen(jjtree) UnaryOperator */
}
void PostfixExpression() : {/*@bgen(jjtree) PostfixExpression */
ASTPostfixExpression jjtn000 = new ASTPostfixExpression(JJTPOSTFIXEXPRESSION);
boolean jjtc000 = true;
jjtree.openNodeScope(jjtn000);
/*@egen*/}
ASTPostfixExpression jjtn000 = new ASTPostfixExpression(JJTPOSTFIXEXPRESSION);
boolean jjtc000 = true;
jjtree.openNodeScope(jjtn000);
/*@egen*/
int choice = 0;
}
{/*@bgen(jjtree) PostfixExpression */
try {
/*@egen*/
PrimaryExpression() ( "[" Expression() "]" |
"(" [ LOOKAHEAD(ArgumentExpressionList() ) ArgumentExpressionList() ] ")" |
"." Identifier() |
"->" Identifier() |
"++" |
"--" )*/*@bgen(jjtree)*/
// `choice` keeps the code of the postfix operator matched last:
// 1 = "[...]", 2 = "(...)" call, 3 = ".", 4 = "->", 5 = "++", 6 = "--".
// Each action has to sit before the "|" that ends its alternative; an action
// written after "|" becomes the first unit of the NEXT alternative.
PrimaryExpression() ( "[" Expression() "]" { choice = 1; } |
    "(" [ LOOKAHEAD(ArgumentExpressionList() ) ArgumentExpressionList() ] ")" { choice = 2; } |
    "." Identifier() { choice = 3; } |
    "->" Identifier() { choice = 4; } |
    "++" { choice = 5; } |
    "--" { choice = 6; } )*/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
}
/*@egen*/
{
jjtn000.choice = choice;
}/*@bgen(jjtree)*/
} catch (Throwable jjte000) {
if (jjtc000) {
jjtree.clearNodeScope(jjtn000);
@@ -1801,26 +1817,49 @@ void ArgumentExpressionList() : {/*@bgen(jjtree) ArgumentExpressionList */
/*@egen*/
}
void Identifier() :
ASTStringToken Identifier() :
{/*@bgen(jjtree) StringToken */
ASTStringToken jjtn000 = new ASTStringToken(JJTSTRINGTOKEN);
boolean jjtc000 = true;
jjtree.openNodeScope(jjtn000);
/*@egen*/
Token t;
Token r = null;
ASTStringToken t = null;
Token s = null;
int choice;
}
{/*@bgen(jjtree) StringToken */
try {
/*@egen*/
t = <IDENTIFIER>/*@bgen(jjtree)*/
(LOOKAHEAD(<IDENTIFIER>) r = <IDENTIFIER> { choice = 0; } | [ r = <IDENTIFIER> ] "<" t = Identifier() ">" [ s = <IDENTIFIER> ] { choice = 1; })/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
}
/*@egen*/
{
jjtn000.setValue(t.image);
if (choice == 1) {
jjtn000.setValue((r != null ? r.image : "") + "<" + t.getValue() + ">" + (s != null ? s.image : ""));
} else {
jjtn000.setValue(r.image);
}
return jjtn000;
}/*@bgen(jjtree)*/
} catch (Throwable jjte000) {
if (jjtc000) {
jjtree.clearNodeScope(jjtn000);
jjtc000 = false;
} else {
jjtree.popNode();
}
if (jjte000 instanceof RuntimeException) {
throw (RuntimeException)jjte000;
}
if (jjte000 instanceof ParseException) {
throw (ParseException)jjte000;
}
throw (Error)jjte000;
} finally {
if (jjtc000) {
jjtree.closeNodeScope(jjtn000, true);
@@ -1840,7 +1879,7 @@ void Constant() :
{/*@bgen(jjtree) StringToken */
try {
/*@egen*/
(t = <INTEGER_LITERAL> | t = <FLOATING_POINT_LITERAL> | t = <CHARACTER_LITERAL> | t = <STRING_LITERAL>)/*@bgen(jjtree)*/
(t = <INTEGER_LITERAL> | t = <FLOATING_POINT_LITERAL> | t = <CHARACTER_LITERAL> | t = <STRING_LITERAL> | t = <BOOLEAN>)/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;

View File

@@ -27,6 +27,10 @@ public class CParser {
// Run the parser
/* Convenience overload: encodes the C source as UTF-8 and hands it to transpile(InputStream). */
public static String transpile(String c_code) {
    final byte[] utf8Source = c_code.getBytes(StandardCharsets.UTF_8);
    return transpile(new ByteArrayInputStream(utf8Source));
}
public static String transpile(InputStream stream) {
c_parser = new CParser(stream);
try {
@@ -80,7 +84,8 @@ TOKEN : {
| <#EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+>
| <CHARACTER_LITERAL: "\'" (~["\'","\\","\n","\r"] | "\\" (["n","t","b","r","f","\\","\'","\""] | ["0"-"7"] (["0"-"7"])? | ["0"-"3"] ["0"-"7"] ["0"-"7"])) "\'">
| <STRING_LITERAL: "\"" ( ~["\"","\\","\n","\r"] | "\\" ( ["n","t","b","r","f","\\","\'","\""] | ["0"-"7"] (["0"-"7"])? | ["0"-"3"] ["0"-"7"] ["0"-"7"] | ( ["\n","\r"] | "\r\n")))* "\"">
| <UNDEFINED_TYPE: "undefined" (<DECIMAL_LITERAL>)?> // Ghidra specific unknown type
| <BOOLEAN: "true" | "false">
| <UNDEFINED_TYPE: "undefined" (<DECIMAL_LITERAL>)?> // Ghidra specific unknown type
}
TOKEN : {
@@ -116,6 +121,7 @@ TOKEN : {
<INT: "int"> |
<IF: "if"> |
<DO: "do"> |
<BOOL_TYPE: "bool"> |
<CODE: "code"> // code ptr, Ghidra specific
}
@@ -173,7 +179,7 @@ void TypeSpecifier() #TypeStringToken :
}
{
( t = <VOID> | t = <CHAR> | t = <SHORT> | t = <INT> | t = <LONG> | t = <FLOAT> | t = <DOUBLE> | t = <SIGNED> |
t = <UNSIGNED> | t = <CODE> | t = <UNDEFINED_TYPE> )
t = <UNSIGNED> | t = <BOOL_TYPE> | t = <CODE> | t = <UNDEFINED_TYPE> )
{
jjtThis.setValue(t.image);
}
@@ -285,7 +291,7 @@ void DirectAbstractDeclarator() : {}
void Statement() : {}
{
( LOOKAHEAD(2) LabeledStatement() |
( LOOKAHEAD(Identifier() ":") LabeledStatement() |
ExpressionStatement() |
CompoundStatement() |
SelectionStatement() |
@@ -451,14 +457,19 @@ void UnaryOperator() : {}
( "&" | "*" | "+" | "-" | "~" | "!" )
}
void PostfixExpression() : {}
void PostfixExpression() : {
int choice = 0;
}
{
PrimaryExpression() ( "[" Expression() "]" |
"(" [ LOOKAHEAD(ArgumentExpressionList() ) ArgumentExpressionList() ] ")" |
"." Identifier() |
"->" Identifier() |
"++" |
"--" )*
// `choice` keeps the code of the postfix operator matched last:
// 1 = "[...]", 2 = "(...)" call, 3 = ".", 4 = "->", 5 = "++", 6 = "--".
// Each action has to sit before the "|" that ends its alternative; an action
// written after "|" becomes the first unit of the NEXT alternative.
PrimaryExpression() ( "[" Expression() "]" { choice = 1; } |
    "(" [ LOOKAHEAD(ArgumentExpressionList() ) ArgumentExpressionList() ] ")" { choice = 2; } |
    "." Identifier() { choice = 3; } |
    "->" Identifier() { choice = 4; } |
    "++" { choice = 5; } |
    "--" { choice = 6; } )*
{
jjtThis.choice = choice;
}
}
void PrimaryExpression() : {}
@@ -473,14 +484,23 @@ void ArgumentExpressionList() : {}
AssignmentExpression() ( "," AssignmentExpression() )*
}
void Identifier() #StringToken :
ASTStringToken Identifier() #StringToken :
{
Token t;
Token r = null;
ASTStringToken t = null;
Token s = null;
int choice;
}
{
t = <IDENTIFIER>
(LOOKAHEAD(<IDENTIFIER>) r = <IDENTIFIER> { choice = 0; } | [ r = <IDENTIFIER> ] "<" t = Identifier() ">" [ s = <IDENTIFIER> ] { choice = 1; })
{
jjtThis.setValue(t.image);
if (choice == 1) {
jjtThis.setValue((r != null ? r.image : "") + "<" + t.getValue() + ">" + (s != null ? s.image : ""));
} else {
jjtThis.setValue(r.image);
}
return jjtThis;
}
}
@@ -489,7 +509,7 @@ void Constant() #StringToken :
Token t;
}
{
(t = <INTEGER_LITERAL> | t = <FLOATING_POINT_LITERAL> | t = <CHARACTER_LITERAL> | t = <STRING_LITERAL>)
(t = <INTEGER_LITERAL> | t = <FLOATING_POINT_LITERAL> | t = <CHARACTER_LITERAL> | t = <STRING_LITERAL> | t = <BOOLEAN>)
{
jjtThis.setValue(t.image);
}

View File

@@ -4,8 +4,11 @@
cd c/gen
for file in AST*Token.java; do
mv -- "$file" "${file%.java}.bak"
BACKUP_FILES="ASTPostfixExpression \
$(ls -1 AST*Token.java | cut -d. -f1 | tr '\n' ' ')"
for file in $BACKUP_FILES; do
mv -- "${file}.java" "${file}.bak"
done
rm -f *.java c.jj
@@ -19,6 +22,6 @@ for file in *.java; do
sed -i '1s/^/package ghidrust.decompiler.parser.c.gen;\n\n/' $file
done
for file in AST*Token.bak; do
mv -- "$file" "${file%.bak}.java"
for file in $BACKUP_FILES; do
mv -- "${file}.bak" "${file}.java"
done