feat: AST based entity refactor (#17434)

* task: AST based entity refactor

* implemented refactor logic

* jest cases with string manipulation using AST logic

* comments and indentation

* added evalVersion to request
This commit is contained in:
ChandanBalajiBP 2022-10-18 12:07:06 +05:30 committed by GitHub
parent a13301808d
commit e5cdfbe445
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 312 additions and 108 deletions

View File

@ -9,6 +9,13 @@ type ScriptToIdentifiersType = {
evalVersion?: number;
};
// Request body accepted by the entity-refactor endpoint.
type entityRefactorType = {
// Source text in which the entity should be renamed
script: string;
// Current name of the entity
oldName: string;
// Name that should replace `oldName`
newName: string;
// Optional evaluation version forwarded to the AST service
evalVersion?: number;
};
type MultipleScriptToIdentifiersType = {
scripts: string[];
evalVersion?: number;
@ -63,4 +70,26 @@ export default class AstController extends BaseController {
);
}
}
/**
 * Handles an entity-refactor request: reads `script`, `oldName`, `newName` and
 * `evalVersion` from the request body, delegates to `AstService.entityRefactor`
 * and sends back whatever the service resolves with.
 *
 * Any error thrown while reading the body or running the service is reported
 * through `sendError` with an INTERNAL_SERVER_ERROR status and the error's
 * message as the only detail entry.
 */
async entityRefactorController(req: Request, res: Response) {
try {
// By default the application eval version is set to be 2
const { script, oldName, newName, evalVersion }: entityRefactorType =
req.body;
const data = await AstService.entityRefactor(
script,
oldName,
newName,
evalVersion
);
return super.sendResponse(res, data);
} catch (err) {
// NOTE(review): assumes `err` is an Error instance; a non-Error throw would
// yield `undefined` here — confirm this matches the other handlers.
return super.sendError(
res,
super.serverErrorMessaage,
[err.message],
StatusCodes.INTERNAL_SERVER_ERROR
);
}
}
}

View File

@ -20,5 +20,11 @@ router.post(
validator.validateRequest,
astController.getIdentifierDataFromMultipleScripts
);
// POST /entity-refactor: passes AstRules.getScriptValidator() and
// validator.validateRequest as middleware before
// astController.entityRefactorController handles the request.
// NOTE(review): only the script validator is attached here; whether
// `oldName` / `newName` are validated elsewhere is not visible — confirm.
router.post(
"/entity-refactor",
AstRules.getScriptValidator(),
validator.validateRequest,
astController.entityRefactorController
);
export default router;

View File

@ -1,4 +1,4 @@
import { extractIdentifierInfoFromCode } from "@shared/ast";
import { extractIdentifierInfoFromCode, entityRefactorFromCode } from "@shared/ast";
export default class AstService {
static async extractIdentifierDataFromScript(
@ -19,5 +19,27 @@ export default class AstService {
reject(err);
}
});
}
/**
 * Renames `oldName` to `newName` inside `script` by delegating to
 * `entityRefactorFromCode`.
 *
 * @returns a promise that resolves with whatever `entityRefactorFromCode`
 * returns and rejects with whatever it throws.
 */
static async entityRefactor(
  script,
  oldName,
  newName,
  evalVersion
): Promise<any> {
  // The method is `async`, so a synchronous throw from the refactor call
  // already surfaces as a rejected promise; no explicit Promise is needed.
  return entityRefactorFromCode(script, oldName, newName, evalVersion);
}
}

View File

@ -13,6 +13,31 @@ const multipleScripts = {
],
};
// Request payloads for the entity-refactor endpoint tests. Every case asks
// for `ApiNever` to be renamed to `ApiForever`.
const entityRefactor = [
// Script is just the bare identifier
{
script: "ApiNever",
oldName: "ApiNever",
newName: "ApiForever",
},
// Identifier used as the object of a member expression
{
script: "ApiNever.data",
oldName: "ApiNever",
newName: "ApiForever",
},
// The name also appears in a comment, as a function name and inside a
// string literal, next to two real usages (`ApiNever.data`, `return ApiNever`)
{
script:
"// ApiNever \n function ApiNever(abc) {let foo = \"I'm getting data from ApiNever but don't rename this string\" + ApiNever.data; \n if(true) { return ApiNever }}",
oldName: "ApiNever",
newName: "ApiForever",
},
// Same shape as above, but the script declares its own `ApiNever` variable
{
script:
"//ApiNever \n function ApiNever(abc) {let ApiNever = \"I'm getting data from ApiNever but don't rename this string\" + ApiNever.data; \n if(true) { return ApiNever }}",
oldName: "ApiNever",
newName: "ApiForever",
},
];
afterAll((done) => {
app.close();
done();
@ -64,4 +89,39 @@ describe("AST tests", () => {
expect(response.body.data).toEqual(expectedResponse);
});
});
// Expected `data` payload for each entry of `entityRefactor`, matched by position.
const expectedRefactorResults = [
  { script: "ApiForever", count: 1 },
  { script: "ApiForever.data", count: 1 },
  {
    script:
      "// ApiNever \n function ApiNever(abc) {let foo = \"I'm getting data from ApiNever but don't rename this string\" + ApiForever.data; \n if(true) { return ApiForever }}",
    count: 2,
  },
  {
    script:
      "//ApiNever \n function ApiNever(abc) {let ApiNever = \"I'm getting data from ApiNever but don't rename this string\" + ApiNever.data; \n if(true) { return ApiNever }}",
    count: 0,
  },
];
// Registers one test per payload; each posts the payload and compares the
// refactored script and the rename count with the expected entry.
entityRefactor.forEach((payload, caseIndex) => {
  it(`Entity refactor test case ${caseIndex + 1}`, async () => {
    const expected = expectedRefactorResults[caseIndex];
    const response = await supertest(app)
      .post(`${RTS_BASE_API_PATH}/ast/entity-refactor`, {
        JSON: true,
      })
      .send(payload)
      .expect(200);
    expect(response.body.success).toEqual(true);
    expect(response.body.data.script).toEqual(expected.script);
    expect(response.body.data.count).toEqual(expected.count);
  });
});
});

View File

@ -9,6 +9,7 @@ import {
isPropertyAFunctionNode,
getAST,
extractIdentifierInfoFromCode,
entityRefactorFromCode,
extractInvalidTopLevelMemberExpressionsFromCode,
getFunctionalParamsFromNode,
isTypeOfFunction,
@ -39,6 +40,7 @@ export {
isPropertyAFunctionNode,
getAST,
extractIdentifierInfoFromCode,
entityRefactorFromCode,
extractInvalidTopLevelMemberExpressionsFromCode,
getFunctionalParamsFromNode,
isTypeOfFunction,

View File

@ -1,4 +1,4 @@
import { parse, Node, SourceLocation, Options } from "acorn";
import acorn, { parse, Node, SourceLocation, Options, Comment } from "acorn";
import { ancestor, simple } from "acorn-walk";
import { ECMA_VERSION, NodeTypes } from "./constants/ast";
import { has, isFinite, isString, memoize, toPath } from "lodash";
@ -85,6 +85,13 @@ interface LiteralNode extends Node {
value: string | boolean | null | number | RegExp;
}
// Result of walking a script's AST (see `ancestorWalk`).
type NodeList = {
// All reference strings found (identifiers and member-expression paths)
references: Set<string>;
// Names of params declared by functions within the script
functionalParams: Set<string>;
// Names of variables declared within the script
variableDeclarations: Set<string>;
// Every Identifier node visited during the walk
identifierList: Array<IdentifierNode>;
};
// https://github.com/estree/estree/blob/master/es5.md#property
export interface PropertyNode extends Node {
type: NodeTypes.Property;
@ -206,12 +213,7 @@ export const extractIdentifierInfoFromCode = (
evaluationVersion: number,
invalidIdentifiers?: Record<string, unknown>
): IdentifierInfo => {
// List of all references found
const references = new Set<string>();
// List of variables declared within the script. All identifiers and member expressions derived from declared variables will be removed
const variableDeclarations = new Set<string>();
// List of functional params declared within the script. All identifiers and member expressions derived from functional params will be removed
let functionalParams = new Set<string>();
let ast: Node = { end: 0, start: 0, type: "" };
try {
const sanitizedScript = sanitizeScript(code, evaluationVersion);
@ -226,6 +228,23 @@ export const extractIdentifierInfoFromCode = (
*/
const wrappedCode = wrapCode(sanitizedScript);
ast = getAST(wrappedCode);
let { references, functionalParams, variableDeclarations }: NodeList =
ancestorWalk(ast);
const referencesArr = Array.from(references).filter((reference) => {
// To remove references derived from declared variables and function params,
// We extract the topLevelIdentifier Eg. Api1.name => Api1
const topLevelIdentifier = toPath(reference)[0];
return !(
functionalParams.has(topLevelIdentifier) ||
variableDeclarations.has(topLevelIdentifier) ||
has(invalidIdentifiers, topLevelIdentifier)
);
});
return {
references: referencesArr,
functionalParams: Array.from(functionalParams),
variables: Array.from(variableDeclarations),
};
} catch (e) {
if (e instanceof SyntaxError) {
// Syntax error. Ignore and return empty list
@ -237,108 +256,68 @@ export const extractIdentifierInfoFromCode = (
}
throw e;
}
};
/*
* We do an ancestor walk on the AST in order to extract all references. For example, for member expressions and identifiers, we need to know
* what surrounds the identifier (its parent and ancestors), ancestor walk will give that information in the callback
* doc: https://github.com/acornjs/acorn/tree/master/acorn-walk
*/
ancestor(ast, {
Identifier(node: Node, ancestors: Node[]) {
/*
* We are interested in identifiers. Due to the nature of AST, Identifier nodes can
* also be nested inside MemberExpressions. For deeply nested object references, there
* could be nesting of many MemberExpressions. To find the final reference, we will
* try to find the top level MemberExpression that does not have a MemberExpression parent.
* */
let candidateTopLevelNode: IdentifierNode | MemberExpressionNode =
node as IdentifierNode;
let depth = ancestors.length - 2; // start "depth" with first parent
while (depth > 0) {
const parent = ancestors[depth];
if (
isMemberExpressionNode(parent) &&
/* Member expressions that are "computed" (with [ ] search)
and the ones that have optional chaining ( a.b?.c )
will be considered top level node.
We will stop looking for further parents */
/* "computed" exception - isArrayAccessorNode
Member expressions that are array accessors with static index - [9]
will not be considered top level.
We will continue looking further. */
(!parent.computed || isArrayAccessorNode(parent)) &&
!parent.optional
) {
candidateTopLevelNode = parent;
depth = depth - 1;
} else {
// Top level found
break;
}
/**
 * Renames every reference to the entity `oldName` in `script` to `newName`,
 * using the AST so that comments, string literals and declaration names are
 * left untouched.
 *
 * Nothing is renamed when `oldName` is declared inside the script (as a
 * variable or a function param) or is listed in `invalidIdentifiers`, because
 * such usages refer to the local binding rather than the entity.
 *
 * @param script - source text to refactor
 * @param oldName - current entity name
 * @param newName - replacement entity name
 * @param evaluationVersion - forwarded to `sanitizeScript`
 * @param invalidIdentifiers - identifiers that must never be treated as entities
 * @returns `{ script, count }` with the refactored text and the number of
 * replacements, or the string "Syntax Error" when the script cannot be parsed
 * @throws rethrows any error that is not a `SyntaxError`
 */
export const entityRefactorFromCode = (
  script: string,
  oldName: string,
  newName: string,
  evaluationVersion: number,
  invalidIdentifiers?: Record<string, unknown>
): Record<string, string | number> | string => {
  try {
    const sanitizedScript = sanitizeScript(script, evaluationVersion);
    const ast: Node = getAST(sanitizedScript);
    const { functionalParams, variableDeclarations, identifierList }: NodeList =
      ancestorWalk(ast);

    // A locally declared name (or an invalid identifier) shadows the entity,
    // so none of its usages may be renamed.
    const isShadowed =
      functionalParams.has(oldName) ||
      variableDeclarations.has(oldName) ||
      has(invalidIdentifiers, oldName);
    if (isShadowed) {
      return { script, count: 0 };
    }

    // Work on the identifier nodes themselves. The reference set is
    // de-duplicated, so its indexes do not line up with `identifierList` and
    // must not be used to look nodes up.
    // Sorting from last to first keeps earlier offsets valid after each edit.
    const targets = identifierList
      .filter((identifier) => identifier.name === oldName)
      .sort((a, b) => b.start - a.start);

    // NOTE(review): node offsets come from the sanitized script but are applied
    // to the original `script`; if `sanitizeScript` altered the text ahead of a
    // match the offsets would be shifted — confirm.
    let refactorScript = script;
    for (const { start, end } of targets) {
      refactorScript =
        refactorScript.substring(0, start) +
        newName +
        refactorScript.substring(end);
    }

    return { script: refactorScript, count: targets.length };
  } catch (e) {
    if (e instanceof SyntaxError) {
      // The script could not be parsed; report it instead of throwing
      return "Syntax Error";
    }
    throw e;
  }
};
export type functionParam = { paramName: string; defaultValue: unknown };
@ -515,3 +494,107 @@ export const extractInvalidTopLevelMemberExpressionsFromCode = (
return invalidTopLevelMemberExpressionsArray;
};
/**
 * Walks `ast` once and gathers everything needed to reason about the
 * script's references:
 *  - `identifierList`: every Identifier node visited
 *  - `references`: identifier names and member-expression paths found
 *  - `variableDeclarations`: names declared by variable declarators
 *  - `functionalParams`: param names of function declarations, function
 *    expressions and arrow functions
 *
 * An ancestor walk is used because resolving a reference requires knowing
 * what surrounds an identifier (its parent chain).
 * doc: https://github.com/acornjs/acorn/tree/master/acorn-walk
 */
const ancestorWalk = (ast: Node): NodeList => {
  const identifierList: IdentifierNode[] = [];
  const references = new Set<string>();
  // Identifiers and member expressions derived from declared variables are
  // removed by callers, so the declared names are tracked here
  const variableDeclarations = new Set<string>();
  // Same for names introduced as function params
  const functionalParams = new Set<string>();
  const rememberParams = (paramNames: Iterable<string>) => {
    for (const paramName of paramNames) {
      functionalParams.add(paramName);
    }
  };

  ancestor(ast, {
    Identifier(node: Node, ancestors: Node[]) {
      // An Identifier may sit at the bottom of a chain of nested
      // MemberExpressions. Climb the chain to the outermost MemberExpression
      // that still belongs to the same reference.
      let topLevelNode: IdentifierNode | MemberExpressionNode =
        node as IdentifierNode;
      // `ancestors.length - 2` is the identifier's direct parent
      for (let depth = ancestors.length - 2; depth > 0; depth -= 1) {
        const parent = ancestors[depth];
        // The chain ends at anything that is not a member expression, at a
        // computed access (unless it is an array accessor with a static
        // index such as [9]), and at optional chaining (a.b?.c).
        if (
          !isMemberExpressionNode(parent) ||
          (parent.computed && !isArrayAccessorNode(parent)) ||
          parent.optional
        ) {
          break;
        }
        topLevelNode = parent;
      }
      identifierList.push(node as IdentifierNode);
      // Plain identifiers are stored by name; member expressions are stored
      // as their constructed reference string
      references.add(
        isIdentifierNode(topLevelNode)
          ? topLevelNode.name
          : constructFinalMemberExpIdentifier(topLevelNode)
      );
    },
    VariableDeclarator(node: Node) {
      // Declared variables are tracked so they can be dropped from the
      // final list of references
      if (!isVariableDeclarator(node)) return;
      variableDeclarations.add(node.id.name);
    },
    FunctionDeclaration(node: Node) {
      // Params also show up as references, so they are tracked for removal
      if (isFunctionDeclaration(node)) {
        rememberParams(getFunctionalParamNamesFromNode(node));
      }
    },
    FunctionExpression(node: Node) {
      // Params also show up as references, so they are tracked for removal
      if (isFunctionExpression(node)) {
        rememberParams(getFunctionalParamNamesFromNode(node));
      }
    },
    ArrowFunctionExpression(node: Node) {
      // Params also show up as references, so they are tracked for removal
      if (isArrowFunctionExpression(node)) {
        rememberParams(getFunctionalParamNamesFromNode(node));
      }
    },
  });

  return {
    references,
    functionalParams,
    variableDeclarations,
    identifierList,
  };
};

View File

@ -6,6 +6,8 @@ export function sanitizeScript(js: string, evaluationVersion: number) {
// We remove any line breaks from the beginning of the script because that
// makes the final function invalid. We also unescape any escaped characters
// so that eval can happen
// The default value of the evaluation version is 2
evaluationVersion = evaluationVersion ? evaluationVersion : 2;
const trimmedJS = js.replace(beginsWithLineBreakRegex, '');
return evaluationVersion > 1 ? trimmedJS : unescapeJS(trimmedJS);
}