In this section and the next we implement the code generators to which the compile function dispatches.
In general, the output of each code generator will end with instructions—generated by the function compile_linkage—that implement the required linkage. If the linkage is "return" then we must generate the instruction go_to(reg("continue")). This needs the continue register and does not modify any registers. If the linkage is "next", then we needn't include any additional instructions. Otherwise, the linkage is a label, and we generate a go_to to that label, an instruction that does not need or modify any registers.
function compile_linkage(linkage) {
return linkage === "return"
? make_instruction_sequence(list("continue"), null,
list(go_to(reg("continue"))))
: linkage === "next"
? make_instruction_sequence(null, null, null)
: make_instruction_sequence(null, null,
list(go_to(label(linkage))));
}
function end_with_linkage(linkage, instruction_sequence) {
return preserving(list("continue"),
instruction_sequence,
compile_linkage(linkage));
}The code generators for literal expressions and names construct instruction sequences that assign the required value to the target register and then proceed as specified by the linkage descriptor.
The literal value is extracted at compile time from the component being compiled and put into the constant part of the assign instruction. For a name, an instruction is generated to use the lookup_symbol_value operation when the compiled program is run, to look up the value associated with a symbol in the current environment. Like a literal value, the symbol is extracted at compile time from the component being compiled. Thus symbol_of_name(component) is executed only once, when the program is being compiled, and the symbol appears as a constant in the assign instruction.
function compile_literal(component, target, linkage) {
const literal = literal_value(component);
return end_with_linkage(linkage,
make_instruction_sequence(null, list(target),
list(assign(target, constant(literal)))));
}
function compile_name(component, target, linkage) {
const symbol = symbol_of_name(component);
return end_with_linkage(linkage,
make_instruction_sequence(list("env"), list(target),
list(assign(target,
list(op("lookup_symbol_value"),
constant(symbol),
reg("env"))))));
}Assignments and declarations are handled much as they are in the interpreter. The function compile_assignment_declaration recursively generates code that computes the value to be associated with the symbol and appends to it a two-instruction sequence that updates the value associated with the symbol in the environment and assigns the value of the whole component (the assigned value for an assignment or undefined for a declaration) to the target register. The recursive compilation has target val and linkage "next" so that the code will put its result into val and continue with the code that is appended after it. The appending is done preserving env, since the environment is needed for updating the symbol–value association and the code for computing the value could be the compilation of a complex expression that might modify the registers in arbitrary ways.
function compile_assignment(component, target, linkage) {
return compile_assignment_declaration(
assignment_symbol(component),
assignment_value_expression(component),
reg("val"),
target, linkage);
}
function compile_declaration(component, target, linkage) {
return compile_assignment_declaration(
declaration_symbol(component),
declaration_value_expression(component),
constant(undefined),
target, linkage);
}
function compile_assignment_declaration(
symbol, value_expression, final_value,
target, linkage) {
const get_value_code = compile(value_expression, "val", "next");
return end_with_linkage(linkage,
preserving(list("env"),
get_value_code,
make_instruction_sequence(list("env", "val"),
list(target),
list(perform(list(op("assign_symbol_value"),
constant(symbol),
reg("val"),
reg("env"))),
assign(target, final_value)))));
}The code for a conditional compiled with a given target and linkage has the form $\langle{}$compilation of predicate, target val, linkage "next"$\rangle$ test(list(op("is_falsy"), reg("val"))), branch(label("false_branch")), "true_branch", $\langle{}$compilation of consequent with given target and given linkage or after_cond$\rangle$ "false_branch", $\langle{}$compilation of alternative with given target and linkage$\rangle$ "after_cond"
To generate this code, we compile the predicate, consequent, and alternative, and combine the resulting code with instructions to test the predicate result and with newly generated labels to mark the true and false branches and the end of the conditional.[1] In this arrangement of code, we must branch around the true branch if the test is false. The only slight complication is in how the linkage for the true branch should be handled. If the linkage for the conditional is "return" or a label, then the true and false branches will both use this same linkage. If the linkage is "next", the true branch ends with a jump around the code for the false branch to the label at the end of the conditional.
function compile_conditional(component, target, linkage) {
const t_branch = make_label("true_branch");
const f_branch = make_label("false_branch");
const after_cond = make_label("after_cond");
const consequent_linkage =
linkage === "next" ? after_cond : linkage;
const p_code = compile(conditional_predicate(component),
"val", "next");
const c_code = compile(conditional_consequent(component),
target, consequent_linkage);
const a_code = compile(conditional_alternative(component),
target, linkage);
return preserving(list("env", "continue"),
p_code,
append_instruction_sequences(
make_instruction_sequence(list("val"), null,
list(test(list(op("is_falsy"), reg("val"))),
branch(label(f_branch)))),
append_instruction_sequences(
parallel_instruction_sequences(
append_instruction_sequences(t_branch, c_code),
append_instruction_sequences(f_branch, a_code)),
after_cond)));
}The compilation of statement sequences parallels their evaluation in the explicit-control evaluator with one exception: If a return statement appears anywhere in a sequence, we treat it as if it were the last statement. Each statement of the sequence is compiled—the last statement (or a return statement) with the linkage specified for the sequence, and the other statements with linkage "next" (to execute the rest of the sequence). The instruction sequences for the individual statements are appended to form a single instruction sequence, such that env (needed for the rest of the sequence) and continue (possibly needed for the linkage at the end of the sequence) are preserved.[2]
function compile_sequence(seq, target, linkage) {
return is_empty_sequence(seq)
? compile_literal(make_literal(undefined), target, linkage)
: is_last_statement(seq) ||
is_return_statement(first_statement(seq))
? compile(first_statement(seq), target, linkage)
: preserving(list("env", "continue"),
compile(first_statement(seq), target, "next"),
compile_sequence(rest_statements(seq),
target, linkage));
}dead codeafter the return statement that can never be executed. Removing the is_return_statement check does not change the behavior of the object program; however, there are many reasons not to compile dead code, which are beyond the scope of this book (security, compilation time, size of the object code, etc.), and many compilers give warnings for dead code.[3]
A block is compiled by prepending an assign instruction to the compiled body of the block. The assignment extends the current environment by a frame that binds the names declared in the block to the value "*unassigned*". This operation both needs and modifies the env register.
function compile_block(stmt, target, linkage) {
const body = block_body(stmt);
const locals = scan_out_declarations(body);
const unassigneds = list_of_unassigned(locals);
return append_instruction_sequences(
make_instruction_sequence(list("env"), list("env"),
list(assign("env", list(op("extend_environment"),
constant(locals),
constant(unassigneds),
reg("env"))))),
compile(body, target, linkage));
}Lambda expressions construct functions. The object code for a lambda expression must have the form $\langle{}$construct function object and assign it to target register$\rangle$ $\langle{}$linkage$\rangle$ When we compile the lambda expression, we also generate the code for the function body. Although the body won't be executed at the time of function construction, it is convenient to insert it into the object code right after the code for the lambda expression. If the linkage for the lambda expression is a label or "return", this is fine. But if the linkage is "next", we will need to skip around the code for the function body by using a linkage that jumps to a label that is inserted after the body. The object code thus has the form $\langle{}$construct function object and assign it to target register$\rangle$ $\langle{}$code for given linkage$\rangle$ $or$ go_to(label("after_lambda")) $\langle{}$compilation of function body$\rangle$ "after_lambda"
The function compile_lambda_expression generates the code for constructing the function object followed by the code for the function body. The function object will be constructed at run time by combining the current environment (the environment at the point of declaration) with the entry point to the compiled function body (a newly generated label).[4]
function compile_lambda_expression(exp, target, linkage) {
const fun_entry = make_label("entry");
const after_lambda = make_label("after_lambda");
const lambda_linkage =
linkage === "next" ? after_lambda : linkage;
return append_instruction_sequences(
tack_on_instruction_sequence(
end_with_linkage(lambda_linkage,
make_instruction_sequence(list("env"),
list(target),
list(assign(target,
list(op("make_compiled_function"),
label(fun_entry),
reg("env")))))),
compile_lambda_body(exp, fun_entry)),
after_lambda);
}The function compile_lambda_body constructs the code for the body of the function. This code begins with a label for the entry point. Next come instructions that will cause the runtime evaluation environment to switch to the correct environment for evaluating the function body—namely, the environment of the function, extended to include the bindings of the parameters to the arguments with which the function is called. After this comes the code for the function body, augmented to ensure that it ends with a return statement. The augmented body is compiled with target val so that its return value will be placed in val. The linkage descriptor passed to this compilation is irrelevant, as it will be ignored.[5] Since a linkage argument is required, we arbitrarily pick "next".
function compile_lambda_body(exp, fun_entry) {
const params = lambda_parameter_symbols(exp);
return append_instruction_sequences(
make_instruction_sequence(list("env", "fun", "argl"),
list("env"),
list(fun_entry,
assign("env",
list(op("compiled_function_env"),
reg("fun"))),
assign("env",
list(op("extend_environment"),
constant(params),
reg("argl"),
reg("env"))))),
compile(append_return_undefined(lambda_body(exp)),
"val", "next"));
}To ensure that all functions end by executing a return statement, compile_lambda_body appends to the lambda body a return statement whose return expression is the literal undefined. To do so, it uses the function append_return_undefined, which constructs the parser's tagged-list representation (from section 4.1.2) of a sequence consisting of the body and a return undefined; statement.
function append_return_undefined(body) {
return list("sequence", list(body,
list("return_statement",
list("literal", undefined))));
}
Hint: The answer depends on how we define dead code. One possible (and useful)
definition is code following a return statement in a sequence
—but
what about code in the consequent
branch of if (false) $\ldots$ or
code following a
call to run_forever() in exercise 4.15?
let label_counter = 0;
function new_label_number() {
label_counter = label_counter + 1;
return label_counter;
}
function make_label(string) {
return string + stringify(new_label_number());
}
function make_compiled_function(entry, env) {
return list("compiled_function", entry, env);
}
function is_compiled_function(fun) {
return is_tagged_list(fun, "compiled_function");
}
function compiled_function_entry(c_fun) {
return head(tail(c_fun));
}
function compiled_function_env(c_fun) {
return head(tail(tail(c_fun)));
}