编译原理-LLVM IR语法

问题1: getelementptr

请给出 IR.md 中提到的两种 getelementptr 用法的区别,并稍加解释:

%2 = getelementptr [10 x i32], [10 x i32]* %1, i32 0, i32 %0
%2 = getelementptr i32, i32* %1, i32 %0

两种GEP都是取数组中的特定元素的地址指针，区别在于（1）计算基础类型为数组指针，（2）中的计算基础类型为元素指针， (1)中的用法是借助长度为10，数据类型为i32的数组指针取其中某个元素的地址指针，其中数据索引的偏移有两个，因为数据类型是[10 x i32],第一个偏移对应的内存偏移量为偏移值（0） x 10 x 4，第二个偏移对应的内存偏移量为偏移值（%0） x 4，偏移量与原地址相加计算得到的数据存入%2中，类型为i32；（2）中的用法是通过元素指针和偏移值得到某个元素的地址指针，数据类型为i32，因此只有一个偏移值，对应的内存偏移量为偏移值（%0） x 4，偏移量与原地址相加计算得到的数据存入%2中，类型为i32。

问题2: cpp 与 .ll 的对应

assign

不含有转移指令，因此整个函数主体部分与.ll文件main函数部分对应

fun

   std::vector<Type *> Ints(1, Int32Type);
   auto calleeFunType = FunctionType::get(Int32Type, Ints);
  auto calleeFun = Function::create(calleeFunType,
                                  "callee", module);
  auto bb = BasicBlock::create(module, "entry", calleeFun);
  builder->set_insert_point(bb);
  std::vector<Value *> args;  
  for (auto arg = calleeFun->arg_begin(); arg != calleeFun->arg_end(); arg++) {
    args.push_back(*arg);  
  }
  auto temp = builder->create_imul(CONST_INT(2),args[0]);
  builder->create_ret(temp);
/*对应：
define dso_local i32 @callee (i32 %0) #0 {
    %2 = mul i32 2,%0
    ret i32 %2
}
*/
  auto mainFun = Function::create(FunctionType::get(Int32Type, {}),
                                  "main", module);
  bb = BasicBlock::create(module, "entry", mainFun);
  builder->set_insert_point(bb);
  auto call = builder->create_call(calleeFun, {CONST_INT(110)});
/*对应：
define dso_local i32 @main () #0{
    %1 = call i32 @callee(i32 110)
*/
  builder->create_ret(call);
/*对应：
    ret i32 %1
*/

 auto mainFun = Function::create(FunctionType::get(Int32Type, {}),
                                  "main", module);
  auto bb = BasicBlock::create(module, "entry", mainFun);
  builder->set_insert_point(bb);

  auto aAlloca = builder->create_alloca(FloatTypenum);
  builder->create_store(CONST_FP(5.555),aAlloca);
  auto aload = builder->create_load(aAlloca);
  auto fcmp =  builder->create_fcmp_gt(aload,CONST_FP(1.0));
  auto trueBB = BasicBlock::create(module, "trueBB", mainFun);  
  auto falseBB = BasicBlock::create(module, "falseBB", mainFun);
  auto br = builder->create_cond_br(fcmp, trueBB, falseBB); 
/*对应：
    %1 = alloca float
    store float 0x40163851E0000000, float* %1
    %2 = load float, float* %1
    %3 = fcmp ogt float %2,1.000000e+00
    br i1 %3, label %4, label %5
*/
  builder->set_insert_point(trueBB); 
  builder->create_ret(CONST_INT(233));
/*对应：
    4:
    ret i32 233
*/
  builder->set_insert_point(falseBB);
  builder->create_ret(CONST_INT(0));
/*对应：
	5:
    ret i32 0
*/

while

 
  auto aAlloca = builder->create_alloca(Int32Type);
  auto iAlloca = builder->create_alloca(Int32Type);
  builder->create_store(CONST_INT(10),aAlloca);
  builder->create_store(CONST_INT(0),iAlloca);
  builder->create_br(loopBB);
/*对应：
	%1 = alloca i32
    store i32 10,i32* %1
    %2 = alloca i32
    store i32 0,i32* %2
    br label %3
 */
  builder->set_insert_point(loopBB);
  auto iLoad = builder->create_load(iAlloca); 
  auto icmp =  builder->create_icmp_lt(iLoad,CONST_INT(10));
  auto br = builder->create_cond_br(icmp, trueBB, falseBB); 
/*对应：
    3:
    %4 = load i32, i32* %2
    %5 = icmp slt i32 %4,10
    br i1 %5,label %6,label %11
*/
  builder->set_insert_point(trueBB); 
  auto temp = builder->create_iadd(iLoad,CONST_INT(1));
  builder->create_store(temp,iAlloca);
  iLoad = builder->create_load(iAlloca);
  auto aLoad = builder->create_load(aAlloca);
  auto temp1 = builder->create_iadd(aLoad,iLoad);
  builder->create_store(temp1,aAlloca);
  builder->create_br(loopBB);
/*对应：
    6:
    %7 = add i32 %4,1
    store i32 %7, i32* %2
    %8 = load i32, i32* %2
    %9 = load i32, i32* %1
    %10 = add i32 %8,%9
    store i32 %10, i32* %1
    br label %3
 */
  builder->set_insert_point(falseBB); 
  aLoad = builder->create_load(aAlloca);
  builder->create_ret(aLoad);
 /*  对应：
    11:
    %12 = load i32, i32* %1
    ret i32 %12
*/

问题3: Visitor Pattern

分析 calc 程序在输入为 4 * (8 + 4 - 1) / 2 时的行为：

请画出该表达式对应的抽象语法树（使用 calc_ast.hpp 中的 CalcAST* 类型和在该类型中存储的值来表示），并给节点使用数字编号。
请指出示例代码在用访问者模式遍历该语法树时的遍历顺序。

序列请按如下格式指明（序号为问题 3.1 中的编号）： 3->2->5->1

抽象语法树：

遍历顺序：

1->2->3->4->6->8->7->9->11->14->16->12->15->10->13->5

实验要求

本次实验的核心任务是使用访问者模式来实现cminus-f 语言的LLVM IR 的自动生成。

实验难点

数据类型不匹配时及时进行数据转换，如以下情况
- assign语句
- 比较/计算等二元运算的参数类型不一致
- 计算数组下标的结果并非int型
- 函数参数类型和调用时的实参类型不匹配
- 函数返回值的表达式和类型不匹配
其中int,float之间的转换需要的函数为create_sitofp,create_fptosi,int1和int32之间的转换需要的函数为create_zext,create_icmp_ne(*,CONST_INT(0))。
区别元素值和元素地址，二者存储的数据类型均为Value *类型，本实验中，设置两个全局变量cur_val,cur_expr分别存储地址和值，其中地址仅在指针/数组运算以及赋值语句被用到。
区分指针类型和数组类型，根据lab2问题1，获取元素的地址有两种方法，分别是作为指针根据基元素的值进行运算，和作为数组类型的指针根据基元素的地址进行计算，在调用时要加以区分。
函数形参和实参的处理

实验设计

全局变量

number

在对基本块进行命名时，如果程序中存在多个类型相同的基本块，则会发生冲突，因此设置一个int类型的全局变量，每遇到需要显示命名的基本块，则将当前number的值转化为字符串加入基本块的命名中(std::to_string(number++))。
cur_val & cur_expr

cur_val记录当前数据对应的地址，如运行赋值语句时，右值计算的数值将存储到左值中对应的地址中，以及根据基地址计算数组某个元素对应的位置； cur_expr记录当前数据对应的值的拷贝，用于各类表达式的计算和赋值，调用create_load(cur_val)即可得到当前数据地址的对应的值。

优化

SelectionStmt部分，在trueBB，falseBB对应的语句执行结束后，如果在执行完的基本块中没有终止语句，则不会产生向下一个基本块的跳转。

int exist=0;  
if(builder->get_insert_block()->get_terminator()==nullptr)
  {
    retBB=BasicBlock::create(module.get(),"retBB"+std::to_string(number),cur_fun);
    builder->create_br(retBB);
    exist=1;
  }
if(builder->get_insert_block()->get_terminator()==nullptr)
{
    if(exist==0)
    {
      retBB=BasicBlock::create(module.get(),"retBB"+std::to_string(number),cur_fun);
      builder->create_br(retBB);
      exist=1;
    }
 	else  builder->create_br(retBB);
}

iteration同理

数组负下标检测,调用neg_idx_expect函数，仅对负下标进行检测，不会对数组的上界进行检查（即越界访问）

auto b=builder->create_icmp_lt(cur_expr,CONST_INT(0));
auto trueBB=...;
autofalseBB=...;
auto br=builder->create_cond_br(b,trueBB,falseBB);
builder->set_insert_point(trueBB);
auto err=scope.find("neg_idx_except");
builder->create_call(err,{});        
builder->create_br(falseBB);
builder->set_insert_point(falseBB);

函数声明部分，对函数参数的处理仅在FunDeclaration部分完成，未调用ASTParam部分。

实验总结

实验框架的基本思路是访问者模式实现LR的自动生成，和lab2中问题三的思路类似。
加深了对数组和指针类型的理解，以及对基本块，跳转语句等的理解
对C++的语法掌握更多