跳至正文

编译原理lab2

内容目录

编译原理lab2

1 对如下PL/0源程序,列出在词法分析过程中,该源程序被解读出的一个个字符串以及与此相对应的变量id、num、sym的内容。只写到procedure p即可,以表格形式给出。

const a=27,b=80;
var x,y,z;
procedure p;
 var f;
 begin
 f:=(a+b)/3;
 end;
begin
 x:=3; call p;
end.
序号 字符串 id num sym
1 const const \ constsym
2 a a \ ident
3 = \ \ eql
4 27 \ 27 number
5 , \ \ comma
6 b b \ ident
7 = \ \ eql
8 80 \ 80 number
9 ; \ \ semicolon
10 var var \ varsym
11 x x \ ident
12 , \ \ comma
13 y y \ ident
14 , \ \ comma
15 z z \ ident
16 ; \ \ semicolon
17 procedure procedure \ procsym
18 p p \ ident
19 ; \ \ semicolon

2 对提供的PL/0编译器源代码(C语言版)进行裁减和改写,实现一个PL/0语言的独立词法分析程序。该词法分析程序读入PL/0 语言源程序,输出一个单词符号的序列:对于标识符和无符号整数,显示单词种别和单词自身的值两项内容;对于其他单词符号,仅显示其单词种别。(1)需要提交裁减和改写后的C源程序,请删除所有与词法分析无关的成分,显示输出格式自定;(2)输入源程序如下,给出词法分析程序输出截图。

var m, sum;
procedure fsum;
begin
if m>0 then
 begin
  sum:=sum+m;
  m:=m-1;
  call fsum
 end;
end;
begin
  m:=100;
  call fsum;
  write(sum)
end.

第2题修改后代码如下:

#include<limits.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>

/* Boolean emulation through macros: bool is an int, true is 1, false is 0. */
#define bool int
#define true 1
#define false 0

#define norw 13       /* number of reserved words */
#define txmax 100     /* capacity of the symbol table */
#define nmax 14       /* maximum number of digits in a number */
#define al 10         /* maximum length of an identifier */

 /* Token kinds. The order of the enumerators must match the order of the
    names in sym_int2str, which is indexed by these values. */
enum symbol {
    nul, ident, number, plus, minus,
    times, slash, oddsym, eql, neq,
    lss, leq, gtr, geq, lparen,
    rparen, comma, semicolon, period, becomes,
    beginsym, endsym, ifsym, thensym, whilesym,
    writesym, readsym, dosym, callsym, constsym,
    varsym, procsym,
};
/* Number of enumerators in enum symbol (nul .. procsym). */
#define symnum 32

/* Maps an enum symbol value (used as the index) to its printable name.
   Entries are listed in the same order as the enumerators of enum symbol. */
const char* sym_int2str[] = {"nul","ident","number","plus","minus",
                      "times","slash","oddsym","eql","neq",
                      "lss","leq","gtr","geq","lparen",
                      "rparen","comma","semicolon","period","becomes",
                      "beginsym","endsym","ifsym","thensym","whilesym",
                      "writesym","readsym","dosym","callsym","constsym",
                      "varsym","procsym"};

/* Kinds of entries in the symbol table. */
enum object {
    constant,
    variable,
    procedure,
};

char ch;            /* character most recently read; set by getch */
enum symbol sym;    /* kind of the current token */
char id[al + 1];      /* current identifier; the extra byte holds the terminating 0 */
int num;            /* value of the current number token */
int cc, ll;         /* counters used by getch: cc is the position of the current character (ch) in line, ll is the number of characters stored in line */
char line[81];      /* buffer holding the source line being read */
char a[al + 1];       /* scratch buffer for the word being scanned; the extra byte holds the terminating 0 */
char word[norw][al];        /* reserved-word spellings */
enum symbol wsym[norw];     /* token kind of the reserved word at the same index in word */
enum symbol ssym[256];      /* token kind of each single-character symbol, indexed by character code */

FILE* fin;      /* input source file */
FILE* foutput;  /* output file, also receives error indications (if any) */
char fname[al]; /* buffer for a file name (al bytes) */

/* Forward declarations of the lexer routines defined below. */
void getsym();
void getch();
void init();

/*
 * Program entry point.
 *
 * Prompts for the name of a PL/0 source file, rejects a missing or empty
 * file, opens "foutput.txt" for the transcript, initialises the lexer tables
 * and then calls getsym() repeatedly until the period token is produced.
 *
 * Returns 0 on success; exits with status 1 when a file cannot be opened,
 * no file name is supplied, or the input file is empty.
 */
int main()
{
    /* A local buffer is used for the path: the file-scope fname holds only
       al bytes, and an unbounded "%s" conversion into it would overflow on
       any longer name. The field width keeps scanf inside path[]. */
    char path[256];
    /* fgetc returns an int; keeping it in an int lets EOF be told apart
       from a valid byte regardless of whether plain char is signed. */
    int first;

    printf("Input pl/0 file?   ");
    if (scanf("%255s", path) != 1)    /* no file name could be read */
    {
        printf("No input file name given!\n");
        exit(1);
    }

    if ((fin = fopen(path, "r")) == NULL)
    {
        printf("Can't open the input file!\n");
        exit(1);
    }

    first = fgetc(fin);
    if (first == EOF)    /* the file is empty */
    {
        printf("The input file is empty!\n");
        fclose(fin);
        exit(1);
    }
    rewind(fin);         /* undo the probe read so lexing starts at offset 0 */

    if ((foutput = fopen("foutput.txt", "w")) == NULL)
    {
        printf("Can't open the output file!\n");
        fclose(fin);     /* do not leave the input stream open on this path */
        exit(1);
    }

    init();     /* build the reserved-word and single-character tables */
    cc = ll = 0;
    ch = ' ';   /* a blank makes the first getsym() call fetch a character */

    /* Tokenise until the period that terminates a PL/0 program. */
    do
    {
        getsym();
    } while (sym != period);

    fclose(foutput);
    fclose(fin);

    return 0;
}

/*
 * 初始化
 */
void init()
{
    int i;

    /* 设置单字符符号 */
    for (i = 0; i <= 255; i++)
    {
        ssym[i] = nul;
    }
    ssym['+'] = plus;
    ssym['-'] = minus;
    ssym['*'] = times;
    ssym['/'] = slash;
    ssym['('] = lparen;
    ssym[')'] = rparen;
    ssym['='] = eql;
    ssym[','] = comma;
    ssym['.'] = period;
    ssym['#'] = neq;
    ssym[';'] = semicolon;

    /* 设置保留字名字,按照字母顺序,便于二分查找 */
    strcpy(&(word[0][0]), "begin");
    strcpy(&(word[1][0]), "call");
    strcpy(&(word[2][0]), "const");
    strcpy(&(word[3][0]), "do");
    strcpy(&(word[4][0]), "end");
    strcpy(&(word[5][0]), "if");
    strcpy(&(word[6][0]), "odd");
    strcpy(&(word[7][0]), "procedure");
    strcpy(&(word[8][0]), "read");
    strcpy(&(word[9][0]), "then");
    strcpy(&(word[10][0]), "var");
    strcpy(&(word[11][0]), "while");
    strcpy(&(word[12][0]), "write");

    /* 设置保留字符号 */
    wsym[0] = beginsym;
    wsym[1] = callsym;
    wsym[2] = constsym;
    wsym[3] = dosym;
    wsym[4] = endsym;
    wsym[5] = ifsym;
    wsym[6] = oddsym;
    wsym[7] = procsym;
    wsym[8] = readsym;
    wsym[9] = thensym;
    wsym[10] = varsym;
    wsym[11] = whilesym;
    wsym[12] = writesym;
}

/*
 * 过滤空格,读取一个字符
 * 每次读一行,存入line缓冲区,line被getsym取空后再读一行
 * 被函数getsym调用
 */
void getch()
{
    if (cc == ll) /* 判断缓冲区中是否有字符,若无字符,则读入下一行字符到缓冲区中 */
    {
        printf("-----------------------\n");
        fprintf(foutput,"-----------------------\n");
        printf("Current line : ");
        fprintf(foutput,"Current line : ");
        if (feof(fin))
        {
            printf("Program incomplete!\n");
            exit(1);
        }
        ll = 0;
        cc = 0;

        ch = ' ';
        while (ch != 10)
        {
            if (EOF == fscanf(fin, "%c", &ch))
            {
                line[ll] = 0;
                break;
            }

            printf("%c", ch);
            fprintf(foutput, "%c", ch);
            line[ll] = ch;
            ll++;
        }
    }
    ch = line[cc];
    cc++;
}

/*
 * 词法分析,获取一个符号
 */
void getsym()
{
    int i, j, k;

    while (ch == ' ' || ch == 10 || ch == 9)  /* 过滤空格、换行和制表符 */
    {
        getch();
    }
    if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) /* 当前的单词是标识符或是保留字 */
    {
        k = 0;
        do {
            if (k < al)
            {
                a[k] = ch;
                k++;
            }
            printf("%c",ch);
            fprintf(foutput,"%c",ch);
            getch();
        } while ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9'));
        a[k] = 0;
        strcpy(id, a);
        i = 0;
        j = norw - 1;
        do {    /* 搜索当前单词是否为保留字,使用二分法查找 */
            k = (i + j) / 2;
            if (strcmp(id, word[k]) <= 0)
            {
                j = k - 1;
            }
            if (strcmp(id, word[k]) >= 0)
            {
                i = k + 1;
            }
        } while (i <= j);
        if (i - 1 > j) /* 当前的单词是保留字 */
        {
            sym = wsym[k];
        }
        else /* 当前的单词是标识符 */
        {
            sym = ident;
        }
        printf(" : id = %s, sym = %s\n",id,sym_int2str[sym]);
        fprintf(foutput," : id = %s, sym = %s\n",id,sym_int2str[sym]);
    }
    else
    {
        if (ch >= '0' && ch <= '9') /* 当前的单词是数字 */
        {
            k = 0;
            num = 0;
            sym = number;
            do {
                num = 10 * num + ch - '0';
                k++;
                printf("%c",ch);
                fprintf(foutput,"%c",ch);
                getch();
            } while (ch >= '0' && ch <= '9'); /* 获取数字的值 */
            k--;

            printf(" : sym = %s,num = %d\n",sym_int2str[sym],num);
            fprintf(foutput," : sym = %s,num = %d\n",sym_int2str[sym],num);
        }
        else
        {
            if (ch == ':')        /* 检测赋值符号 */
            {
                printf("%c",ch);
                fprintf(foutput,"%c",ch);
                getch();
                if (ch == '=')
                {
                    sym = becomes;
                    printf("%c",ch);
                    fprintf(foutput,"%c",ch);
                    getch();
                    printf(" : sym = %s\n",sym_int2str[sym]);
                    fprintf(foutput," : sym = %s\n",sym_int2str[sym]);
                }
                else
                {
                    sym = nul;  /* 不能识别的符号 */
                }
            }
            else
            {
                if (ch == '<')     /* 检测小于或小于等于符号 */
                {
                    printf("%c",ch);
                    fprintf(foutput,"%c",ch);
                    getch();
                    if (ch == '=')
                    {
                        sym = leq;
                        printf("%c",ch);
                        fprintf(foutput,"%c",ch);
                        getch();
                    }
                    else
                    {
                        sym = lss;
                    }
                    printf(" : sym = %s\n",sym_int2str[sym]);
                    fprintf(foutput," : sym = %s\n",sym_int2str[sym]);
                }
                else
                {
                    if (ch == '>')     /* 检测大于或大于等于符号 */
                    {
                        printf("%c",ch);
                        fprintf(foutput,"%c",ch);
                        getch();
                        if (ch == '=')
                        {
                            sym = geq;
                            printf("%c",ch);
                            fprintf(foutput,"%c",ch);
                            getch();
                        }
                        else
                        {
                            sym = gtr;
                        }
                        printf(" : sym = %s\n",sym_int2str[sym]);
                        fprintf(foutput," : sym = %s\n",sym_int2str[sym]);
                    }
                    else
                    {
                        sym = ssym[ch];     /* 当符号不满足上述条件时,全部按照单字符符号处理 */
                        printf("%c",ch);
                        fprintf(foutput,"%c",ch);
                        if (sym != period)
                        {
                            getch();
                        }
                        printf(" : sym = %s\n",sym_int2str[sym]);
                        fprintf(foutput," : sym = %s\n",sym_int2str[sym]);
                    }
                }
            }
        }
    }
}

代码见https://gitee.com/WilliamSamShen/complier-concepts/blob/master/Lab/lab2/ex1_1/src/pl0compiler_syntax_analysis_simplified.c

输出截图如下:

a4200a5887e0771500081ad3368529f6.png
5534a850cf742bb0c27430a61c32489a.png

实验代码链接

https://gitee.com/WilliamSamShen/complier-concepts/tree/master/Lab/lab2