EXPERIMENT - 2
Lexical Analysis and Symbol Table Generation
M SANGEETHGOWTHAM
22BLC1386
AIM
To implement a lexical analyzer capable of tokenizing a simple programming language whose
tokens include keywords, numbers, identifiers, operators, punctuators, and boolean
literals, and to classify each token into one of the following types:
● Keywords
● Numbers (integer values)
● Identifiers (variables and function names)
● Operators
● Punctuators
● Boolean literals
DESIGN CONSTRAINTS
The lexical analyzer must tokenize statements of a simple programming language and
classify each token into one of the following classes (a worked classification sketch
follows this list):
● Keywords (e.g., if, else, int, return)
● Numbers (integer values)
● Identifiers (variables and function names)
● Operators (e.g., +, -, =)
● Punctuators (e.g., ;)
● Boolean literals (e.g., true, false)
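For illustration, the classification rules above can be sketched as a small standalone C++ routine. This is a sketch only; the keyword, operator, and punctuator sets below are assumptions chosen to match the examples in the list, not the exact sets used by the submitted program.

// Illustrative classification sketch (not part of the submitted program).
#include <iostream>
#include <set>
#include <string>
#include <cctype>
using namespace std;

string classify(const string& tok) {
    // Assumed token sets, mirroring the examples listed above.
    static const set<string> keywords    = {"if", "else", "int", "return"};
    static const set<string> booleans    = {"true", "false"};
    static const set<string> operators   = {"+", "-", "="};
    static const set<string> punctuators = {";"};
    if (keywords.count(tok))    return "Keyword";
    if (booleans.count(tok))    return "Boolean literal";
    if (operators.count(tok))   return "Operator";
    if (punctuators.count(tok)) return "Punctuator";
    bool allDigits = !tok.empty();
    for (char c : tok) allDigits = allDigits && isdigit(static_cast<unsigned char>(c));
    if (allDigits) return "Number";
    return "Identifier";   // anything else is treated as an identifier
}

int main() {
    // Classify the tokens of a hypothetical statement: int count = 42 ;
    for (const string& t : {"int", "count", "=", "42", ";", "true"}) {
        cout << t << " -> " << classify(t) << "\n";
    }
    return 0;
}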
Output Format (a rough illustration follows this list):
● The tokenized representation of each statement will be printed.
● The symbol table will be displayed at the end of the process, listing all unique
identifiers encountered with their respective types.
● A validation message (Valid statement. or Invalid statement.) will be
shown for each processed statement.
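As a rough illustration only, for a hypothetical statement int x = 5 (not taken from the actual input file, which is reproduced in the INPUT FILE section), the artefacts described above would take approximately this shape; the exact numeric ids depend on the order in which lexemes enter the lexical table:

<1><id6><op3><7>     (tokenized representation of the statement)
Valid statement.     (validation message)
x    int    5        (symbol table entry printed at the end)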
Assumptions:
● The language syntax is simplified for the purposes of this lab and does not
include complex constructs like loops, conditionals, or functions.
● Only simple assignments are considered valid statements for this project.
Constraints on Token Length (a sample input consistent with these constraints is sketched after this list):
● Identifiers are assumed to be no longer than 50 characters.
● Numbers are assumed to be positive integers (no floating-point numbers or
negative values).
● Only single-character operators and basic punctuators (such as ;) are
considered.
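Under the assumptions and constraints above, a tokenise.txt for this analyzer might contain space-separated statements of the following shape. This is an illustrative sample only; the file actually used for the run is shown in the INPUT FILE section.

int a = 10
float b = 2
a = 25
b = a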
CODE
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <map>
#include <string>
#include <stdexcept>
#include <cctype>
#include <iomanip>
using namespace std;
// One row of the run-time symbol table: declared type and current value.
struct SymbolInfo {
    string type;
    double value;
};

// One row of the lexical table: lexeme text, token type, and token category.
struct LexicalEntry {
    string name;
    string type;
    string category;
};
// Split one input line on the given delimiter and append its token list to compiled_stream.
void vectorise(string st, char split, vector<vector<string>>& compiled_stream) {
    stringstream ss(st);
    vector<string> split_vector;
    string token;
    while (getline(ss, token, split)) {
        if (!token.empty()) {
            split_vector.push_back(token);
        }
    }
    compiled_stream.push_back(split_vector);
}
// Return true if str is a numeric literal (digits with an optional sign and at most one decimal point).
bool isNumeric(const string& str) {
    if (str.empty()) return false;
    bool hasDecimal = false;
    for (char c : str) {
        if (!isdigit(static_cast<unsigned char>(c)) && c != '-' && c != '.') {
            return false;
        }
        if (c == '.') {
            if (hasDecimal) {
                return false;
            }
            hasDecimal = true;
        }
    }
    return true;
}
double evaluateExpression(const string& expression, const map<string, SymbolInfo>& symbolTable) {
    if (isNumeric(expression)) {
        return stod(expression);
    } else if (symbolTable.count(expression)) {
        return symbolTable.at(expression).value;
    } else {
        throw runtime_error("Invalid expression: " + expression);
    }
}
// Build the lexical table: reserved keywords and operators get fixed ids first,
// then every new literal or identifier found in the input is appended.
void generateLexicalTable(const vector<vector<string>>& compiled_stream, map<int, LexicalEntry>& lexicalTable) {
    int nextId = 1;
    // Keywords
    lexicalTable[nextId++] = {"int", "Keyword", "Reserved"};
    lexicalTable[nextId++] = {"float", "Keyword", "Reserved"};
    // Operators
    lexicalTable[nextId++] = {"=", "Operator", "Symbol"};
    lexicalTable[nextId++] = {"+", "Operator", "Symbol"};
    lexicalTable[nextId++] = {"-", "Operator", "Symbol"};
    for (const auto& tokens : compiled_stream) {
        for (const auto& token : tokens) {
            bool found = false;
            for (const auto& pair : lexicalTable) {
                if (pair.second.name == token) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                if (isNumeric(token)) {
                    lexicalTable[nextId++] = {token, "Literal", "Constant"};
                } else {
                    lexicalTable[nextId++] = {token, "Identifier", "Variable"};
                }
            }
        }
    }
}
// Emit a token-stream string for every line and maintain the symbol table for
// declarations (int/float name = value) and re-assignments (name = expression).
void processTokens(const vector<vector<string>>& compiled_stream,
                   const map<int, LexicalEntry>& lexicalTable,
                   map<string, SymbolInfo>& symbolTable,
                   vector<string>& tokenStream) {
    for (const auto& tokens : compiled_stream) {
        string lineTokenStream;
        for (const auto& token : tokens) {
            for (const auto& pair : lexicalTable) {
                if (pair.second.name == token) {
                    if (pair.second.type == "Keyword") {
                        lineTokenStream += "<" + to_string(pair.first) + ">";
                    } else if (pair.second.type == "Identifier") {
                        lineTokenStream += "<id" + to_string(pair.first) + ">";
                    } else if (pair.second.type == "Operator") {
                        lineTokenStream += "<op" + to_string(pair.first) + ">";
                    } else if (pair.second.type == "Literal") {
                        lineTokenStream += "<" + to_string(pair.first) + ">";
                    }
                    break;
                }
            }
        }
        tokenStream.push_back(lineTokenStream);
        if (tokens.size() >= 3) {
            if (tokens.size() >= 4 && (tokens[0] == "int" || tokens[0] == "float")
                && tokens[2] == "=") {
                // Declaration with an initial value: <type> <name> = <literal>
                string type = tokens[0];
                string name = tokens[1];
                string value_str = tokens[3];
                if (symbolTable.count(name)) {
                    throw runtime_error("Variable " + name + " already declared.");
                } else if (!isNumeric(value_str)) {
                    throw runtime_error("Initial value must be an integer or float: " + value_str);
                } else {
                    double value = stod(value_str);
                    symbolTable[name] = { type, value };
                }
            } else if (tokens[1] == "=") {
                // Re-assignment: <name> = <literal or previously declared identifier>
                string name = tokens[0];
                if (symbolTable.count(name)) {
                    string expression = tokens[2];
                    try {
                        double newValue = evaluateExpression(expression, symbolTable);
                        symbolTable[name].value = newValue;
                    } catch (const runtime_error& err) {
                        throw runtime_error("Error calculating the value: " + string(err.what()));
                    }
                } else {
                    throw runtime_error("Variable " + name + " not declared before being used.");
                }
            } else {
                throw runtime_error("Invalid syntax found near token " + tokens[0] + ".");
            }
        }
    }
}
int main() {
    vector<vector<string>> compiled_stream;
    string txt;
    ifstream MyReadFile("tokenise.txt");
    if (!MyReadFile.is_open()) {
        cerr << "Error: Could not open file tokenise.txt\n";
        return 1;
    }
    while (getline(MyReadFile, txt)) {
        vectorise(txt, ' ', compiled_stream);
    }
    map<int, LexicalEntry> lexicalTable;
    generateLexicalTable(compiled_stream, lexicalTable);
    cout << "Lexical Table:\n";
    cout << setw(5) << "ID" << setw(10) << "Name" << setw(15) << "Type"
         << setw(15) << "Category" << endl;
    for (const auto& pair : lexicalTable) {
        cout << setw(5) << pair.first
             << setw(10) << pair.second.name
             << setw(15) << pair.second.type
             << setw(15) << pair.second.category << endl;
    }
    cout << endl;
    map<string, SymbolInfo> symbolTable;
    vector<string> tokenStream;
    try {
        processTokens(compiled_stream, lexicalTable, symbolTable, tokenStream);
    } catch (const runtime_error& error) {
        cerr << "Error: " << error.what() << endl;
        return 1;
    }
    cout << "Symbol Table:\n";
    cout << "Id_name\tType\tValue\n";
    for (const auto& pair : symbolTable) {
        cout << pair.first << "\t" << pair.second.type << "\t" << pair.second.value << endl;
    }
    cout << endl;
    cout << "Token Stream (Line by Line):\n";
    for (const auto& line : tokenStream) {
        cout << line << endl;
    }
    cout << endl;
    return 0;
}
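The listing above is self-contained. Assuming it is saved as lexer.cpp (the filename is an assumption, not part of the submission) and that tokenise.txt is in the working directory, it can be built and run with any C++11-capable compiler, for example:

g++ -std=c++11 lexer.cpp -o lexer
./lexer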
INPUT FILE
OUTPUT