DeveloperBreeze

Tutorials Programming Tutorials, Guides & Best Practices

Explore 149+ expertly crafted tutorials, components, and code examples. Stay productive and build faster with proven implementation strategies and design patterns from DeveloperBreeze.

Implementing a Domain-Specific Language (DSL) with LLVM and C++

Tutorial February 12, 2025

#include "DSL/Lexer.h"
#include <cctype>
#include <cstdlib>

Lexer::Lexer(const std::string& input) : input(input) {}

// Returns the character at the current read position, or '\0' as an
// end-of-input sentinel when the position is at or beyond the end of the input.
char Lexer::currentChar() {
    return pos < input.size() ? input[pos] : '\0';
}

// Moves the read position forward by exactly one character.
// No bounds check is performed here.
void Lexer::advance() {
    ++pos;
}

// Advances the read position past any run of whitespace characters.
// Stops at the first non-whitespace character or at end of input
// (currentChar() yields '\0' there, which is not whitespace).
void Lexer::skipWhitespace() {
    // The <cctype> classifiers require a value representable as unsigned char
    // (or EOF); passing a negative plain char (e.g. a non-ASCII byte) is
    // undefined behaviour, so convert explicitly.
    while (std::isspace(static_cast<unsigned char>(currentChar()))) {
        advance();
    }
}

// Scans a numeric literal starting at the current position.
//
// Consumes the maximal run of decimal digits and '.' characters. If the run
// is a well-formed literal (at least one digit and at most one '.'), returns
// a Number token whose text is the consumed characters and whose value is the
// result of std::strtod on that text. Otherwise (e.g. "." or "1.2.3") returns
// an Invalid token carrying the consumed text, so malformed input is reported
// instead of being silently truncated to a partial value.
Token Lexer::number() {
    const size_t start = pos;
    size_t digitCount = 0;
    size_t dotCount = 0;

    for (;;) {
        const char c = currentChar();
        // Cast before calling isdigit: a negative plain char is undefined
        // behaviour for the <cctype> classifiers.
        if (std::isdigit(static_cast<unsigned char>(c))) {
            ++digitCount;
        } else if (c == '.') {
            ++dotCount;
        } else {
            break;
        }
        advance();
    }

    std::string numStr = input.substr(start, pos - start);

    // Reject a run with no digits or with more than one decimal point.
    if (digitCount == 0 || dotCount > 1) {
        return { TokenType::Invalid, numStr, 0 };
    }

    double value = std::strtod(numStr.c_str(), nullptr);
    return { TokenType::Number, numStr, value };
}

// Produces the next token from the input.
//
// Leading whitespace is skipped first. Then:
//   - at end of input, an EndOfFile token with empty text is returned;
//   - a digit or '.' starts a numeric literal, delegated to number();
//   - one of + - * / ( ) yields the matching single-character token;
//   - any other character yields an Invalid token.
// For single-character tokens (including Invalid) the token text is that
// character, the value is 0, and exactly one character is consumed.
Token Lexer::getNextToken() {
    skipWhitespace();

    const char current = currentChar();

    if (current == '\0') {
        return { TokenType::EndOfFile, "", 0 };
    }
    // Cast before calling isdigit: passing a negative plain char (e.g. a
    // non-ASCII byte) to the <cctype> classifiers is undefined behaviour.
    if (std::isdigit(static_cast<unsigned char>(current)) || current == '.') {
        return number();
    }

    Token token;
    token.text = std::string(1, current);
    token.value = 0;
    switch (current) {
        case '+': token.type = TokenType::Plus; break;
        case '-': token.type = TokenType::Minus; break;
        case '*': token.type = TokenType::Asterisk; break;
        case '/': token.type = TokenType::Slash; break;
        case '(': token.type = TokenType::LParen; break;
        case ')': token.type = TokenType::RParen; break;
        default: token.type = TokenType::Invalid; break;
    }
    // Consume the character so the next call makes progress, even on Invalid.
    advance();
    return token;
}

We’ll implement a recursive-descent parser that constructs an Abstract Syntax Tree (AST) from tokens. Our grammar is defined with standard operator precedence: