AALanguage
The best language for those who have nothing to do
Loading...
Searching...
No Matches
LexicalAnalyzer.cpp
Go to the documentation of this file.
1#include "LexicalAnalyzer.h"
2#include <iostream>
3#include <fstream>
4
5bool LexicalAnalyzer::is_service(Trie* service_trie, std::string s) {
6 return service_trie->find(s.begin(), s.end()) != nullptr;
7}
8
9bool LexicalAnalyzer::is_identifier(std::string s) {
10 static const std::regex r(R"([a-zA-Z_][\w]*)");
11 return std::regex_match(s, r);
12}
13
// NOTE(review): the signature line of this function (listing line 14) is absent
// from this extract; only the visible body is documented here.
// Classifies the string `s` as a literal kind by trying each category in order
// and returning the first one that matches the whole string.

// The two boolean words are logical literals.
15 if (s == "true" || s == "false") return literal_type::logical;
16
// Integer: one or more decimal digits, optionally followed by one suffix from the listed set.
17 static const std::regex integer_r(R"([0-9]{1,}(UL|ul|L|l|I|i|U|u|s|S|US|b|B|ui|UI)?)");
18 if (std::regex_match(s, integer_r)) return literal_type::integer;
19
// Real: digits, a mandatory '.', optional fraction digits, an optional exponent
// (lowercase 'e' only, with optional sign) and an optional d/D/f/F suffix.
20 static const std::regex real_r(R"(([0-9]{1,}\.[0-9]*)(e(\+|-)?[0-9]{1,})?(d|D|f|F)?)");
21 if (std::regex_match(s, real_r)) return literal_type::real;
22
// Symbol: a single-quoted item that is either one character or one of the
// two-character escapes \n, \t, \0, \' and \".
23 static const std::regex symbol_r(R"(\'(.|\\n|\\t|\\0|\\'|\\")\')");
24 if (std::regex_match(s, symbol_r)) return literal_type::symbol;
25
// String: anything that starts and ends with a double quote; the pattern places
// no restriction on quotes appearing in between.
26 static const std::regex string_r(R"(".*")");
27 if (std::regex_match(s, string_r)) return literal_type::string;
28
// None of the categories matched.
29 return literal_type::none;
30}
31
32bool LexicalAnalyzer::is_operation(std::string s) {
33 return s == "[" || s == "]" || s == "." || s == "++" || s == "--" || s == "+" || s == "-" || s == "!" || s == "~" || s == "*" || s == "/" || s == "%" ||
34 s == ">>" || s == "<<" || s == ">" || s == ">=" || s == "<" || s == "<=" || s == "==" || s == "!=" || s == "&" || s == "^" || s == "|" || s == "->" ||
35 s == "&&" || s == "||" || s == "=" || s == "+=" || s == "-=" || s == "*=" || s == "/=" || s == "%=" || s == ">>=" || s == "<<=" ||
36 s == "&=" || s == "^=" || s == "|=" || s == "->=";
37}
38
39bool LexicalAnalyzer::is_punctuation(std::string s) {
40 return s == ":" || s == ";" || s == "{" || s == "}";
41}
42
43bool LexicalAnalyzer::is_comma(std::string s) {
44 return s == ",";
45}
46
47bool LexicalAnalyzer::is_brace(std::string s) {
48 return s == "(" || s == ")";
49}
50
51bool LexicalAnalyzer::is_comment(std::string s) {
52 static const std::regex r(R"(//.*)");
53 return std::regex_match(s, r);
54}
55
// NOTE(review): the signature and several body lines of this function (listing
// lines 56, 59-62 and 65) are absent from this extract; only the checks that are
// visible here are documented.
// The checks run in order and the first predicate that accepts `s` decides the
// returned token_type.

// A comment is recognised before anything else.
57 if (is_comment(s)) return token_type::comment;
// Next, words found in the service trie.
58 if (is_service(service_trie, s)) return token_type::service;
// Single-character separators.
63 if (is_comma(s)) return token_type::comma;
64 if (is_brace(s)) return token_type::brace;
66}
67
68void LexicalAnalyzer::print_token(Trie* service_trie, std::string buffer) {
69 if (buffer.empty()) return;
70 auto type = LexicalAnalyzer::get_token_type(service_trie, buffer);
71 if (type == unknown) {
72 printf(("\x1B[31m(unknown, \"" + buffer + "\", line " + std::to_string(line_) + ")\n\033[0m").c_str());
73 ok_ = false;
74 exit(0);
75 }
76 if (type == comment) return;
77 tokens_.push_back(Token(type, buffer, line_));
78}
79
// NOTE(review): the signature line of this function (listing line 80) is absent
// from this extract; only the visible body is documented here.

// Cursor already past the last token: hand back a placeholder token of type
// `unknown` with an empty value and line -1.
81 if (curr_ == tokens_.size()) {
82 return Token(unknown, "", -1);
83 }
// Otherwise return the token under the cursor and advance the cursor by one.
84 return tokens_[curr_++];
85}
86
// NOTE(review): the signature line of this function (listing line 87) is absent
// from this extract; only the visible body is documented here.

// With the cursor at 0 or 1 the first token is returned and the cursor is left untouched.
// NOTE(review): tokens_[0] is read without any emptiness check visible here —
// confirm callers never reach this with an empty token list.
88 if (curr_ <= 1) return tokens_[0];
// Otherwise move the cursor back by one and return the token just before the new position.
89 return tokens_[--curr_ - 1];
90}
91
// NOTE(review): the signature line of this function (listing line 92) is absent
// from this extract. The visible body returns the ok_ flag, which print_token
// clears when it meets a token of unknown type.
93 return ok_;
94}
95
96std::vector<Token> LexicalAnalyzer::get_tokens() {
97 return tokens_;
98}
99
100std::vector<Token> LexicalAnalyzer::merge(std::vector<Token>& f, std::vector<Token>& s, int& fin_ind, int start_ptr) {
101 std::vector<Token> res;
102 for (int i = 0; i < start_ptr; ++i) {
103 res.push_back(f[i]);
104 }
105 for (int i = 0; i < s.size(); ++i) {
106 res.push_back(s[i]);
107 }
108 for (int i = fin_ind + 1; i < f.size(); ++i) {
109 res.push_back(f[i]);
110 }
111 fin_ind = s.size() + start_ptr;
112 return res;
113}
114
// Builds the token list for the source file at `path`.
//
// Three phases, in order:
//   1. load the service words from "ServiceWords.txt" into a trie;
//   2. read `path` character by character and split it into tokens (stored via print_token);
//   3. replace every `using <name>;` directive with the tokens of the referenced
//      library, obtained by constructing another LexicalAnalyzer on the library file.
//
// path       - file to tokenise.
// lib_parser - used through add_lib() and name_to_path() while resolving `using` directives.
//
// Throws when a `using` directive is not closed by ';' on its own line.
// May terminate the process through print_token()/exit(0) on an unrecognised lexeme.
//
// NOTE(review): listing line 131 (between `char c;` and the quote flags) is absent
// from this extract; the variable `prev` used below is presumably declared there.
115LexicalAnalyzer::LexicalAnalyzer(std::string path, LibParser& lib_parser) {
// NOTE(review): allocated with `new` and no matching `delete` is visible in this
// constructor — looks like a leak per constructed analyzer; confirm.
116 Trie* service_trie = new Trie();
// The service-word list is opened by a fixed relative file name.
117 std::ifstream file("ServiceWords.txt");
118 std::string str;
// One word per line; blank lines are skipped.
119 while (std::getline(file, str)) {
120 if (str.empty()) continue;
121 service_trie->add(str.begin(), str.end());
122 }
123 file.close();
124
// Reuse the same stream object for the source file to tokenise.
125 file = std::ifstream(path);
126
127 str.clear();
128
// `buffer` accumulates the characters of the lexeme currently being built.
129 std::string buffer = "";
130 char c;
// Set while the scanner is inside a double-quoted / single-quoted literal.
132 bool prev_quotation = false, prev_single = false;
// noskipws: whitespace must be seen because it separates tokens.
133 while (file >> std::noskipws >> c) {
// Tabs are treated exactly like spaces.
134 if (c == '\t') c = ' ';
135
// A buffer holding only an opening quote means a quoted literal has just begun.
136 if (buffer == "\"")
137 prev_quotation = true;
138 if (buffer == "\'")
139 prev_single = true;
140
// Token boundary. Because && binds tighter than ||, this reads as:
//   (space outside of any quotes) OR newline OR
//   (empty buffer AND c is punctuation, a comma or a round bracket).
141 if (c == ' ' && !prev_quotation && !prev_single || c == '\n' || buffer.empty() && (is_punctuation(std::string(1, c)) ||
142 c == ',' || c == '(' || c == ')')) {
// A stand-alone separator character becomes a one-character token of its own.
143 if (buffer.empty() && (is_punctuation(std::string(1, c)) || c == ',' || c == '(' || c == ')'))
144 buffer.push_back(c);
// Quoted literals never continue across a line break.
145 if (c == '\n') prev_quotation = prev_single = false;
// Inside a comment a space does not end the token; the space itself is not stored.
146 if (prev == token_type::comment && c == ' ') continue;
147 print_token(service_trie, buffer);
148 prev = token_type::unknown;
149 buffer.clear();
150 if (c == '\n') ++line_;
151 continue;
152 }
// Tentatively extend the current lexeme, then classify both the whole buffer
// and the newly read character on its own.
153 buffer.push_back(c);
154 auto type = get_token_type(service_trie, buffer);
155 auto char_type = get_token_type(service_trie, std::string(1, c));
156 if (type == token_type::unknown) {
// Inside quotes the buffer is not expected to be a valid token until the closing quote.
157 if (prev_quotation || prev_single) continue;
// The new character starts a token of its own: emit what was collected before
// it and restart the buffer with that character.
158 if (char_type == token_type::operation || char_type == token_type::punctuation || char_type == token_type::comma ||
159 char_type == token_type::comment || char_type == token_type::brace) {
160 buffer.pop_back();
161 print_token(service_trie, buffer);
162 prev = get_token_type(service_trie, buffer);
163 buffer = std::string(1, c);
// The buffer was a complete operator/separator before this character arrived:
// emit it and start a new lexeme with the character.
164 } else if (prev == token_type::operation || prev == token_type::punctuation || prev == token_type::comma || prev == token_type::brace) {
165 buffer.pop_back();
166 print_token(service_trie, buffer);
167 prev = get_token_type(service_trie, buffer);
168 buffer = std::string(1, c);
169 } else {
// A quote character in the middle of a lexeme toggles the matching quote flag.
170 if (c == '"') {
171 prev_quotation = !prev_quotation;
172 } else if (c == '\'') {
173 prev_single = !prev_single;
174 }
175 if (prev_quotation || prev_single) continue;
// The buffer is of unknown type here, so print_token reports it and exits;
// the explicit exit(0) is a second stop for the same situation.
176 print_token(service_trie, buffer);
177 exit(0);
178 }
// The buffer is a valid token while a quote flag is set: the quoted literal
// has just been closed, so emit it and start afresh.
179 } else if (prev_quotation || prev_single) {
180 prev_quotation = prev_single = false;
181 print_token(service_trie, buffer);
182 prev = token_type::unknown;
183 buffer.clear();
184 continue;
185 }
// Remember how the current buffer classifies for the next iteration.
186 prev = get_token_type(service_trie, buffer);
187 }
188
// Flush whatever is left when the file does not end with a separator.
189 print_token(service_trie, buffer);
190
// Second pass over tokens_: expand `using <name>;` directives.
// state 0 = looking for `using`; state 1 = collecting the name up to ';'.
191 int start_ptr = 0;
192 int ptr = 0;
193 int state = 0;
194 int state_line = -1;
195 std::string lib_buff = "";
196 while (ptr < tokens_.size()) {
197 if (state == 0) {
198 if (tokens_[ptr].value == "using") {
199 state = 1;
// Remember where the directive starts and on which line it was written.
200 start_ptr = ptr;
201 state_line = tokens_[ptr].line;
202 lib_buff.clear();
203 }
204 ++ptr;
205 } else if (state == 1) {
// The whole directive, including ';', must sit on the line of `using`.
// NOTE(review): std::exception has no constructor taking const char* in the C++
// standard; this form relies on a vendor extension (e.g. MSVC) — confirm the toolchain.
206 if (tokens_[ptr].line != state_line)
207 throw std::exception(("Invalid token: ';' expected (" + std::to_string(tokens_[ptr].line) + " line)").c_str());
// Everything between `using` and ';' is concatenated into the library name.
208 if (tokens_[ptr].value != ";") {
209 lib_buff += tokens_[ptr].value;
210 } else {
211 bool need_to_parse = lib_parser.add_lib(lib_buff);
212 if (need_to_parse) {
213 std::string lib_path = lib_parser.name_to_path(lib_buff);
// Tokenise the library with its own analyzer (which may expand further directives).
214 auto lib_tokens = LexicalAnalyzer(lib_path, lib_parser).get_tokens();
// Replace tokens [start_ptr, ptr] with the library's tokens; merge() also moves
// ptr to the first token after the inserted ones.
215 tokens_ = merge(tokens_, lib_tokens, ptr, start_ptr);
216 state = 0;
217 continue;
218 }
// NOTE(review): when add_lib() returns false `state` stays 1, so the scan keeps
// treating the following tokens as part of a directive — verify this is intended.
219 }
220 ++ptr;
221 }
222 }
// Input ended in the middle of a directive; state 1 implies at least one token exists.
223 if (state != 0)
224 throw std::exception(("Invalid token: ';' expected (" + std::to_string(tokens_[tokens_.size() - 1].line) + " line)").c_str());
225}