update: added lexer
This commit is contained in:
@@ -6,7 +6,7 @@
|
||||
/* By: sede-san <sede-san@student.42madrid.com +#+ +:+ +#+ */
|
||||
/* +#+#+#+#+#+ +#+ */
|
||||
/* Created: 2026/02/09 18:56:41 by sede-san #+# #+# */
|
||||
/* Updated: 2026/02/09 23:09:28 by sede-san ### ########.fr */
/* */
|
||||
/* ************************************************************************** */
|
||||
|
||||
@@ -15,10 +15,20 @@
|
||||
|
||||
static t_token *tokenize(const char *line, size_t *start);
|
||||
static t_token_type get_token_type(const char *str);
|
||||
static t_token *token_new(t_token_type type, const char *text);
|
||||
static void token_clear(t_token *token);
|
||||
static t_token *read_token(t_token_type type, const char *line, size_t *i);
|
||||
static t_token *read_word(const char *line, size_t *i);
|
||||
static inline bool is_meta(char c);
|
||||
|
||||
/**
|
||||
* @brief Converts a command line string into a list of tokens.
|
||||
*
|
||||
* @return A list of tokens or NULL on error.
|
||||
*/
|
||||
t_list *lex(
|
||||
const char *line
|
||||
) {
|
||||
const char *line)
|
||||
{
|
||||
t_list *tokens;
|
||||
t_token *token;
|
||||
size_t i;
|
||||
@@ -27,53 +37,135 @@ t_list *lex(
|
||||
i = 0;
|
||||
while (line[i] != '\0')
|
||||
{
|
||||
// ignore spaces
|
||||
while (ft_isspace(line[i]))
|
||||
i++;
|
||||
// create token
|
||||
if (line[i] == '\0')
|
||||
break;
|
||||
token = tokenize(line, &i);
|
||||
// add token to list
|
||||
if (token != NULL)
|
||||
ft_lstadd_back(&tokens, ft_lstnew(token));
|
||||
if (token == NULL)
|
||||
{
|
||||
ft_lstclear(&tokens, (void (*)(void *))token_clear);
|
||||
return (NULL);
|
||||
}
|
||||
}
|
||||
return (tokens);
|
||||
}
|
||||
|
||||
static t_token *tokenize(const char *line, size_t *start) {
|
||||
/**
|
||||
* @return A new token or NULL on error.
|
||||
*/
|
||||
static t_token *tokenize(
|
||||
const char *line,
|
||||
size_t *start)
|
||||
{
|
||||
t_token *token;
|
||||
t_token_type type;
|
||||
|
||||
token = NULL;
|
||||
if (line == NULL || line[*start] == '\0')
|
||||
return (NULL);
|
||||
type = get_token_type(line + *start);
|
||||
(void)type;
|
||||
// if (type != TOKEN_WORD)
|
||||
// token = token_new(type, NULL);
|
||||
// else
|
||||
// token = read_word(line, start);
|
||||
// if (token == NULL)
|
||||
// (*start) += ft_strlen(token->value);
|
||||
if (type != TOKEN_WORD)
|
||||
token = read_token(type, line, start);
|
||||
else
|
||||
token = read_word(line, start);
|
||||
return (token);
|
||||
}
|
||||
|
||||
static t_token_type get_token_type(const char *str)
|
||||
static t_token_type get_token_type(
|
||||
const char *str
|
||||
)
|
||||
{
|
||||
size_t i;
|
||||
static const t_map_entry tokens[TOKENS_COUNT] = {
|
||||
{PIPE_STR, (void *)TOKEN_PIPE},
|
||||
{REDIRECT_IN_STR, (void *)TOKEN_REDIRECT_IN},
|
||||
{REDIRECT_OUT_STR, (void *)TOKEN_REDIRECT_OUT},
|
||||
{APPEND_STR, (void *)TOKEN_APPEND},
|
||||
{HEREDOC_STR, (void *)TOKEN_HEREDOC}
|
||||
};
|
||||
|
||||
i = 0;
|
||||
while (i < TOKENS_COUNT)
|
||||
if (str == NULL || str[0] == '\0')
|
||||
return (TOKEN_WORD);
|
||||
if (str[0] == '|')
|
||||
return (TOKEN_PIPE);
|
||||
if (str[0] == '<')
|
||||
{
|
||||
if (ft_strcmp(str, tokens[i].key) == 0)
|
||||
return ((t_token_type)tokens[i].value);
|
||||
i++;
|
||||
if (str[1] == '<')
|
||||
return (TOKEN_HEREDOC);
|
||||
return (TOKEN_REDIRECT_IN);
|
||||
}
|
||||
if (str[0] == '>')
|
||||
{
|
||||
if (str[1] == '>')
|
||||
return (TOKEN_APPEND);
|
||||
return (TOKEN_REDIRECT_OUT);
|
||||
}
|
||||
return (TOKEN_WORD);
|
||||
}
|
||||
|
||||
static t_token *token_new(
|
||||
t_token_type type,
|
||||
const char *text)
|
||||
{
|
||||
t_token *token;
|
||||
|
||||
token = (t_token *)malloc(sizeof(t_token));
|
||||
if (token == NULL)
|
||||
return (NULL);
|
||||
ft_putendl("malloc");
|
||||
token->type = type;
|
||||
token->value = text;
|
||||
if (token->type == TOKEN_WORD && token->value == NULL)
|
||||
{
|
||||
free(token);
|
||||
return (NULL);
|
||||
}
|
||||
return (token);
|
||||
}
|
||||
|
||||
static void token_clear(
|
||||
t_token *token)
|
||||
{
|
||||
if (token != NULL)
|
||||
{
|
||||
if (token->value != NULL)
|
||||
{
|
||||
free(token->value);
|
||||
ft_putendl("free");
|
||||
}
|
||||
free(token);
|
||||
ft_putendl("free");
|
||||
}
|
||||
}
|
||||
|
||||
static t_token *read_token(
|
||||
t_token_type type,
|
||||
const char *line,
|
||||
size_t *i)
|
||||
{
|
||||
while (ft_isspace(line[*i]) || is_meta(line[*i]))
|
||||
(*i)++;
|
||||
return (token_new(type, NULL));
|
||||
}
|
||||
|
||||
static t_token *read_word(
|
||||
const char *line,
|
||||
size_t *i)
|
||||
{
|
||||
const size_t start = *i;
|
||||
bool in_single_quote;
|
||||
bool in_double_quote;
|
||||
|
||||
in_single_quote = false;
|
||||
in_double_quote = false;
|
||||
while (line[*i] != '\0')
|
||||
{
|
||||
char c = line[*i];
|
||||
(void)c;
|
||||
if (line[*i] == '\'' && !in_double_quote)
|
||||
in_single_quote = !in_single_quote;
|
||||
else if (line[*i] == '"' && !in_single_quote)
|
||||
in_double_quote = !in_double_quote;
|
||||
else if (!in_single_quote && !in_double_quote && (isspace(line[*i]) || is_meta(line[*i])))
|
||||
break;
|
||||
(*i)++;
|
||||
}
|
||||
return (token_new(TOKEN_WORD, ft_substr(line, start, *i - start)));
|
||||
}
|
||||
|
||||
/* True for the shell metacharacters that terminate an unquoted word. */
static inline bool	is_meta(char c)
{
	if (c == '|')
		return (true);
	if (c == '<')
		return (true);
	return (c == '>');
}
|
||||
|
||||
Reference in New Issue
Block a user