This change makes it easier to print an error message when invalid syntax is encountered.
173 lines
3.9 KiB
C
173 lines
3.9 KiB
C
/* ************************************************************************** */
|
|
/* */
|
|
/* ::: :::::::: */
|
|
/* lexer.c :+: :+: :+: */
|
|
/* +:+ +:+ +:+ */
|
|
/* By: sede-san <sede-san@student.42madrid.com +#+ +:+ +#+ */
|
|
/* +#+#+#+#+#+ +#+ */
|
|
/* Created: 2026/02/09 18:56:41 by sede-san #+# #+# */
|
|
/* Updated: 2026/02/10 09:11:23 by sede-san ### ########.fr */
|
|
/* */
|
|
/* ************************************************************************** */
|
|
|
|
#include "core.h"
|
|
#include "parser.h"
|
|
|
|
static t_token *tokenize(const char *line, size_t *start);
|
|
static t_token_type get_token_type(const char *str);
|
|
static t_token *token_new(t_token_type type, const char *text);
|
|
static void token_clear(t_token *token);
|
|
static t_token *read_token(t_token_type type, const char *line, size_t *i);
|
|
static t_token *read_word(const char *line, size_t *i);
|
|
static inline bool is_meta(char c);
|
|
|
|
/**
|
|
* @brief Converts a command line string into a list of tokens.
|
|
*
|
|
* @return A list of tokens or NULL on error.
|
|
*/
|
|
t_list *lex(
|
|
const char *line)
|
|
{
|
|
t_list *tokens;
|
|
t_token *token;
|
|
size_t i;
|
|
|
|
tokens = NULL;
|
|
i = 0;
|
|
while (line[i] != '\0')
|
|
{
|
|
while (ft_isspace(line[i]))
|
|
i++;
|
|
if (line[i] == '\0')
|
|
break;
|
|
token = tokenize(line, &i);
|
|
ft_lstadd_back(&tokens, ft_lstnew(token));
|
|
if (token == NULL)
|
|
{
|
|
ft_lstclear(&tokens, (void (*)(void *))token_clear);
|
|
return (NULL);
|
|
}
|
|
}
|
|
return (tokens);
|
|
}
|
|
|
|
/**
|
|
* @return A new token or NULL on error.
|
|
*/
|
|
static t_token *tokenize(
|
|
const char *line,
|
|
size_t *start)
|
|
{
|
|
t_token *token;
|
|
t_token_type type;
|
|
|
|
if (line == NULL || line[*start] == '\0')
|
|
return (NULL);
|
|
type = get_token_type(line + *start);
|
|
if (type != TOKEN_WORD)
|
|
token = read_token(type, line, start);
|
|
else
|
|
token = read_word(line, start);
|
|
return (token);
|
|
}
|
|
|
|
static t_token_type get_token_type(
|
|
const char *str
|
|
)
|
|
{
|
|
if (str == NULL || str[0] == '\0')
|
|
return (TOKEN_WORD);
|
|
if (str[0] == '|')
|
|
return (TOKEN_PIPE);
|
|
if (str[0] == '<')
|
|
{
|
|
if (str[1] == '<')
|
|
return (TOKEN_HEREDOC);
|
|
return (TOKEN_REDIRECT_IN);
|
|
}
|
|
if (str[0] == '>')
|
|
{
|
|
if (str[1] == '>')
|
|
return (TOKEN_APPEND);
|
|
return (TOKEN_REDIRECT_OUT);
|
|
}
|
|
return (TOKEN_WORD);
|
|
}
|
|
|
|
static t_token *token_new(
|
|
t_token_type type,
|
|
const char *text)
|
|
{
|
|
t_token *token;
|
|
|
|
token = (t_token *)malloc(sizeof(t_token));
|
|
if (token == NULL)
|
|
return (NULL);
|
|
token->type = type;
|
|
token->value = text;
|
|
if (token->type == TOKEN_WORD && token->value == NULL)
|
|
{
|
|
free(token);
|
|
return (NULL);
|
|
}
|
|
return (token);
|
|
}
|
|
|
|
static void token_clear(
|
|
t_token *token)
|
|
{
|
|
if (token != NULL)
|
|
{
|
|
free(token->value);
|
|
free(token);
|
|
}
|
|
}
|
|
|
|
static t_token *read_token(
|
|
t_token_type type,
|
|
const char *line,
|
|
size_t *i)
|
|
{
|
|
const size_t start = *i;
|
|
size_t end;
|
|
|
|
while (is_meta(line[*i]))
|
|
(*i)++;
|
|
end = *i;
|
|
while (ft_isspace(line[*i]))
|
|
(*i)++;
|
|
return (token_new(type, ft_substr(line, start, end - start)));
|
|
}
|
|
|
|
static t_token *read_word(
|
|
const char *line,
|
|
size_t *i)
|
|
{
|
|
const size_t start = *i;
|
|
bool in_single_quote;
|
|
bool in_double_quote;
|
|
|
|
in_single_quote = false;
|
|
in_double_quote = false;
|
|
while (line[*i] != '\0')
|
|
{
|
|
char c = line[*i];
|
|
(void)c;
|
|
if (line[*i] == '\'' && !in_double_quote)
|
|
in_single_quote = !in_single_quote;
|
|
else if (line[*i] == '"' && !in_single_quote)
|
|
in_double_quote = !in_double_quote;
|
|
else if (!in_single_quote && !in_double_quote
|
|
&& (isspace(line[*i]) || is_meta(line[*i])))
|
|
break;
|
|
(*i)++;
|
|
}
|
|
return (token_new(TOKEN_WORD, ft_substr(line, start, *i - start)));
|
|
}
|
|
|
|
/**
 * @brief Tells whether c is one of the operator characters '|', '<', '>'.
 *
 * @param c Character to test.
 *
 * @return true for '|', '<' and '>', false for everything else.
 */
static inline bool	is_meta(char c)
{
	if (c == '|')
		return (true);
	if (c == '<')
		return (true);
	return (c == '>');
}
|