From c493979a18a19da405b9fd0903bca59f32ba3066 Mon Sep 17 00:00:00 2001 From: Sergio Date: Wed, 11 Feb 2026 02:51:30 +0100 Subject: [PATCH] update: parser now uses tokens from lexer fixes pending: - some functions are longer than norminette allows - find solution to a list of commands being returned, even though a syntax error is found when processing tokens (maybe delegate some work to the lexer and return only a syntax-valid list?) --- include/core.h | 43 +++- include/errors.h | 23 ++ include/minishell.h | 3 +- include/parser.h | 46 +--- src/errors/errors.c | 30 +++ src/parser/lexer.c | 10 +- src/parser/parser.c | 609 ++++++++++++++++++++++++++------------------ 7 files changed, 475 insertions(+), 289 deletions(-) create mode 100644 include/errors.h create mode 100644 src/errors/errors.c diff --git a/include/core.h b/include/core.h index 3e568ae..9faa961 100644 --- a/include/core.h +++ b/include/core.h @@ -6,7 +6,7 @@ /* By: sede-san # include +# include # include // readline(3), rl_clear_history(), // rl_on_new_line(), rl_replace_line(), // rl_redisplay() diff --git a/include/parser.h b/include/parser.h index 1e39c19..60cba1d 100644 --- a/include/parser.h +++ b/include/parser.h @@ -6,7 +6,7 @@ /* By: sede-san value; + if (token == NULL) + cause = "newline"; + ft_eprintf("minishell: syntax error near unexpected token `%s'\n", cause); +} + +void malloc_error(void) +{ + ft_eprintf("minishell: %s\n", strerror(ENOMEM)); +} \ No newline at end of file diff --git a/src/parser/lexer.c b/src/parser/lexer.c index 2c8a864..3ebeb63 100644 --- a/src/parser/lexer.c +++ b/src/parser/lexer.c @@ -6,7 +6,7 @@ /* By: sede-san content; -// if (previous_command != NULL) -// command->piped_from = (t_command *)previous_command->content; -// next_command = current_command->next; -// if (next_command != NULL) -// command->piped_to = (t_command *)next_command->content; -// previous_command = current_command; -// current_command = current_command->next; -// } -// } - -static char *extract_next_command( - char *line, - size_t *index -) { - char *command_str; - size_t start; - size_t end; - - start = *index; - find_boundary(line, index, '|'); - end = *index; - command_str = trim_whitespaces(line, &start, &end); - while (line[*index] == '|' || ft_isspace(line[*index])) - (*index)++; - return (command_str); -} - -static void find_boundary( - char *line, - size_t *index, - char bound_char -) { - bool in_single_quote; - bool in_double_quote; - - in_single_quote = false; - in_double_quote = false; - while (line[*index] != '\0') - { - if (line[*index] == '\'' && !in_double_quote) - in_single_quote = !in_single_quote; - else if (line[*index] == '"' && !in_single_quote) - in_double_quote = !in_double_quote; - if (line[*index] == bound_char && !in_single_quote && !in_double_quote) - break ; - (*index)++; - } -} - -static char *trim_whitespaces( - char *line, - size_t *start, - size_t *end -) { - while (*start < *end && ft_isspace(line[*start])) - (*start)++; - while (*end > *start && ft_isspace(line[*end - 1])) - (*end)--; - if (*end > *start) - return (ft_substr(line, *start, *end - *start)); - return (NULL); -} - -static t_command *cmdnew( - char *line, - t_minishell *minishell -) { +/** + * @brief Converts a list of tokens into a list of commands. + * + * @param tokens The list of tokens to parse. + * @param minishell The minishell instance. + * + * @return A list of commands or `NULL` on error. + */ +static t_list *parse_tokens( + t_list *tokens +) +{ + t_list *commands; t_command *command; + t_list *current_token; + t_list *new_command; + + if (tokens == NULL) + return (NULL); + commands = NULL; + current_token = tokens; + while (current_token != NULL) + { + command = command_new(¤t_token); + if (command == NULL) + { + ft_lstclear(&commands, (void (*)(void *))command_clear); + return (NULL); + } + new_command = ft_lstnew(command); + if (new_command == NULL) + { + command_clear(command); + ft_lstclear(&commands, (void (*)(void *))command_clear); + return (malloc_error(), NULL); + } + ft_lstadd_back(&commands, new_command); + if (current_token != NULL) + { + if (current_token->next == NULL) + syntax_error_unexpected_token((t_token *)current_token->content); + current_token = current_token->next; + } + } + return (commands); +} + +/** + * @brief Creates a new command from a list of tokens. + * + * @param tokens The list of tokens to create the command from. + * + * @return A new command or NULL on error. + * + * @note The `tokens` pointer is moved to the next command's tokens. + */ +t_command *command_new( + t_list **tokens +) +{ + t_command *command; + t_list *current_token; + t_list *delimiter_token; command = (t_command *)ft_calloc(1, sizeof(t_command)); - if (!command) + if (command == NULL) return (NULL); - // resolve_heredoc - set_argv(command, line, minishell); - if (!command->argv) + current_token = *tokens; + delimiter_token = ft_lstfind(current_token, (bool (*)(void *))is_pipe); + while (command != NULL && current_token != delimiter_token) { - free(command); - return (NULL); + command_add_tokens(command, ¤t_token); } - set_argc(command); - set_infile(command); - set_outfile(command); - set_path(command, minishell); + *tokens = current_token; return (command); } -static void set_argv( - t_command *command, - char *line, - t_minishell *minishell -) { - t_list *argv_list; - char *arg; - size_t i; - size_t start; - size_t end; +/** + * @brief Creates a new redirection from a list of tokens. + * + * @param tokens The list of tokens to create the redirection from. + * + * @return A new redirection or `NULL` on error. + */ +t_redirection *redirection_new( + t_list **tokens +) +{ + t_redirection *redirection; + t_token *token; - if (line == NULL) - return ; - i = 0; - argv_list = NULL; - while (line[i] != '\0') + redirection = (t_redirection *)malloc(sizeof(t_redirection)); + if (redirection == NULL) + return (malloc_error(), NULL); + token = (t_token *)(*tokens)->content; + redirection->type = token->type; + *tokens = (*tokens)->next; + if (*tokens == NULL) { - start = i; - find_boundary(line, &i, ' '); - end = i; - arg = trim_whitespaces(line, &start, &end); - expand_envs(arg, minishell); - if (arg != NULL) - ft_lstadd_back(&argv_list, ft_lstnew(arg)); - while (ft_isspace(line[i])) - i++; + free(redirection); + return (syntax_error_unexpected_token(NULL), NULL); } - command->argv = lst_to_argv(argv_list); - ft_lstclear(&argv_list, free); + token = (t_token *)(*tokens)->content; + if (token->type != TOKEN_WORD) + { + free(redirection); + return (syntax_error_unexpected_token(token), NULL); + } + redirection->target = ft_strdup(token->value); + if (redirection->target == NULL) + { + free(redirection); + return (malloc_error(), NULL); + } + *tokens = (*tokens)->next; + return (redirection); } -static void expand_envs( - char *arg, - t_minishell *minishell -) { - // TODO - (void)arg; - (void)minishell; +void redirection_clear( + t_redirection *redirection +) +{ + if (redirection != NULL) + { + free(redirection->target); + free(redirection); + } } -static char **lst_to_argv( - t_list *argv_list -) { - char **argv; - t_list *current_arg; - size_t i; +/** + * @brief Adds a token to a command, updating the command's arguments and + * redirections as necessary. + * + * @param command The command to add the token to. + * @param tokens The list of tokens to add to the command. + * + * @note The `command` pointer can be free'd if there is an error while adding + * the token. + */ +void command_add_tokens( + t_command *command, + t_list **tokens +) +{ + t_token *token; - argv = (char **)ft_calloc(ft_lstsize(argv_list) + 1, sizeof(char *)); - if (!argv) + token = (t_token *)(*tokens)->content; + if (is_redirection(token)) + redirection_add(tokens, token, command); + else + words_add(tokens, command); +} + +char **args_to_array( + t_list *args, + size_t argc +) +{ + char **argv; + size_t i; + + argv = (char **)malloc(sizeof(char *) * (argc + 1)); + if (argv == NULL) return (NULL); i = 0; - current_arg = argv_list; - while (current_arg != NULL) + while (args != NULL) { - argv[i] = ft_strdup((char *)current_arg->content); + argv[i] = (char *)args->content; + args = args->next; i++; - current_arg = current_arg->next; } + argv[i] = NULL; return (argv); } -static void set_argc( +/** + * @brief Adds all consecutive word tokens to a command's argv and updates its + * argc accordingly. + * + * @param command The command to add the word tokens to. + * @param tokens The list of tokens to add to the command. + */ +void words_add( + t_list **tokens, t_command *command -) { - int argc; +) +{ + t_list *args; + t_list *arg; + t_token *token; - argc = 0; - while (command->argv[argc] != NULL) - argc++; - command->argc = argc; -} - -static void set_infile( - t_command *command -) { - // test_infile - command->infile = -1; -} - -static void set_outfile( - t_command *command -) { - // test_outfile - command->outfile = STDOUT_FILENO; -} - -static void set_path( - t_command *command, - t_minishell *minishell -) { - char *command_path; - char *command_name; - - command_name = command->argv[0]; - if (!path_is_solved(command_name, minishell)) - command_path = solve_path(command_name, minishell); - else - command_path = ft_strdup(command_name); - command->path = command_path; -} - -static char *solve_path( - char *command_name, - t_minishell *minishell -){ - char *command_path; - char **path_env; - size_t i; - - path_env = ft_split(get_env("PATH", minishell), ':'); - if (!path_env) - return (NULL); - command_path = NULL; - i = -1; - while (!command_path && path_env[++i]) + args = NULL; + arg = *tokens; + token = (t_token *)arg->content; + while (arg != NULL && token->type == TOKEN_WORD) { - command_path = ft_strnjoin(3, path_env[i], "/", command_name); - if (command_path != NULL && access(command_path, F_OK) != EXIT_SUCCESS) - { - free(command_path); - command_path = NULL; - } + ft_lstadd_back(&args, ft_lstnew(ft_strdup(token->value))); + command->argc++; + arg = arg->next; + if (arg != NULL) + token = (t_token *)arg->content; } - ft_free_split(path_env); - return (command_path); + *tokens = arg; + command->argv = args_to_array(args, command->argc); + ft_lstclear_nodes(&args); } -static u_int8_t path_is_solved( - char *command_name, - t_minishell *minishell -){ - return (ft_strncmp(command_name, "/", 1) == 0 - || (command_name[1] && ft_strncmp(command_name, "./", 2) == 0) - || (command_name[2] && ft_strncmp(command_name, "../", 3) == 0) - || is_builtin(command_name, minishell) - ); +void redirection_add( + t_list **tokens, + t_token *token, + t_command *command +) +{ + t_redirection *redirection; + t_list *redirection_tokens; + + redirection = redirection_new(tokens); + if (redirection == NULL) + return; + redirection_tokens = ft_lstnew(redirection); + if (redirection_tokens == NULL) + { + free(redirection); + return (malloc_error()); + } + if (token->type == TOKEN_HEREDOC) + ft_lstadd_back(&command->heredocs, redirection_tokens); + else + ft_lstadd_back(&command->redirections, redirection_tokens); +} + +/** + * @brief Checks if a token is a redirection token. + * + * @param token The token to check. + * + * @return `true` if the token is a redirection token, `false` otherwise. + */ +bool is_redirection( + t_token *token +) +{ + return (token->type == TOKEN_REDIRECT_IN + || token->type == TOKEN_REDIRECT_OUT + || token->type == TOKEN_APPEND + || token->type == TOKEN_HEREDOC); +} + +void command_clear_argv( + t_command *command +) +{ + int i; + + if (command->argv != NULL) + { + i = 0; + while (i < command->argc) + { + free(command->argv[i]); + i++; + } + free(command->argv); + command->argv = NULL; + } +} + +/** + * @brief Clears a command, freeing all associated memory. + * + * @param command The command to clear. + */ +void command_clear( + t_command *command +) +{ + if (command != NULL) + { + command_clear_argv(command); + ft_lstclear(&command->redirections, (void (*)(void *))redirection_clear); + ft_lstclear(&command->heredocs, (void (*)(void *))redirection_clear); + free(command); + } +} + +/** + * @brief Checks if a token is a pipe token. + * + * @param token The token to check. + * + * @return `true` if the token is a pipe token, `false` otherwise. + */ +bool is_pipe( + t_token *token) +{ + return (token->type == TOKEN_PIPE); +} + +/** + * @brief Finds a node in a linked list that satisfies a given predicate. + * + * @param lst The linked list to search through. + * @param pre The predicate function to apply to each node's content. + * + * @returns The first node that satisfies the predicate or `NULL` if no such + * node exists or if the list is `NULL`. + */ +t_list *ft_lstfind( + t_list *lst, + bool (*pre)(void *)) +{ + while (lst != NULL) + { + if (pre(lst->content)) + return (lst); + lst = lst->next; + } + return (NULL); +} + +void print_command_info( + t_command *command +) +{ + printf("Command:\n"); + printf(" argc: %d\n", command->argc); + printf(" argv: ["); + for (int i = 0; i < command->argc; i++) + { + printf("%s", command->argv[i]); + if (i < command->argc - 1) + printf(", "); + } + printf("]\n"); + printf(" path: %s\n", command->path); + printf(" redirections:\n"); + t_list *redirection_node = command->redirections; + while (redirection_node != NULL) + { + t_redirection *redirection = (t_redirection *)redirection_node->content; + printf(" type: %d, target: %s\n", redirection->type, redirection->target); + redirection_node = redirection_node->next; + } + printf(" heredocs:\n"); + t_list *heredoc_node = command->heredocs; + while (heredoc_node != NULL) + { + t_redirection *heredoc = (t_redirection *)heredoc_node->content; + printf(" type: %d, target: %s\n", heredoc->type, heredoc->target); + heredoc_node = heredoc_node->next; + } +} + +int main(int argc, char const *argv[]) +{ + t_list *commands; + char *line; + + if (argc != 2) + return (EXIT_FAILURE); + line = ft_strdup(argv[1]); + commands = parse(line, NULL); + ft_lstiter(commands, (void (*)(void *))print_command_info); + if (line != NULL) + free(line); + if (commands != NULL) + ft_lstclear(&commands, (void (*)(void *))command_clear); + return 0; }