From 961d32f58072b5fa01ae6e2135f854ab318e5b64 Mon Sep 17 00:00:00 2001
From: David Morgan
Date: Wed, 2 Nov 2016 00:41:11 +0000
Subject: [PATCH 1/1] import codebase

---
 CMakeLists.txt          |  17 +++
 LICENSE                 |  21 ++++
 README.md               | 188 ++++++++++++++++++++++++++++++
 include/taihen/lexer.h  |  36 ++++++
 include/taihen/parser.h |  16 +++
 src/CMakeLists.txt      |  10 ++
 src/lexer.c             | 209 +++++++++++++++++++++++++++++++++
 src/parser.c            | 249 ++++++++++++++++++++++++++++++++++++++++
 test/CMakeLists.txt     |  10 ++
 test/test_lexer.cpp     | 208 +++++++++++++++++++++++++++++++++
 test/test_parser.cpp    |   9 ++
 11 files changed, 973 insertions(+)
 create mode 100644 CMakeLists.txt
 create mode 100644 LICENSE
 create mode 100644 README.md
 create mode 100644 include/taihen/lexer.h
 create mode 100644 include/taihen/parser.h
 create mode 100644 src/CMakeLists.txt
 create mode 100644 src/lexer.c
 create mode 100644 src/parser.c
 create mode 100644 test/CMakeLists.txt
 create mode 100644 test/test_lexer.cpp
 create mode 100644 test/test_parser.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..4a1df26
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,17 @@
+cmake_minimum_required(VERSION 3.1.0)
+
+project(taihen-config)
+include_directories(include)
+
+add_subdirectory(src)
+
+if (TEST)
+    add_subdirectory(test)
+
+    enable_testing()
+    add_test(NAME LexerTest COMMAND test-lexer)
+endif()
+
+install(DIRECTORY include/taihen/
+        DESTINATION include/taihen
+        FILES_MATCHING PATTERN "*.h")
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..c2582a7
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2016 David "Davee" Morgan
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4c83852
--- /dev/null
+++ b/README.md
@@ -0,0 +1,188 @@
+# taihen-parser - _taiHEN's configuration parser_
+
+taiHEN is a custom firmware (CFW) framework for PS Vita™. It implements a configuration format that lets packagers and users control which modules or plugins are loaded, and when.
+taihen-parser provides a convenient C API for interacting with these configuration files, helping developers write supporting tools for taiHEN. It offers both a lexer and a parser API.
+
+The problem with CFW of a previous era was that each one was a single person's vision of a custom firmware. Average developer X could not easily replace the in-game menu within the provided CFW.
+Likewise, average user Y could not strip out features they did not like. Here, taiHEN provides a solution: a configuration file in which a CFW can be defined as a set of modules and plugins.
+
+Person X may like live-area mod1 and person Y may like live-area mod2. No longer do these two need to choose between CFW A and CFW B, which implement mod1 and mod2, respectively. Instead, they can modify the configuration of their favourite CFW to use whichever mod they prefer. This architecture promotes the _custom_ in _custom firmware_ by encouraging developers to move away from the huge monolithic CFW of the past and helps nurture an open, compatible and _user-orientated_ custom firmware experience.
+
+## Configuration Format
+taiHEN employs a text-based format for configuring the automatic loading of modules. The configuration is a UTF-8 text file that uses line separation to ease parsing and human readability. Each line must be exclusively one of four types:
+ - An empty line
+ - A comment
+ - A section
+ - A module path
+
+Each line can be at most ```CONFIG_MAX_LINE_LENGTH``` characters wide, and trailing/leading whitespace is permitted.
+
+## Lexer Tokens
+The config lexer produces the following tokens:
+ - ```CONFIG_START_TOKEN```
+ - ```CONFIG_END_TOKEN```
+ - ```CONFIG_COMMENT_TOKEN```
+ - ```CONFIG_SECTION_TOKEN```
+ - ```CONFIG_SECTION_HALT_TOKEN```
+ - ```CONFIG_SECTION_NAME_TOKEN```
+ - ```CONFIG_PATH_TOKEN```
+
+A valid configuration format should obey the grammar:
+```
+config ::= CONFIG_START_TOKEN (CONFIG_COMMENT_TOKEN | section)* CONFIG_END_TOKEN
+section ::= CONFIG_SECTION_TOKEN CONFIG_SECTION_HALT_TOKEN? CONFIG_SECTION_NAME_TOKEN ('\n' | EOF) path*
+path ::= CONFIG_PATH_TOKEN ('\n' | EOF)
+```
+
+## Sections: ```*```
+A section in the configuration file functions as a filter and controller for CFW module loading.
+Each section begins with a ```*``` and can optionally be followed by a ```!``` to mark the section as a halt point (see further below). After these tokens, the rest of the line is a UTF-8 name for the section.
+
+A section of the same name may appear in the file multiple times. This is intended to allow users to take advantage of taiHEN's load-ordering policy.
+
+### Halt point: ```!```
+A section can optionally have the halt point token ```!``` following the section token ```*``` in the configuration file. This token instructs the parser to stop further parsing of the file if the section name is within context. See the examples below for a worked case of this feature.
+
+### Reserved names
+There are currently two reserved names for sections:
+ - ```ALL``` - A catch-all user-mode section that will load the modules it contains into every user-mode process.
+ - ```KERNEL``` - A section that loads resident kernel modules on the start of taiHEN.
+
+Using the halt point ```!``` on these sections results in undefined behaviour.
+
+## API
+This API currently offers no guarantee of stability. Please remember that it may change drastically in future versions of taiHEN.
+taiHEN's configuration parser exposes its lexer algorithm to assist in the development of supporting tools. Please consult the header files for documentation.
+
+## Example Configurations
+
+Below is an example of a very simple configuration:
+```
+# example simple config
+*ALL
+ux0:/plugins/my_plugin.suprx
+ux0:/plugins/my_plugin2.suprx
+```
+
+This example consists of a single section, ```ALL```, which means that every game/application/homebrew that is launched will have both ```my_plugin.suprx``` and ```my_plugin2.suprx``` loaded into its process space, in that order.
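+
+To give a feel for the API, here is a minimal sketch of a tool that consumes this simple configuration using the functions declared in ```include/taihen/parser.h```. The configuration text is embedded inline and the title id ```MLCL00001``` is only an example context; a real tool would read the file from disk and choose the title id of interest:
+
+```c
+#include <taihen/parser.h>
+
+#include <stdio.h>
+
+/* receives each module path selected for the chosen section, in load order */
+static void print_module(const char *module, void *param)
+{
+    (void)param;
+    printf("%s\n", module);
+}
+
+int main(void)
+{
+    const char *config =
+        "# example simple config\n"
+        "*ALL\n"
+        "ux0:/plugins/my_plugin.suprx\n"
+        "ux0:/plugins/my_plugin2.suprx\n";
+
+    /* check the document is well formed before streaming it */
+    if (!taihen_config_validate(config))
+    {
+        return 1;
+    }
+
+    /* ALL matches any user-mode context, so both paths are printed in order */
+    taihen_config_parse(config, "MLCL00001", print_module, NULL);
+    return 0;
+}
+```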
+
+More precise functionality may be required for certain homebrew. Perhaps you wish to package your own CFW, in which case you may create a more complex configuration such as the one shown below:
+```
+# hello this is a comment. this line is ignored
+ # this line also
+  # this too, whitespace at the start of a line is OK
+*COOL_GAME
+# i'm within a section, woo!
+ ux0:/coolgame/plugin.suprx
+  # indentation is ok with me
+  ux0:/coolgame/plugin2.suprx
+  # spaces within path is ok
+  ux0:/really cool/I haVe spaces and caps/plugin3.suprx
+# next section
+*ALL
+  # i'm a special section!
+  # i'm always included... usually
+  ux0:/plugins/ingamemusic.suprx
+*KERNEL
+  # i'm a special section also!
+  # my plugins are loaded to kernel memory as resident modules
+  ux0:/taihen/henkaku.skprx
+  ux0:/psphacks.skprx
+*COOL_GAME
+  # this section again?! this is ok! this is a way packagers
+  # can take advantage of load order.
+  ux0:/coolgame/idependoningamemusic.suprx
+*!COOL_GAME2
+  # what is the '!' for?
+  # the '!' prevents further parsing
+  # this would make more sense to put at the start if you want to
+  # blacklist certain modules
+  # look, nothing to load!
+*ALL
+  ux0:/plugins/ibreak_coolgame2.suprx
+
+  # emojis?
+  ux0:/🤔/🦄/👻/🎃.suprx
+```
+Much more complex, and I expect even more complexity once real CFW components come around. As mentioned previously, parsing occurs from top to bottom, identical to load order. When parsing, a section context is selected. In the case of taiHEN, this context is a title id such as ```MLCL00001``` for our molecularShell homebrew. For ease, let's assume that we have selected ```COOL_GAME``` and that it is a user-mode process.
+
+Comments are ignored, so let's continue until we reach the first section: ```COOL_GAME```. Since our selected section matches this first section, the paths below it are loaded until a new section is reached:
+ - ```ux0:/coolgame/plugin.suprx```
+ - ```ux0:/coolgame/plugin2.suprx```
+ - ```ux0:/really cool/I haVe spaces and caps/plugin3.suprx```
+
+Then we reach a new section, ```ALL```. As mentioned above, ```ALL``` is a special reserved section name that matches every user-mode process. So our loaded module list grows:
+ - ```ux0:/coolgame/plugin.suprx```
+ - ```ux0:/coolgame/plugin2.suprx```
+ - ```ux0:/really cool/I haVe spaces and caps/plugin3.suprx```
+ - ```ux0:/plugins/ingamemusic.suprx```
+
+The next section we reach is the special section ```KERNEL```. This is not processed within our context, so we continue until we reach the next section: ```COOL_GAME```. We have already seen this section before, but multiple sections of the same name are allowed in order to take advantage of taiHEN's module load ordering. This is extremely useful when you have dependencies between plugins/modules that need to be resolved. In this example we have ```idependoningamemusic.suprx```, which must be loaded after ```ingamemusic.suprx```.
+
+Our load list now looks like:
+ - ```ux0:/coolgame/plugin.suprx```
+ - ```ux0:/coolgame/plugin2.suprx```
+ - ```ux0:/really cool/I haVe spaces and caps/plugin3.suprx```
+ - ```ux0:/plugins/ingamemusic.suprx```
+ - ```ux0:/coolgame/idependoningamemusic.suprx```
+
+The next section is ```COOL_GAME2```, which does not match our section context. This section has a halt point ```!```, but we ignore it in this case because we do not match the section.
+
+Lastly, we have the final section ```ALL``` again, which completes our load list:
+ - ```ux0:/coolgame/plugin.suprx```
+ - ```ux0:/coolgame/plugin2.suprx```
+ - ```ux0:/really cool/I haVe spaces and caps/plugin3.suprx```
+ - ```ux0:/plugins/ingamemusic.suprx```
+ - ```ux0:/coolgame/idependoningamemusic.suprx```
+ - ```ux0:/plugins/ibreak_coolgame2.suprx```
+ - ```ux0:/🤔/🦄/👻/🎃.suprx```
+
+NOTE: I don't know conclusively whether the Vita filesystem supports emojis. Don't use them...
+
+### ```COOL_GAME2``` Halt Point Example
+Following the same logic as above, we will walk through the configuration with ```COOL_GAME2``` as the section context.
+
+The first section is ```COOL_GAME```; it is not a match, so we skip it.
+
+The second section is ```ALL```, so we load modules from it:
+ - ```ux0:/plugins/ingamemusic.suprx```
+
+The third section is ```KERNEL```, so we skip it.
+
+The fourth section is ```COOL_GAME``` again, so we skip it.
+
+The fifth section is ```COOL_GAME2```, so we process it. This time we have a halt point, so this will be the last section we process. Remember, the halt point ```!``` stops any further parsing. This section, however, has no modules, so nothing is loaded; a section with no modules is OK. In this case, the ```ALL``` section that follows breaks ```COOL_GAME2``` in our hypothetical world. By using the halt point correctly, a CFW packager can maximise compatibility whilst maintaining load ordering.
+
+Our final module loading list for ```COOL_GAME2```:
+ - ```ux0:/plugins/ingamemusic.suprx```
+
+# Building
+To build taihen-parser, you require CMake to generate the appropriate build scripts.
+From within the repository directory:
+```sh
+$ mkdir build && cd build
+$ cmake ..
+$ make
+```
+
+To build the included tests you require the Boost ```unit_test_framework``` component to be installed. Then instead use:
+```sh
+$ mkdir build && cd build
+$ cmake -DTEST=ON ..
+$ make
+```
+
+# Installation
+To install to a specified location, define ```CMAKE_INSTALL_PREFIX```:
+```sh
+$ mkdir build && cd build
+$ cmake -DCMAKE_INSTALL_PREFIX=/my/install/location ..
+$ make
+$ make install
+```
+
+# Acknowledgements
+Team molecule for HENkaku, Yifan Lu for taiHEN and xyz for immense support of the vitasdk.
+
+## License
+taihen-parser is licensed under the terms of the MIT license, which can be read in the ```LICENSE``` file in the root of the repository.
+(C) 2016 David "Davee" Morgan
diff --git a/include/taihen/lexer.h b/include/taihen/lexer.h
new file mode 100644
index 0000000..c71923c
--- /dev/null
+++ b/include/taihen/lexer.h
@@ -0,0 +1,36 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CONFIG_MAX_LINE_LENGTH (256)
+
+typedef enum
+{
+    CONFIG_START_TOKEN,
+    CONFIG_END_TOKEN,
+    CONFIG_COMMENT_TOKEN,
+    CONFIG_SECTION_TOKEN,
+    CONFIG_SECTION_HALT_TOKEN,
+    CONFIG_SECTION_NAME_TOKEN,
+    CONFIG_PATH_TOKEN
+} taihen_config_lexer_token;
+
+typedef struct
+{
+    const char *input;
+    const char *end;
+    taihen_config_lexer_token token;
+    char line[CONFIG_MAX_LINE_LENGTH];
+    char *line_pos;
+} taihen_config_lexer;
+
+int taihen_config_init_lexer(taihen_config_lexer *ctx, const char *input);
+int taihen_config_lex(taihen_config_lexer *ctx);
+
+#ifdef __cplusplus
+}
+#endif
+#endif // LEXER_H
diff --git a/include/taihen/parser.h b/include/taihen/parser.h
new file mode 100644
index 0000000..e04561a
--- /dev/null
+++ b/include/taihen/parser.h
@@ -0,0 +1,16 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void (* taihen_config_handler)(const char *module, void *param);
+
+int taihen_config_validate(const char *input);
+void taihen_config_parse(const char *input, const char *section, taihen_config_handler handler, void *param);
+
+#ifdef __cplusplus
+}
+#endif
+#endif // PARSER_H
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..12cf0ce
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,10 @@
+if (MSVC)
+    add_definitions(-Dinline=__inline)
+endif()
+
+add_library(taihenconfig lexer.c parser.c)
+
+install(TARGETS taihenconfig
+        RUNTIME DESTINATION bin
+        LIBRARY DESTINATION lib
+        ARCHIVE DESTINATION lib)
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..e9ee49c
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,209 @@
+/*
+ * lexer.c - tokenisation algorithm for taihen configuration files
+ *
+ * Copyright (C) 2016 David "Davee" Morgan
+ *
+ * This software may be modified and distributed under the terms
+ * of the MIT license. See the LICENSE file for details.
+ */
+
+#include <taihen/lexer.h>
+
+#include <ctype.h>
+#include <string.h>
+
+static const char TOKEN_EMPTY = '\0';
+static const char TOKEN_COMMENT_START = '#';
+static const char TOKEN_SECTION_START = '*';
+static const char TOKEN_HALT = '!';
+
+static char *skip_whitespace(char *input)
+{
+    while (isspace((unsigned char)*input))
+    {
+        ++input;
+    }
+
+    return input;
+}
+
+static void trim_whitespace(char *input)
+{
+    char *end = input + strlen(input)-1;
+
+    while (end > input)
+    {
+        if (!isspace((unsigned char)*end))
+        {
+            break;
+        }
+
+        *end = '\0';
+        end--;
+    }
+}
+
+static const char *get_newline(const char *input)
+{
+    while (*input)
+    {
+        if (*input == '\r' || *input == '\n')
+        {
+            break;
+        }
+
+        ++input;
+    }
+
+    return input;
+}
+
+static int lex_line(taihen_config_lexer *ctx)
+{
+    if (ctx->input >= ctx->end)
+    {
+        ctx->token = CONFIG_END_TOKEN;
+        return 0;
+    }
+
+    const char *line_end = get_newline(ctx->input);
+    size_t len = line_end - ctx->input;
+
+    // check our line can fit in our buffer
+    if (len >= CONFIG_MAX_LINE_LENGTH)
+    {
+        return -1;
+    }
+
+    // copy line to our buffer so we can modify it
+    memcpy(ctx->line, ctx->input, len);
+    ctx->line[len] = '\0';
+    ctx->line_pos = ctx->line;
+    ctx->input = line_end+1;
+
+    // remove leading whitespace
+    ctx->line_pos = skip_whitespace(ctx->line_pos);
+
+    // check for empty line or comment
+    if (*ctx->line_pos == TOKEN_EMPTY || *ctx->line_pos == TOKEN_COMMENT_START)
+    {
+        ctx->token = CONFIG_COMMENT_TOKEN;
+        return 1;
+    }
+
+    // remove any trailing whitespace
+    trim_whitespace(ctx->line_pos);
+
+    // check if our line is empty now
+    if (*ctx->line_pos == TOKEN_EMPTY)
+    {
+        ctx->token = CONFIG_COMMENT_TOKEN;
+        return 1;
+    }
+
+    // check for section start
+    if (*ctx->line_pos == TOKEN_SECTION_START)
+    {
+        ctx->token = CONFIG_SECTION_TOKEN;
+    }
+    else
+    {
+        // should be a path
+        ctx->token = CONFIG_PATH_TOKEN;
+    }
+
+    return 1;
+}
+
+static int lex_section_halt(taihen_config_lexer *ctx)
+{
+    // skip more whitespace
+    ctx->line_pos = skip_whitespace(ctx->line_pos+1);
+
+    // check for halt token
+    if (*ctx->line_pos == TOKEN_HALT)
+    {
+        ctx->token = CONFIG_SECTION_HALT_TOKEN;
+    }
+    else
+    {
+        // should be a name
+        ctx->token = CONFIG_SECTION_NAME_TOKEN;
+    }
+
+    return 1;
+}
+
+static int lex_section_name(taihen_config_lexer *ctx)
+{
+    // skip more whitespace
+    ctx->line_pos = skip_whitespace(ctx->line_pos+1);
+
+    // should be a name
+    ctx->token = CONFIG_SECTION_NAME_TOKEN;
+    return 1;
+}
+
+/*!
+ \brief Initialise or reset lexer context.
+
+ taihen_config_init_lexer will init/reset the provided taihen_config_lexer and assign the
+ provided input to the context.
+
+ \param ctx A non-null pointer to a context to initialise or reset.
+ \param input A non-null UTF-8 encoded null-terminated string to tokenise.
+ \return zero on success, < 0 on error.
+ */
+int taihen_config_init_lexer(taihen_config_lexer *ctx, const char *input)
+{
+    if (ctx == NULL || input == NULL)
+    {
+        return -1;
+    }
+
+    // reset everything to default and reset input/end pointer
+    memset(ctx, 0, sizeof(taihen_config_lexer));
+    ctx->token = CONFIG_START_TOKEN;
+    ctx->input = input;
+    ctx->end = input + strlen(input);
+    return 0;
+}
+
+/*!
+ \brief Retrieve the next lexer token.
+
+ taihen_config_lex will accept an initialised context and provide the next token
+ in the stream. This tokenisation does no checking on formatting and as such does not
+ confirm that the document provided is well-formed.
+
+ \param ctx A non-null pointer to an initialised context.
+ \return 0 if there are no further tokens, > 0 if there are further tokens, or < 0 on error.
+ \sa taihen_config_init_lexer
+ */
+int taihen_config_lex(taihen_config_lexer *ctx)
+{
+    if (ctx == NULL)
+    {
+        return -1;
+    }
+
+    switch (ctx->token)
+    {
+    case CONFIG_START_TOKEN:
+    case CONFIG_COMMENT_TOKEN:
+    case CONFIG_PATH_TOKEN:
+    case CONFIG_SECTION_NAME_TOKEN:
+        return lex_line(ctx);
+
+    case CONFIG_SECTION_TOKEN:
+        return lex_section_halt(ctx);
+
+    case CONFIG_SECTION_HALT_TOKEN:
+        return lex_section_name(ctx);
+
+    case CONFIG_END_TOKEN:
+    default:
+        return -1;
+    }
+}
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..800a987
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,249 @@
+/*
+ * parser.c - parser algorithm for taihen configuration files
+ *
+ * Copyright (C) 2016 David "Davee" Morgan
+ *
+ * This software may be modified and distributed under the terms
+ * of the MIT license. See the LICENSE file for details.
+ */
+
+#include <taihen/parser.h>
+#include <taihen/lexer.h>
+
+#include <string.h>
+
+static const char *TOKEN_ALL_SECTION = "ALL";
+static const char *TOKEN_KERNEL_SECTION = "KERNEL";
+
+static inline int is_continuation_byte(char b)
+{
+    return ((b & 0xC0) == 0x80);
+}
+
+static inline int check_continuation_bytes(const char *start, const char *end, int len)
+{
+    if ((end - start) < len)
+    {
+        return 0;
+    }
+
+    for (int i = 0; i < len; ++i)
+    {
+        if (!is_continuation_byte(start[i]))
+        {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+static int check_utf8_sequence(const char *str, const char *end, unsigned char mask, unsigned char lead, int cont_len)
+{
+    if ((*str & mask) == lead)
+    {
+        // lead byte matches: the required continuation bytes must be present and valid
+        if (!check_continuation_bytes(str+1, end, cont_len))
+        {
+            return -1;
+        }
+
+        return 1;
+    }
+
+    return 0;
+}
+
+static int check_utf8(const char *str)
+{
+    struct
+    {
+        unsigned char mask;
+        unsigned char lead;
+        unsigned char cont_len;
+    } utf8_lut[4] =
+    {
+        { 0x80, 0x00, 0 }, // U+0000 -> U+007F, 0xxxxxxx
+        { 0xE0, 0xC0, 1 }, // U+0080 -> U+07FF, 110xxxxx
+        { 0xF0, 0xE0, 2 }, // U+0800 -> U+FFFF, 1110xxxx
+        { 0xF8, 0xF0, 3 }, // U+10000 -> U+10FFFF, 11110xxx
+    };
+
+    const char *end = str + strlen(str);
+
+    while (str < end)
+    {
+        int i = 0;
+
+        for (i = 0; i < 4; ++i)
+        {
+            int res = check_utf8_sequence(str, end, utf8_lut[i].mask, utf8_lut[i].lead, utf8_lut[i].cont_len);
+
+            // check if valid sequence but incorrect continuation
+            if (res < 0)
+            {
+                return 0;
+            }
+
+            // check if valid sequence
+            if (res > 0)
+            {
+                str += utf8_lut[i].cont_len+1;
+                break;
+            }
+        }
+
+        // check if we had no valid sequences
+        if (i == 4)
+        {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+/*!
+ \brief Check whether a configuration is valid syntax.
+
+ taihen_config_validate is used to check whether a provided configuration is valid syntax.
+ This is useful when used before taihen_config_parse to provide error checking before stream-based
+ parsing.
+
+ \param input A UTF-8 encoded null-terminated string containing the configuration to check.
+ \return non-zero on valid configuration, else zero on invalid.
+ \sa taihen_config_parse
+ */
+int taihen_config_validate(const char *input)
+{
+    taihen_config_lexer ctx;
+    taihen_config_init_lexer(&ctx, input);
+
+    int have_section = 0;
+    int lex_result = 0;
+
+    while ((lex_result = taihen_config_lex(&ctx)) > 0)
+    {
+        switch (ctx.token)
+        {
+        case CONFIG_SECTION_NAME_TOKEN:
+            // ensure we actually have a string
+            if (strlen(ctx.line_pos) == 0)
+            {
+                return 0;
+            }
+
+            // validate it is UTF-8
+            if (!check_utf8(ctx.line_pos))
+            {
+                return 0;
+            }
+
+            have_section = 1;
+            break;
+
+        case CONFIG_PATH_TOKEN:
+            if (!have_section)
+            {
+                // paths must belong to a section
+                return 0;
+            }
+
+            // ensure we actually have a string
+            if (strlen(ctx.line_pos) == 0)
+            {
+                return 0;
+            }
+
+            // validate it is UTF-8
+            if (!check_utf8(ctx.line_pos))
+            {
+                return 0;
+            }
+
+            break;
+
+        // ignore these, nothing to check
+        case CONFIG_SECTION_HALT_TOKEN:
+        case CONFIG_COMMENT_TOKEN:
+        case CONFIG_SECTION_TOKEN:
+        case CONFIG_END_TOKEN:
+            break;
+
+        // unexpected tokens, invalid document
+        default:
+            return 0;
+        }
+    }
+
+    return (lex_result == 0);
+}
+
+/*!
+ \brief taihen_config_parse parses a configuration for contextualised paths.
+
+ taihen_config_parse is used to obtain an ordered stream of the paths appropriate for the section provided.
+ Special sections such as ALL and KERNEL will be taken into consideration when generating the stream.
+
+ taihen_config_parse provides no error checking or handling. Use taihen_config_validate before parsing the
+ document to avoid errors in parsing.
+
+ \param input A UTF-8 encoded null-terminated string containing the configuration to parse.
+ \param section A UTF-8 encoded null-terminated string containing the section to base context from.
+ \param handler A taihen_config_handler to receive the stream of paths.
+ \param param A user provided value that is passed to the provided taihen_config_handler.
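+
+ A minimal illustrative sketch (the handler name and title id here are arbitrary):
+ \code
+ static void add_to_load_list(const char *module, void *param)
+ {
+     // record "module" somewhere; "param" is the user pointer passed below
+ }
+
+ if (taihen_config_validate(config_text))
+ {
+     taihen_config_parse(config_text, "MLCL00001", add_to_load_list, NULL);
+ }
+ \endcode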
+ \sa taihen_config_validate
+ */
+void taihen_config_parse(const char *input, const char *section, taihen_config_handler handler, void *param)
+{
+    taihen_config_lexer ctx;
+    taihen_config_init_lexer(&ctx, input);
+
+    int halt_flag = 0;
+    int record_entries = 0;
+
+    while (taihen_config_lex(&ctx) > 0)
+    {
+        switch (ctx.token)
+        {
+        case CONFIG_SECTION_HALT_TOKEN:
+            halt_flag = 1;
+            break;

+        case CONFIG_SECTION_NAME_TOKEN:
+            if (strcmp(ctx.line_pos, TOKEN_ALL_SECTION) == 0 && strcmp(section, TOKEN_KERNEL_SECTION) != 0)
+            {
+                record_entries = 1;
+            }
+            else if (strcmp(section, ctx.line_pos) == 0)
+            {
+                record_entries = 1;
+            }
+            else
+            {
+                record_entries = 0;
+            }
+
+            break;
+
+        case CONFIG_SECTION_TOKEN:
+            if (record_entries && halt_flag)
+            {
+                return;
+            }
+
+            halt_flag = 0;
+            break;
+
+        case CONFIG_PATH_TOKEN:
+            if (record_entries)
+            {
+                handler(ctx.line_pos, param);
+            }
+
+            break;
+
+        default:
+            break;
        }
+    }
+}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 0000000..5273969
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,10 @@
+find_package(Boost COMPONENTS unit_test_framework REQUIRED)
+
+include_directories(${taihen-config_SOURCE_DIR}/src ${Boost_INCLUDE_DIRS})
+link_directories(${Boost_LIBRARY_DIRS})
+
+add_executable(test-lexer test_lexer.cpp)
+target_link_libraries(test-lexer taihenconfig)
+
+add_executable(test-parser test_parser.cpp)
+target_link_libraries(test-parser taihenconfig)
diff --git a/test/test_lexer.cpp b/test/test_lexer.cpp
new file mode 100644
index 0000000..09027ea
--- /dev/null
+++ b/test/test_lexer.cpp
@@ -0,0 +1,208 @@
+#include <taihen/lexer.h>
+
+#define BOOST_TEST_MODULE lexer
+#include <boost/test/included/unit_test.hpp>
+
+#include <algorithm>
+#include <cstring>
+#include <iomanip>
+#include <iterator>
+#include <random>
+#include <sstream>
+#include <string>
+#include <vector>
+
+BOOST_AUTO_TEST_CASE(init_lexer)
+{
+    const char *input = "";
+    taihen_config_lexer ctx;
+
+    // test NULL parameter handling
+    BOOST_REQUIRE_LT(taihen_config_init_lexer(NULL, NULL), 0);
+    BOOST_REQUIRE_LT(taihen_config_init_lexer(&ctx, NULL), 0);
+    BOOST_REQUIRE_LT(taihen_config_init_lexer(NULL, input), 0);
+
+    // test correct input
+    BOOST_REQUIRE_GE(taihen_config_init_lexer(&ctx, input), 0);
+}
+
+BOOST_AUTO_TEST_CASE(empty_lex)
+{
+    const char *input = "";
+    taihen_config_lexer ctx;
+
+    BOOST_WARN_GE(taihen_config_init_lexer(&ctx, input), 0);
+
+    // we should expect immediate end of stream
+    BOOST_REQUIRE_EQUAL(taihen_config_lex(&ctx), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_END_TOKEN);
+}
+
+BOOST_AUTO_TEST_CASE(reset_lexer)
+{
+    const char *input = "";
+    taihen_config_lexer ctx;
+
+    BOOST_WARN_GE(taihen_config_init_lexer(&ctx, input), 0);
+
+    // we should expect immediate end of stream
+    BOOST_WARN_EQUAL(taihen_config_lex(&ctx), 0);
+    BOOST_WARN_EQUAL(ctx.token, CONFIG_END_TOKEN);
+
+    // reset the lexer
+    BOOST_REQUIRE_GE(taihen_config_init_lexer(&ctx, input), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_START_TOKEN);
+}
+
+BOOST_AUTO_TEST_CASE(simple_section_lex)
+{
+    const char *input = "*MY SECTION";
+    taihen_config_lexer ctx;
+
+    BOOST_WARN_GE(taihen_config_init_lexer(&ctx, input), 0);
+
+    // we should expect section token
+    BOOST_REQUIRE_GT(taihen_config_lex(&ctx), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_SECTION_TOKEN);
+
+    // then we expect name
+    BOOST_REQUIRE_GT(taihen_config_lex(&ctx), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_SECTION_NAME_TOKEN);
+
+    // check name is still "MY SECTION" (compare contents, not pointers)
+    BOOST_TEST(std::string(ctx.line_pos) == "MY SECTION");
+
+    // then we expect end of stream
+    BOOST_REQUIRE_EQUAL(taihen_config_lex(&ctx), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_END_TOKEN);
+}
+
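+// illustrative extra case: the lexer trims surrounding whitespace, so an
+// indented section header should still produce a clean section name
+BOOST_AUTO_TEST_CASE(whitespace_section_lex)
+{
+    const char *input = "  *   SPACED NAME  ";
+    taihen_config_lexer ctx;
+
+    BOOST_WARN_GE(taihen_config_init_lexer(&ctx, input), 0);
+
+    // we should expect section token
+    BOOST_REQUIRE_GT(taihen_config_lex(&ctx), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_SECTION_TOKEN);
+
+    // then we expect the name with leading/trailing whitespace removed
+    BOOST_REQUIRE_GT(taihen_config_lex(&ctx), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_SECTION_NAME_TOKEN);
+    BOOST_TEST(std::string(ctx.line_pos) == "SPACED NAME");
+
+    // then we expect end of stream
+    BOOST_REQUIRE_EQUAL(taihen_config_lex(&ctx), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_END_TOKEN);
+}
+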
+BOOST_AUTO_TEST_CASE(complex_section_lex)
+{
+    const char *input = "*!MY SECTION";
+    taihen_config_lexer ctx;
+
+    BOOST_WARN_GE(taihen_config_init_lexer(&ctx, input), 0);
+
+    // we should expect section token
+    BOOST_REQUIRE_GT(taihen_config_lex(&ctx), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_SECTION_TOKEN);
+
+    // we should expect section halt token
+    BOOST_REQUIRE_GT(taihen_config_lex(&ctx), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_SECTION_HALT_TOKEN);
+
+    // then we expect name
+    BOOST_REQUIRE_GT(taihen_config_lex(&ctx), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_SECTION_NAME_TOKEN);
+
+    // check name is still "MY SECTION" (compare contents, not pointers)
+    BOOST_TEST(std::string(ctx.line_pos) == "MY SECTION");
+
+    // then we expect end of stream
+    BOOST_REQUIRE_EQUAL(taihen_config_lex(&ctx), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_END_TOKEN);
+}
+
+
+BOOST_AUTO_TEST_CASE(whitespace_lex)
+{
+    const char *input = "\t\t \t\t";
+    taihen_config_lexer ctx;
+
+    BOOST_WARN_GE(taihen_config_init_lexer(&ctx, input), 0);
+
+    // we should expect comment token
+    BOOST_REQUIRE_GT(taihen_config_lex(&ctx), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_COMMENT_TOKEN);
+
+    BOOST_REQUIRE_EQUAL(taihen_config_lex(&ctx), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_END_TOKEN);
+}
+
+BOOST_AUTO_TEST_CASE(comment_lex)
+{
+    const char *input = "#this is a comment";
+    taihen_config_lexer ctx;
+
+    BOOST_WARN_GE(taihen_config_init_lexer(&ctx, input), 0);
+
+    // we should expect comment token
+    BOOST_REQUIRE_GT(taihen_config_lex(&ctx), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_COMMENT_TOKEN);
+
+    BOOST_REQUIRE_EQUAL(taihen_config_lex(&ctx), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_END_TOKEN);
+}
+
+BOOST_AUTO_TEST_CASE(path_lex)
+{
+    const char *input = "this:/is/a/path";
+    taihen_config_lexer ctx;
+
+    BOOST_WARN_GE(taihen_config_init_lexer(&ctx, input), 0);
+
+    // we should expect path token: this isn't valid config syntax on its own,
+    // but it is not the lexer's job to enforce ordering - it just tokenises the input
+    BOOST_REQUIRE_GT(taihen_config_lex(&ctx), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_PATH_TOKEN);
+
+    BOOST_REQUIRE_EQUAL(taihen_config_lex(&ctx), 0);
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_END_TOKEN);
+}
+
+BOOST_AUTO_TEST_CASE(random_lex)
+{
+    std::random_device seed;
+    std::mt19937_64 mt;
+    std::vector<unsigned char> line(255);
+    taihen_config_lexer ctx;
+
+    // seed mt from random device
+    mt.seed(seed());
+
+    for (auto i = 0; i < 100000; ++i)
+    {
+        // generate fresh random data each iteration (reference the engine so its state advances)
+        std::generate(line.begin(), std::prev(line.end()), [&mt] { return static_cast<unsigned char>(mt()); });
+
+        line[254] = '\0';
+
+        BOOST_WARN_GE(taihen_config_init_lexer(&ctx, (char *)(line.data())), 0);
+
+        while (1)
+        {
+            int res = taihen_config_lex(&ctx);
+
+            if (res < 0)
+            {
+                std::stringstream ss;
+
+                ss << "on generated data: " << std::hex << std::setfill('0');
+
+                std::for_each(line.begin(), line.end(), [&ss](auto& v)
+                {
+                    ss << std::setw(2) << static_cast<int>(v);
+                });
+
+                ss << std::endl;
+
+                BOOST_TEST_REQUIRE(res >= 0, ss.str());
+            }
+
+            if (res == 0)
+            {
+                break;
+            }
+        }
+    }
+
+    BOOST_REQUIRE_EQUAL(ctx.token, CONFIG_END_TOKEN);
+}
+
+BOOST_AUTO_TEST_CASE(long_line_lex)
+{
+    char line[CONFIG_MAX_LINE_LENGTH+1];
+    taihen_config_lexer ctx;
+
+    std::memset(line, 'a', sizeof(line));
+    line[CONFIG_MAX_LINE_LENGTH] = '\0';
+
+    BOOST_REQUIRE_GE(taihen_config_init_lexer(&ctx, line), 0);
+    BOOST_REQUIRE_LT(taihen_config_lex(&ctx), 0);
+}
diff --git a/test/test_parser.cpp b/test/test_parser.cpp
new file mode 100644
index 0000000..1df51af
--- /dev/null
+++ b/test/test_parser.cpp
@@ -0,0 +1,9 @@
+#include <taihen/parser.h>
+
+#define BOOST_TEST_MODULE parser
+#include <boost/test/included/unit_test.hpp>
+
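+// illustrative example: count the paths reported for a matching and a
+// non-matching section of a small, hand-written configuration
+static void count_path(const char *module, void *param)
+{
+    (void)module;
+    ++*static_cast<int *>(param);
+}
+
+BOOST_AUTO_TEST_CASE(parse_simple_config)
+{
+    const char *input = "*SECTION\n"
+                        "ux0:/plugins/a.suprx\n"
+                        "ux0:/plugins/b.suprx\n";
+
+    // a well-formed document should validate
+    BOOST_REQUIRE_NE(taihen_config_validate(input), 0);
+
+    // both paths belong to the matching section and are streamed in order
+    int count = 0;
+    taihen_config_parse(input, "SECTION", count_path, &count);
+    BOOST_REQUIRE_EQUAL(count, 2);
+
+    // a non-matching section receives nothing (there is no ALL section here)
+    count = 0;
+    taihen_config_parse(input, "OTHER", count_path, &count);
+    BOOST_REQUIRE_EQUAL(count, 0);
+}
+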
+BOOST_AUTO_TEST_CASE(removed_for_now)
+{
+
+}
-- 
2.39.5