diff --git a/elixir/filters/__init__.py b/elixir/filters/__init__.py index b06eae8f..e65e9d08 100755 --- a/elixir/filters/__init__.py +++ b/elixir/filters/__init__.py @@ -1,23 +1,51 @@ -from typing import List - -from .utils import Filter, FilterContext -from .projects import project_filters, default_filters - -# Returns a list of applicable filters for project_name under provided filter context -def get_filters(ctx: FilterContext, project_name: str) -> List[Filter]: - filter_classes = project_filters.get(project_name, default_filters) - filters = [] - - for filter_cls in filter_classes: - if type(filter_cls) == tuple and len(filter_cls) == 2: - cls, kwargs = filter_cls - filters.append(cls(**kwargs)) - elif type(filter_cls) == type: - filters.append(filter_cls()) - else: - raise ValueError(f"Invalid filter: {filter_cls}, " \ - "should be either a two element tuple or a type. " \ - "Make sure project_filters in project.py is valid.") - - return [f for f in filters if f.check_if_applies(ctx)] +from .ident import IdentFilter + +from .cppinc import CppIncFilter +from .cpppathinc import CppPathIncFilter + +from .defconfig import DefConfigIdentsFilter +from .configin import ConfigInFilter + +from .kconfig import KconfigFilter +from .kconfigidents import KconfigIdentsFilter + +from .dtsi import DtsiFilter +from .dtscompdocs import DtsCompDocsFilter +from .dtscompcode import DtsCompCodeFilter +from .dtscompdts import DtsCompDtsFilter + +from .makefileo import MakefileOFilter +from .makefiledtb import MakefileDtbFilter +from .makefiledir import MakefileDirFilter +from .makefilesubdir import MakefileSubdirFilter +from .makefilefile import MakefileFileFilter +from .makefilesrctree import MakefileSrcTreeFilter +from .makefilesubdir import MakefileSubdirFilter + + +# List of filters applied to all projects +default_filters = [ + DtsCompCodeFilter, + DtsCompDtsFilter, + DtsCompDocsFilter, + IdentFilter, + CppIncFilter, +] + +# List of filters for Kconfig files +common_kconfig_filters = [ + KconfigFilter, + KconfigIdentsFilter, + DefConfigIdentsFilter, +] + +# List of filters for Makefiles +common_makefile_filters = [ + MakefileOFilter, + MakefileDtbFilter, + MakefileDirFilter, + MakefileFileFilter, + MakefileSubdirFilter, + MakefileSrcTreeFilter, +] diff --git a/elixir/lexers/__init__.py b/elixir/lexers/__init__.py new file mode 100644 index 00000000..f4f3fa32 --- /dev/null +++ b/elixir/lexers/__init__.py @@ -0,0 +1,10 @@ +from .lexers import * + +default_lexers = { + r'.*\.(c|h|cpp|hpp|c++|cxx|cc)': CLexer, + r'makefile\..*': MakefileLexer, + r'.*\.dts(i)?': DTSLexer, + r'.*\.s': GasLexer, + r'kconfig.*': KconfigLexer, #TODO negative lookahead for .rst +} + diff --git a/elixir/lexers/__main__.py b/elixir/lexers/__main__.py new file mode 100644 index 00000000..676b2ed4 --- /dev/null +++ b/elixir/lexers/__main__.py @@ -0,0 +1,35 @@ +if __name__ == "__main__": + import sys + from . 
import lexers + + if not (len(sys.argv) == 2 or (len(sys.argv) == 3 and sys.argv[1] == '-s')): + print("usage:", sys.argv[0], "[-s]", "path/to/file") + exit(1) + + short = sys.argv[1] == '-s' + + filename = sys.argv[-1] + + with open(filename) as f: + if filename.endswith(('.c', '.h', '.cpp', '.hpp')): + lexer = lexers.CLexer(f.read()) + elif filename.endswith(('.dts', '.dtsi')): + lexer = lexers.DTSLexer(f.read()) + elif filename.endswith('Kconfig'): + lexer = lexers.KconfigLexer(f.read()) + elif filename.endswith(('.s', '.S')): + lexer = lexers.GasLexer(f.read()) + elif filename.endswith('Makefile'): + lexer = lexers.MakefileLexer(f.read()) + else: + raise Exception("no lexer for filetype") + + for token in lexer.lex(): + if not short: + print(token.line, token.token_type.name, token.span, token.token.encode()) + else: + if token.token_type.name == 'IDENTIFIER' or token.token_type.name == 'STRING': + print(f"|{token.token}|", end='') + else: + print(token.token, end='') + diff --git a/elixir/lexers/lexers.py b/elixir/lexers/lexers.py new file mode 100644 index 00000000..b470e749 --- /dev/null +++ b/elixir/lexers/lexers.py @@ -0,0 +1,395 @@ +import re + +from . import shared +from .utils import TokenType, simple_lexer, FirstInLine, split_by_groups, regex_concat, token_from_string, token_from_match, \ + regex_or, match_token, Token + +# Lexers used to extract possible references from source files +# Design inspired by Pygments lexers interface + +# https://en.cppreference.com/w/c/language +# https://www.iso-9899.info/wiki/The_Standard +class CLexer: + # NOTE: does not support unicode identifiers + c_identifier = r'[a-zA-Z_][a-zA-Z_0-9]*' + + c_punctuation = r'[!#%&`()*+,./:;<=>?\[\]\\^_{|}~-]' + + # NOTE: macros don't always contain C code, but detecting that in pratice is hard + # without information about context (where the file is included from). 
+ c_punctuation_extra = r'[$\\@]' + + rules = [ + (shared.whitespace, TokenType.WHITESPACE), + (shared.common_slash_comment, TokenType.COMMENT), + (shared.common_string_and_char, TokenType.STRING), + (shared.c_number, TokenType.NUMBER), + (c_identifier, TokenType.IDENTIFIER), + (FirstInLine(shared.c_preproc_ignore), TokenType.SPECIAL), + (c_punctuation, TokenType.PUNCTUATION), + (c_punctuation_extra, TokenType.PUNCTUATION), + ] + + def __init__(self, code): + self.code = code + + def lex(self, **kwargs): + return simple_lexer(self.rules, self.code, **kwargs) + + +# https://www.devicetree.org/specifications/ +class DTSLexer: + # TODO handle macros separately + + # NOTE: previous versions would split identifiers by commas (and other special characters), + # this changes the old behavior + + # 6.2 + # technically shall be 1-31 characters long BUT /linux/v6.9.4/source/arch/arm64/boot/dts/qcom/sm8250.dtsi#L3506 + dts_label = r'[a-zA-Z_][a-zA-Z_0-9]*' + # no whitespace between label and ampersand/colon is allowed + dts_label_reference = f'(&)({ dts_label })' + dts_label_definition = f'({ dts_label })(:)' + + # 2.2.1 + # same with label lenght, just in case + dts_node_name = r'[a-zA-Z0-9,._+-]+' + # can contain macro symbols + dts_unit_address = r'[a-zA-Z0-9,._+-]*' + + dts_node_name_with_unit_address = f'({ dts_node_name })(@)({ dts_unit_address })' + r'(\s*)({)' + dts_node_name_without_unit_address = f'({ dts_node_name })' + r'(\s*)({)' + + # 2.2.4 + dts_property_name = r'[0-9a-zA-Z,._+?#-]+' + dts_property_assignment = f'({ dts_property_name })' + r'(\s*)(=)' + dts_property_empty = f'({ dts_property_name })' + r'(\s*)(;)' + + dts_directive = r'/[a-zA-Z0-9-]+/'; + dts_delete_node = regex_concat(r'/delete-node/\s+', dts_node_name) + dts_delete_property = regex_concat(r'/delete-property/\s+', dts_property_name) + + # 6.3 + dts_node_reference = r'(&)({)([a-zA-Z0-9,._+/@-]+?)(})' + + dts_punctuation = r'[#@:;{}\[\]()^<>=+*/%&\\|~!?,-]' + # other, unknown, identifiers - for exmple macros + dts_default_identifier = r'[0-9a-zA-Z_]+' + + # Parse DTS node reference, ex: &{/path/to/node@20/test} + @staticmethod + def parse_dts_node_reference(ctx, match): + # & + token, ctx = token_from_string(ctx, match.group(1), TokenType.PUNCTUATION) + yield token + + # { + token, ctx = token_from_string(ctx, match.group(2), TokenType.PUNCTUATION) + yield token + + path = match.group(3) + path_part_matcher = re.compile(DTSLexer.dts_unit_address) + strpos = 0 + + while strpos < len(path): + if path[strpos] == '@' or path[strpos] == '/': + token, ctx = token_from_string(ctx, path[strpos], TokenType.PUNCTUATION) + yield token + strpos += 1 + else: + part_match = path_part_matcher.match(path, strpos) + if part_match is None: + token, _ = token_from_string(ctx, TokenType.ERROR, '') + yield token + return None + + token, ctx = token_from_string(ctx, part_match.group(0), TokenType.IDENTIFIER) + yield token + strpos += len(part_match.group(0)) + # } + token, ctx = token_from_string(ctx, match.group(4), TokenType.PUNCTUATION) + yield token + + rules = [ + (shared.whitespace, TokenType.WHITESPACE), + (shared.common_slash_comment, TokenType.COMMENT), + (shared.common_string_and_char, TokenType.STRING), + (shared.c_number, TokenType.NUMBER), + + (dts_label_reference, split_by_groups(TokenType.PUNCTUATION, TokenType.IDENTIFIER)), + (dts_label_definition, split_by_groups(TokenType.IDENTIFIER, TokenType.PUNCTUATION)), + (dts_node_reference, parse_dts_node_reference), + + (dts_property_assignment, + 
split_by_groups(TokenType.IDENTIFIER, TokenType.WHITESPACE, TokenType.PUNCTUATION)), + (dts_property_empty, + split_by_groups(TokenType.IDENTIFIER, TokenType.WHITESPACE, TokenType.PUNCTUATION)), + + (dts_node_name_with_unit_address, + split_by_groups(TokenType.IDENTIFIER, TokenType.PUNCTUATION, + TokenType.IDENTIFIER, TokenType.WHITESPACE, TokenType.PUNCTUATION)), + (dts_node_name_without_unit_address, + split_by_groups(TokenType.IDENTIFIER, TokenType.WHITESPACE, TokenType.PUNCTUATION)), + + (dts_directive, TokenType.SPECIAL), + (dts_delete_node, split_by_groups(TokenType.SPECIAL, TokenType.IDENTIFIER)), + (dts_delete_property, split_by_groups(TokenType.SPECIAL, TokenType.IDENTIFIER)), + (dts_default_identifier, TokenType.IDENTIFIER), + (FirstInLine(shared.c_preproc_ignore), TokenType.SPECIAL), + (dts_punctuation, TokenType.PUNCTUATION), + ] + + def __init__(self, code): + self.code = code + + def lex(self, **kwargs): + return simple_lexer(self.rules, self.code, **kwargs) + + +# https://www.kernel.org/doc/html/next/kbuild/kconfig-language.html#kconfig-syntax +# https://www.kernel.org/doc/html/next/kbuild/kconfig-language.html#kconfig-hints + +# TODO better macros calls support + +class KconfigLexer: + hash_comment = r'#' + shared.singleline_comment_with_escapes_base + + # NOTE pretty much all kconfig identifiers either start uppercase or with a number. this saves us from parsing macro calls + kconfig_identifier_starts_with_letters = r'[A-Z_][A-Z0-9a-z_-]*' + kconfig_identifier_starts_with_digits = r'[0-9]+[A-Z_a-z-][A-Z0-9a-z_-]*' + kconfig_identifier = regex_or(kconfig_identifier_starts_with_letters, kconfig_identifier_starts_with_digits) + # other perhaps interesting identifiers + kconfig_minor_identifier = r'[a-zA-Z0-9_/][a-zA-Z0-9_/.-]*' + kconfig_punctuation = r'[|&!=$()/_.+<>,-]' + kconfig_number = f'[0-9]+' # TODO does not handle hex numbers + + # NOTE no identifiers are parsed out of KConfig help texts now, this changes the + # old behavior + # for example see all instances of USB in /u-boot/v2024.07/source/drivers/usb/Kconfig#L3 + + @staticmethod + def count_kconfig_help_whitespace(start_whitespace_str): + tabs = start_whitespace_str.count('\t') + spaces = start_whitespace_str.count(' ') + return 8*tabs + spaces + (len(start_whitespace_str)-tabs-spaces) + + @staticmethod + def parse_kconfig_help_text(ctx, match): + # assumes called with matched help keyword, return the keyword + token, ctx = token_from_match(ctx, match, TokenType.SPECIAL) + yield token + + # match whitespace after help + whitespace_after_help, ctx = match_token(ctx, r'\s*?\n', TokenType.WHITESPACE) + if whitespace_after_help is None: + # failed to match whitespace and newline after kconfig help - perhaps it's not the right context (macro call for exapmle) + return + else: + yield whitespace_after_help + + line_matcher = re.compile(r'[^\n]*\n', flags=re.MULTILINE|re.UNICODE) + + start_help_text_pos = ctx.pos + current_pos = ctx.pos + min_whitespace = None + + def collect_tokens(start, end): + return Token(TokenType.COMMENT, ctx.code[start:end], (start, end), ctx.line) + + # match first line with whitespace at the beginning + while current_pos < len(ctx.code): + line = line_matcher.match(ctx.code, current_pos) + if line is None: + yield collect_tokens(start_help_text_pos, current_pos) + return + + token = line.group(0) + span = line.span() + + if token == '\n': + # just an empty line + current_pos = span[1] + continue + else: + start_whitespace = re.match(r'\s*', token) + if start_whitespace is None: + # no 
whitespace at the beginning of the line + yield collect_tokens(start_help_text_pos, current_pos) + return + elif min_whitespace is None: + # first nonemtpy line - save amount of whitespace + min_whitespace = KconfigLexer.count_kconfig_help_whitespace(start_whitespace.group(0)) + current_pos = span[1] + else: + cur_whitespace = KconfigLexer.count_kconfig_help_whitespace(start_whitespace.group(0)) + if cur_whitespace < min_whitespace: + yield collect_tokens(start_help_text_pos, current_pos) + return + else: + current_pos = span[1] + + yield collect_tokens(start_help_text_pos, current_pos) + + rules = [ + (shared.whitespace, TokenType.WHITESPACE), + (hash_comment, TokenType.COMMENT), + (shared.common_string_and_char, TokenType.STRING), + # for whatever reason u-boot kconfigs sometimes use ---help--- instead of help + # /u-boot/v2024.07/source/arch/arm/mach-sunxi/Kconfig#L732 + (FirstInLine('-+help-+'), parse_kconfig_help_text), + (kconfig_punctuation, TokenType.PUNCTUATION), + (FirstInLine('help'), parse_kconfig_help_text), + (kconfig_identifier, TokenType.IDENTIFIER), + (kconfig_number, TokenType.NUMBER), + (kconfig_minor_identifier, TokenType.SPECIAL), + # things that do not match are probably things from a macro call. + # unless the syntax changed, or the help parser got confused. + # https://www.kernel.org/doc/html/next/kbuild/kconfig-macro-language.html + # both shell call and warning/error would require additinal parsing + (r'[^\n]+', TokenType.SPECIAL), + ] + + def __init__(self, code): + self.code = code + + def lex(self): + return simple_lexer(self.rules, self.code) + + +# https://sourceware.org/binutils/docs/as.html#Syntax +class GasLexer: + # https://sourceware.org/binutils/docs/as.html#Symbol-Intro + # apparently dots are okay, BUT ctags removes the first dot from labels, for example. same with dollars + # /musl/v1.2.5/source/src/string/aarch64/memcpy.S#L92 + gasm_identifier = r'[a-zA-Z0-9_][a-zA-Z0-9_$.]*' + + gasm_flonum = r'0?[a-zA-Z][+-]?([0-9]|\\s*\n\s*)*\.([0-9]|\\s*\n\s*)*([eE][+-]?[0-9]+)?' + gasm_number = regex_or(gasm_flonum, shared.common_hexidecimal_integer, shared.common_binary_integer, + shared.common_decimal_integer) + + gasm_char = r"'(\\.|.|\n)" + gasm_string = f'(({ shared.double_quote_string_with_escapes })|({ gasm_char }))' + + gasm_comment_chars_map = { + 'generic': (r'#\s',), + + 'nios2': (r'#',), + 'openrisc': (r'#',), + 'powerpc': (r'#',), + 's390': (r'#',), + 'xtensa': (r'#',), + 'microblaze': (r'#',), + 'mips': (r'#',), + 'alpha': (r'#',), + 'csky': (r'#',), + # BUT double pipe in macros is an operator... 
and # not in the first line in + # /linux/v6.10.7/source/arch/m68k/ifpsp060/src/fplsp.S + 'm68k': ('|', '^#', r'#\s'), + 'arc': ('# ', ';'), + + # https://sourceware.org/binutils/docs/as.html#HPPA-Syntax + # /linux/v6.10.7/source/arch/parisc/kernel/perf_asm.S#L28 + 'parisc': (';',), + 'x86': (';',), + 'tic6x': (';', '*'), # cx6, tms320, although the star is sketchy + + # in below, # can be a comment only if the first character of the line + + # https://sourceware.org/binutils/docs/as.html#SH-Syntax + # /linux/v6.10.7/source/arch/sh/kernel/head_32.S#L58 + 'sh': ('!', '^#'), + # https://sourceware.org/binutils/docs/as.html#Sparc_002dSyntax + # /linux/v6.10.7/source/arch/sparc/lib/memset.S#L125 + 'sparc': ('!', '^#'), + # used in ARM https://sourceware.org/binutils/docs/as.html#ARM-Syntax + # /linux/v6.10.7/source/arch/arm/mach-sa1100/sleep.S#L33 + 'arm32': ('@', '^#'), + 'cris': (';', '^#'), + 'avr': (';', '^#'), + # blackfin, tile + } + + gasm_punctuation = r'[.,\[\]()<>{}%&+*!|@#$;:^/\\=~-]' + # TODO make sure all relevant directives are listed here + gasm_preprocessor = r'#[ \t]*(define|ifdef|ifndef|undef|if|else|elif|endif)' + + rules_before_comments = [ + (shared.whitespace, TokenType.WHITESPACE), + # don't interpret macro concatenate as a comment + ('##', TokenType.PUNCTUATION), + # don't interpret or as a comment + (r'\|\|', TokenType.PUNCTUATION), + (FirstInLine(regex_or(shared.c_preproc_include, shared.c_preproc_warning_and_error)), TokenType.SPECIAL), + (FirstInLine(gasm_preprocessor), TokenType.SPECIAL), + (shared.common_slash_comment, TokenType.COMMENT), + ] + + rules_after_comments = [ + (gasm_string, TokenType.STRING), + (gasm_number, TokenType.NUMBER), + (gasm_identifier, TokenType.IDENTIFIER), + (gasm_punctuation, TokenType.PUNCTUATION), + ] + + def __init__(self, code, arch='generic'): + self.code = code + self.comment_chars = self.gasm_comment_chars_map[arch] + + def get_arch_rules(self): + result = [] + + regex_chars = '*?+^.$\\[]|()' + add_slash = lambda ch: '\\' + ch if ch in regex_chars else ch + + for comment_char in self.comment_chars: + if comment_char[0] == '^': + result.append(( + FirstInLine(add_slash(comment_char[1]) + shared.singleline_comment_with_escapes_base), + TokenType.COMMENT + )) + else: + result.append(( + add_slash(comment_char) + shared.singleline_comment_with_escapes_base, + TokenType.COMMENT) + ) + + return result + + def lex(self): + rules = self.rules_before_comments + \ + self.get_arch_rules() + \ + self.rules_after_comments + + return simple_lexer(rules, self.code) + + +# https://www.gnu.org/software/make/manual/make.html +class MakefileLexer: + # https://pubs.opengroup.org/onlinepubs/007904975/utilities/make.html + + # NOTE same as in KConfig, we only care about screaming case names + make_identifier = r'[A-Z0-9_]+' + make_minor_identifier = r'[a-zA-Z0-9_][a-zA-Z0-9-_]*' + make_variable = r'(\$\([a-zA-Z0-9_-]\)|\$\{[a-zA-Z0-9_-]\})' + make_single_quote_string = r"'*?'" + make_string = f'(({ make_single_quote_string })|({ shared.double_quote_string_with_escapes }))' + make_escape = r'\\[#"\']' + make_punctuation = r'[~\\`\[\](){}<>.,:;|%$^@&?!+*/=-]' + make_comment = r'(?|".*?")' +# match warning and error directives with the error string +c_preproc_warning_and_error = r'#\s*(warning|error)\s(\\\s*\n|[^\n])*\n' +# match other preprocessor directives, but don't consume the whole line +c_preproc_other = r'#\s*[a-z]+' +c_preproc_ignore = regex_or(c_preproc_include, c_preproc_warning_and_error, c_preproc_other) + +# \, any amount of whitespace, 
newline or any character that's not backslash newline or a quote, any escaped character +double_quote_string_with_escapes = r'"(\\\s*\n|[^\\"\n]|\\(.|\s))*?"' +single_quote_string_with_escapes = r"'(\\\s*\n|[^\\'\n]|\\(.|\s))*?'" + +common_string_and_char = regex_or(double_quote_string_with_escapes, single_quote_string_with_escapes) + +c_exponent = r'([eE][+-]?[0-9][0-9\']*)' +c_hexidecimal_exponent = r'([pP][+-]?[0-9][0-9\']*)' + +c_decimal_double_part = r'\.[0-9\']*' + c_exponent + '?' +c_octal_double_part = r'\.[0-7\']*' + c_exponent + '?' +c_hexidecimal_double_part = r'\.[0-9a-fA-F\']*' + c_hexidecimal_exponent + '?' + +c_decimal = f'{ common_decimal_integer }({ c_decimal_double_part })?' +c_hexidecimal = f'{ common_hexidecimal_integer }({ c_hexidecimal_double_part })?' +c_octal = f'{ common_octal_integer }({ c_octal_double_part })?' + +# not entirely correct... accepts way more than the standard allows +c_number_suffix = r'([uU]|[lL]|(wb|WB)|[fF]|[zZ]){0,5}' + +c_number = regex_concat(regex_or(c_hexidecimal, common_binary_integer, c_decimal, c_octal), c_number_suffix) + diff --git a/elixir/lexers/tests/__init__.py b/elixir/lexers/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/elixir/lexers/tests/base.py b/elixir/lexers/tests/base.py new file mode 100644 index 00000000..e234df33 --- /dev/null +++ b/elixir/lexers/tests/base.py @@ -0,0 +1,65 @@ +import unittest + +class LexerTest(unittest.TestCase): + default_filtered_tokens = ("SPECIAL", "COMMENT", "STRING", "IDENTIFIER", "SPECIAL", "ERROR") + + # Checks if each token starts in the claimed position of code, if tokens cover all code and if no tokens overlap + def verify_positions(self, code, tokens): + last_token = None + for t in tokens: + if code[t.span[0]:t.span[1]] != t.token: + self.fail(f"token {t} span != code span {code[t.span[0]:t.span[1]].encode()}") + + if last_token is not None and last_token.span[1] != t.span[0]: + self.fail(f"token does not start where the previous token ends. 
prev: {last_token}, next: {t}") + elif last_token is None and t.span[0] != 0: + self.fail(f"first token does not start at zero: {t}") + + last_token = t + + if last_token.span[1] != len(code): + self.fail(f"code is longer than position of the last token: {t}, code len: {len(code)}") + + # Checks if each token is in the claimed line of code + def verify_lines(self, code, tokens): + lines = [""] + code.split("\n") # zero line is emtpy + last_line_number = None + last_line_contents_left = None + for t in tokens: + if last_line_number != t.line: + last_line_number = t.line + last_line_contents_left = lines[t.line] + + if last_line_contents_left is None: + self.fail(f"nothing left in line {t.line} for {t.token} {t}") + + newline_count = t.token.count("\n") + all_token_lines = last_line_contents_left + "\n" + \ + "\n".join([lines[i] for i in range(t.line+1, t.line+newline_count+1)]) + "\n" + token_pos_in_lines = all_token_lines.find(t.token) + if token_pos_in_lines == -1: + self.fail(f"token {t.token} not found in line {t.line}: {all_token_lines.encode()}") + if token_pos_in_lines < len(last_line_contents_left): + last_line_contents_left = last_line_contents_left[token_pos_in_lines:] + else: + last_line_contents_left = None + + # Lex code, do basic soundness checks on tokens (lines and positions) and compare lexing results with a list of tokens + def lex(self, code, expected, filtered_tokens=None, lexer_options={}): + if filtered_tokens is None: + filtered_tokens = self.default_filtered_tokens + + code = code.lstrip() + tokens = list(self.lexer_cls(code, **lexer_options).lex()) + self.verify_positions(code, tokens) + self.verify_lines(code, tokens) + + tokens = [[type.name, token] for type, token, span, line in tokens] + tokens = [t for t in tokens if t[0] in filtered_tokens] + try: + self.assertEqual(tokens, expected) + except Exception as e: + print() + for t in tokens: print(t, end=",\n") + raise e + diff --git a/elixir/lexers/tests/test_c.py b/elixir/lexers/tests/test_c.py new file mode 100644 index 00000000..ffd48cee --- /dev/null +++ b/elixir/lexers/tests/test_c.py @@ -0,0 +1,567 @@ +from ..lexers import CLexer +from .base import LexerTest + +class CLexerTest(LexerTest): + lexer_cls = CLexer + default_filtered_tokens = ("SPECIAL", "COMMENT", "STRING", "IDENTIFIER", "SPECIAL", "ERROR") + + def test_if0(self): + self.lex(r""" +#if 0 +static bool test_v3_0_test(void *h, + enum type_enum e) { + return false; +} +#endif +static bool test_v3_0_test(void *h, + enum type_enum e) { + return false; +} +""", [ + ['SPECIAL', '#if'], + ['NUMBER', '0'], + ['IDENTIFIER', 'static'], + ['IDENTIFIER', 'bool'], + ['IDENTIFIER', 'test_v3_0_test'], + ['IDENTIFIER', 'void'], + ['IDENTIFIER', 'h'], + ['IDENTIFIER', 'enum'], + ['IDENTIFIER', 'type_enum'], + ['IDENTIFIER', 'e'], + ['IDENTIFIER', 'return'], + ['IDENTIFIER', 'false'], + ['SPECIAL', '#endif'], + ['IDENTIFIER', 'static'], + ['IDENTIFIER', 'bool'], + ['IDENTIFIER', 'test_v3_0_test'], + ['IDENTIFIER', 'void'], + ['IDENTIFIER', 'h'], + ['IDENTIFIER', 'enum'], + ['IDENTIFIER', 'type_enum'], + ['IDENTIFIER', 'e'], + ['IDENTIFIER', 'return'], + ['IDENTIFIER', 'false'], + ], self.default_filtered_tokens + ("NUMBER",)) + + def test_preproc(self): + self.lex(r""" +#include +# include +# include "test.h" +# include "test.h" + +# warning war +# error err + # error err + #warning war + +#error "escaped\ + message" + +#warning "escaped\ + message" + +# if defined(TEST) +# elif defined(TEST2) +#else +""", [ + ['SPECIAL', '#include '], + ['SPECIAL', '# include '], + 
['SPECIAL', '# include "test.h"'], + ['SPECIAL', '# include "test.h"'], + ['SPECIAL', '# warning war\n'], + ['SPECIAL', '# error err\n'], + ['SPECIAL', '# error err\n'], + ['SPECIAL', '#warning war\n'], + ['SPECIAL', '#error "escaped\\\n message"\n'], + ['SPECIAL', '#warning "escaped\\ \n message"\n'], + ['SPECIAL', '# if'], + ['IDENTIFIER', 'defined'], + ['IDENTIFIER', 'TEST'], + ['SPECIAL', '# elif'], + ['IDENTIFIER', 'defined'], + ['IDENTIFIER', 'TEST2'], + ['SPECIAL', '#else'], + ]) + + def test_defines(self): + self.lex(""" +# define test "long string \ + escaped newline" + + #define test define1 +# define test2 define12323 + +#define func(name, arg1,arg2...) \ + void name##f() { \ + return arg1 + arg2; + } +""", [ + ['SPECIAL', '# define'], + ['IDENTIFIER', 'test'], + ['STRING', '"long string escaped newline"'], + ['SPECIAL', '#define'], + ['IDENTIFIER', 'test'], + ['IDENTIFIER', 'define1'], + ['SPECIAL', '# define'], + ['IDENTIFIER', 'test2'], + ['IDENTIFIER', 'define12323'], + ['SPECIAL', '#define'], + ['IDENTIFIER', 'func'], + ['IDENTIFIER', 'name'], + ['IDENTIFIER', 'arg1'], + ['IDENTIFIER', 'arg2'], + ['IDENTIFIER', 'void'], + ['IDENTIFIER', 'name'], + ['IDENTIFIER', 'f'], + ['IDENTIFIER', 'return'], + ['IDENTIFIER', 'arg1'], + ['IDENTIFIER', 'arg2'], + ]) + + def test_strings(self): + self.lex(r""" +"asdsad \ + asdasd"; +'asdsad \ + asdasd'; +u8"test string"; +u"test string"; +u"test string"; +L"test string"; +"test \" string"; +"test ' string"; +"test \' string"; +"test \n string"; +"\xff"; +"test" "string"; +"test""string"; +"test" +""", [ + ['STRING', '"asdsad \\ \n asdasd"'], + ['STRING', "'asdsad \\\n asdasd'"], + ['IDENTIFIER', 'u8'], + ['STRING', '"test string"'], + ['IDENTIFIER', 'u'], + ['STRING', '"test string"'], + ['IDENTIFIER', 'u'], + ['STRING', '"test string"'], + ['IDENTIFIER', 'L'], + ['STRING', '"test string"'], + ['STRING', '"test \\" string"'], + ['STRING', '"test \' string"'], + ['STRING', '"test \\\' string"'], + ['STRING', '"test \\n string"'], + ['STRING', '"\\xff"'], + ['STRING', '"test"'], + ['STRING', '"string"'], + ['STRING', '"test"'], + ['STRING', '"string"'], + ['STRING', '"test"'], + ]) + + def test_strings2(self): + self.lex(r""" + "string"; + char* s1 = "asdjlsajdlksad""asdsajdlsad"; //comment6 + char* s2 = "asdjlsajdlksad" "asdsajdlsad"; // \ + single line comment \ + with escapes + char* s3 = " asdsaldjkas \""; + char* s4 = " asdsaldjkas \" zxclzxclk \" asljda"; + char* s5 = " asdsaldjkas \' zxclzxclk \" asljda"; + char* s6 = " asdsaldjkas \"\"\" zxclzxclk \'\'\' ; asljda"; + char* s7 = u8"test"; +""", [ + ['STRING', '"string"'], + ['IDENTIFIER', 'char'], + ['IDENTIFIER', 's1'], + ['STRING', '"asdjlsajdlksad"'], + ['STRING', '"asdsajdlsad"'], + ['COMMENT', '//comment6\n'], + ['IDENTIFIER', 'char'], + ['IDENTIFIER', 's2'], + ['STRING', '"asdjlsajdlksad"'], + ['STRING', '"asdsajdlsad"'], + ['COMMENT', '// \\\n single line comment \\\n with escapes\n'], + ['IDENTIFIER', 'char'], + ['IDENTIFIER', 's3'], + ['STRING', '" asdsaldjkas \\""'], + ['IDENTIFIER', 'char'], + ['IDENTIFIER', 's4'], + ['STRING', '" asdsaldjkas \\" zxclzxclk \\" asljda"'], + ['IDENTIFIER', 'char'], + ['IDENTIFIER', 's5'], + ['STRING', '" asdsaldjkas \\\' zxclzxclk \\" asljda"'], + ['IDENTIFIER', 'char'], + ['IDENTIFIER', 's6'], + ['STRING', '" asdsaldjkas \\"\\"\\" zxclzxclk \\\'\\\'\\\' ; asljda"'], + ['IDENTIFIER', 'char'], + ['IDENTIFIER', 's7'], + ['IDENTIFIER', 'u8'], + ['STRING', '"test"'], + ]) + + def test_chars(self): + self.lex(r""" +'a'; +u8'a'; +u'a'; +U'a'; 
+'\''; +'\"'; +'\\'; +'\n'; +'\f'; +'\U0001f34c'; +'\13'; +'\x1234'; +'\u213'; +u'ą'; +""", [ + ['STRING', "'a'"], + ['IDENTIFIER', 'u8'], + ['STRING', "'a'"], + ['IDENTIFIER', 'u'], + ['STRING', "'a'"], + ['IDENTIFIER', 'U'], + ['STRING', "'a'"], + ['STRING', "'\\''"], + ['STRING', '\'\\"\''], + ['STRING', "'\\\\'"], + ['STRING', "'\\n'"], + ['STRING', "'\\f'"], + ['STRING', "'\\U0001f34c'"], + ['STRING', "'\\13'"], + ['STRING', "'\\x1234'"], + ['STRING', "'\\u213'"], + ['IDENTIFIER', 'u'], + ['STRING', "'ą'"], + ]) + + def test_numbers(self): + self.lex(r""" +1239183; +-1239183; +0xAB08902; +-0xAB08902; +0Xab08902; +-0Xab08902; +0b0101001; +-0b0101001; +0B0101001; +-0B0101001; +0231273; +-0231273; +""", [ + ['NUMBER', '1239183'], + ['NUMBER', '1239183'], + ['NUMBER', '0xAB08902'], + ['NUMBER', '0xAB08902'], + ['NUMBER', '0Xab08902'], + ['NUMBER', '0Xab08902'], + ['NUMBER', '0b0101001'], + ['NUMBER', '0b0101001'], + ['NUMBER', '0B0101001'], + ['NUMBER', '0B0101001'], + ['NUMBER', '0231273'], + ['NUMBER', '0231273'], + ], self.default_filtered_tokens + ("NUMBER",)) + + def test_floats(self): + self.lex(r""" +double e = 0x2ABDEFabcdef; +double + f = 017.048509495; +double -g = 0b1010010; +double g = 0b1010010; +-017.048509495; +017.048509495; +-017.048509495e-12329123; +017.048509495e-12329123; +-0x123.fp34; +0x123.fp34; +-0x123.fP34; +0x123.fP34; +-0x123.fe1p123; +0x123.fe1p123; +-0x123.fe1p123; +0x123.fe1p123; +-.1; +.1; +-1.; +1.; +-0x1.ep+3; +0x1.ep+3; +-0X183083; +0X183083; +-0x213213.1231212'31e21p-2; +0x213213.1231212'31e21p-2; +-123123.123e2; +123123.123e2; +""", [ + ['IDENTIFIER', 'double'], + ['IDENTIFIER', 'e'], + ['NUMBER', '0x2ABDEFabcdef'], + ['IDENTIFIER', 'double'], + ['IDENTIFIER', 'f'], + ['NUMBER', '017.048509495'], + ['IDENTIFIER', 'double'], + ['IDENTIFIER', 'g'], + ['NUMBER', '0b1010010'], + ['IDENTIFIER', 'double'], + ['IDENTIFIER', 'g'], + ['NUMBER', '0b1010010'], + ['NUMBER', '017.048509495'], + ['NUMBER', '017.048509495'], + ['NUMBER', '017.048509495e-12329123'], + ['NUMBER', '017.048509495e-12329123'], + ['NUMBER', '0x123.fp34'], + ['NUMBER', '0x123.fp34'], + ['NUMBER', '0x123.fP34'], + ['NUMBER', '0x123.fP34'], + ['NUMBER', '0x123.fe1p123'], + ['NUMBER', '0x123.fe1p123'], + ['NUMBER', '0x123.fe1p123'], + ['NUMBER', '0x123.fe1p123'], + ['NUMBER', '1'], + ['NUMBER', '1'], + ['NUMBER', '1.'], + ['NUMBER', '1.'], + ['NUMBER', '0x1.ep+3'], + ['NUMBER', '0x1.ep+3'], + ['NUMBER', '0X183083'], + ['NUMBER', '0X183083'], + ['NUMBER', "0x213213.1231212'31e21p-2"], + ['NUMBER', "0x213213.1231212'31e21p-2"], + ['NUMBER', '123123.123e2'], + ['NUMBER', '123123.123e2'], + ], self.default_filtered_tokens + ("NUMBER",)) + + def test_longs(self): + self.lex(r""" +-123213092183ul; +123213092183ul; +-123213092183ull; +123213092183ull; +-123213092183llu; +123213092183llu; +-123213092183uLL; +123213092183uLL; +-123213092183LLU; +123213092183LLU; +-1232'13092183LLU; +1232'13092183LLU; +-1232'1309'2183LLU; +1232'1309'2183LLU; +-1232'1309'218'3LLU; +1232'1309'218'3LLU; +""", [ + ['NUMBER', '123213092183ul'], + ['NUMBER', '123213092183ul'], + ['NUMBER', '123213092183ull'], + ['NUMBER', '123213092183ull'], + ['NUMBER', '123213092183llu'], + ['NUMBER', '123213092183llu'], + ['NUMBER', '123213092183uLL'], + ['NUMBER', '123213092183uLL'], + ['NUMBER', '123213092183LLU'], + ['NUMBER', '123213092183LLU'], + ['NUMBER', "1232'13092183LLU"], + ['NUMBER', "1232'13092183LLU"], + ['NUMBER', "1232'1309'2183LLU"], + ['NUMBER', "1232'1309'2183LLU"], + ['NUMBER', "1232'1309'218'3LLU"], + ['NUMBER', 
"1232'1309'218'3LLU"], + ], self.default_filtered_tokens + ("NUMBER",)) + + def test_comments(self): + self.lex(r""" + /*comment1*/ + /* comment2*/ + /* comment3 */ + /* + * + comment4 + _+}{|":?><~!@#$%&*()_+`123567890-=[];'\,./ + * */ + + /* comment 5 \*\// */ + +// comment5 +char* s2 = "asdjlsajdlksad" "asdsajdlsad"; // \ + single line comment \ + with escapes +char statement; +""", [ + ['COMMENT', '/*comment1*/'], + ['COMMENT', '/* comment2*/'], + ['COMMENT', '/* comment3 */'], + ['COMMENT', '/*\n *\n comment4\n _+}{|":?><~!@#$%&*()_+`123567890-=[];\'\\,./\n * */'], + ['COMMENT', '/* comment 5 \\*\\// */'], + ['COMMENT', '// comment5\n'], + ['IDENTIFIER', 'char'], + ['IDENTIFIER', 's2'], + ['STRING', '"asdjlsajdlksad"'], + ['STRING', '"asdsajdlsad"'], + ['COMMENT', '// \\\n single line comment \\\n with escapes\n'], + ['IDENTIFIER', 'char'], + ['IDENTIFIER', 'statement'], + ]) + + # https://en.cppreference.com/w/cpp/language/pack_indexing + def test_cpp_templates(self): + self.lex(r""" +template +constexpr auto f(Ts&&... ts) { + return sizeof...(Ts); +} + +template +int f() { + std::cout << t << std::endl; + ns1::ns2::type v; + ns1::ns2::type2 v2; + ns1::ns2::type3 v3; +} +""", [ + ['IDENTIFIER', 'template'], + ['IDENTIFIER', 'typename'], + ['IDENTIFIER', 'Ts'], + ['IDENTIFIER', 'constexpr'], + ['IDENTIFIER', 'auto'], + ['IDENTIFIER', 'f'], + ['IDENTIFIER', 'Ts'], + ['IDENTIFIER', 'ts'], + ['IDENTIFIER', 'return'], + ['IDENTIFIER', 'sizeof'], + ['IDENTIFIER', 'Ts'], + ['IDENTIFIER', 'template'], + ['IDENTIFIER', 'typename'], + ['IDENTIFIER', 'T'], + ['IDENTIFIER', 'T'], + ['IDENTIFIER', 't'], + ['IDENTIFIER', 't'], + ['IDENTIFIER', 'int'], + ['IDENTIFIER', 'f'], + ['IDENTIFIER', 'std'], + ['IDENTIFIER', 'cout'], + ['IDENTIFIER', 't'], + ['IDENTIFIER', 'std'], + ['IDENTIFIER', 'endl'], + ['IDENTIFIER', 'ns1'], + ['IDENTIFIER', 'ns2'], + ['IDENTIFIER', 'type'], + ['IDENTIFIER', 'v'], + ['IDENTIFIER', 'ns1'], + ['IDENTIFIER', 'ns2'], + ['IDENTIFIER', 'type2'], + ['IDENTIFIER', 'int'], + ['IDENTIFIER', 'v2'], + ['IDENTIFIER', 'ns1'], + ['IDENTIFIER', 'ns2'], + ['IDENTIFIER', 'type3'], + ['IDENTIFIER', 'int'], + ['IDENTIFIER', 'double'], + ['IDENTIFIER', 'v3'], + ]) + + # https://en.cppreference.com/w/cpp/language/requires + def test_cpp_concepts(self): + self.lex(r""" +template +concept C = requires(T x) { + {x.count()} -> std::same_as; + requires Same +}; +""", [ + ['IDENTIFIER', 'template'], + ['IDENTIFIER', 'typename'], + ['IDENTIFIER', 'T'], + ['IDENTIFIER', 'concept'], + ['IDENTIFIER', 'C'], + ['IDENTIFIER', 'requires'], + ['IDENTIFIER', 'T'], + ['IDENTIFIER', 'x'], + ['IDENTIFIER', 'x'], + ['IDENTIFIER', 'count'], + ['IDENTIFIER', 'std'], + ['IDENTIFIER', 'same_as'], + ['IDENTIFIER', 'int'], + ['IDENTIFIER', 'requires'], + ['IDENTIFIER', 'Same'], + ['IDENTIFIER', 'T'], + ['IDENTIFIER', 'decltype'], + ['IDENTIFIER', 'x'], + ]) + + def test_cpp_class(self): + self.lex(r""" +using namespace std; + +auto f() -> std::string; + +class test { +public: + int operator ""_tx(int); + int a = 123_tx; +}; +""", [ + ['IDENTIFIER', 'using'], + ['IDENTIFIER', 'namespace'], + ['IDENTIFIER', 'std'], + ['IDENTIFIER', 'auto'], + ['IDENTIFIER', 'f'], + ['IDENTIFIER', 'std'], + ['IDENTIFIER', 'string'], + ['IDENTIFIER', 'class'], + ['IDENTIFIER', 'test'], + ['IDENTIFIER', 'public'], + ['IDENTIFIER', 'int'], + ['IDENTIFIER', 'operator'], + ['STRING', '""'], + ['IDENTIFIER', '_tx'], + ['IDENTIFIER', 'int'], + ['IDENTIFIER', 'int'], + ['IDENTIFIER', 'a'], + ['IDENTIFIER', '_tx'], + ]) + + def 
test_cpp_attrs(self): + self.lex(r""" +[[using test: atr1]] [[atr2]] +int f[[atr3]](); +""", [ + ['IDENTIFIER', 'using'], + ['IDENTIFIER', 'test'], + ['IDENTIFIER', 'atr1'], + ['IDENTIFIER', 'atr2'], + ['IDENTIFIER', 'int'], + ['IDENTIFIER', 'f'], + ['IDENTIFIER', 'atr3'], + ]) + + # https://en.cppreference.com/w/cpp/language/noexcept_spec + def test_cpp_noexpect(self): + self.lex(r""" +void f() noexpect(true) {} +""", [ + ['IDENTIFIER', 'void'], + ['IDENTIFIER', 'f'], + ['IDENTIFIER', 'noexpect'], + ['IDENTIFIER', 'true'], + ]) + + # https://en.cppreference.com/w/cpp/language/coroutines + def test_cpp_coroutines(self): + self.lex(r""" +task<> test() { + co_await test2(); +} +""", [ + ['IDENTIFIER', 'task'], + ['IDENTIFIER', 'test'], + ['IDENTIFIER', 'co_await'], + ['IDENTIFIER', 'test2'], + ]) + diff --git a/elixir/lexers/tests/test_dts.py b/elixir/lexers/tests/test_dts.py new file mode 100644 index 00000000..72f39d7f --- /dev/null +++ b/elixir/lexers/tests/test_dts.py @@ -0,0 +1,271 @@ +from ..lexers import DTSLexer +from .base import LexerTest + +class DTSLexerTests(LexerTest): + lexer_cls = DTSLexer + default_filtered_tokens = ("SPECIAL", "COMMENT", "STRING", "IDENTIFIER", "SPECIAL", "ERROR") + + def test_preproc(self): + self.lex(r""" +#include +#include "file2.dtsi" +#error error message asldjlksajdlksad +#warning warning message alsjdlkasjdlksajd +#define MACRO(arg) \ + arg = <3>; +#if 0 +/ { + property = <2>; + MACRO(test) +}; +#endif +""", [ + ['SPECIAL', '#include '], + ['SPECIAL', '#include "file2.dtsi"'], + ['SPECIAL', '#error error message asldjlksajdlksad\n'], + ['SPECIAL', '#warning warning message alsjdlkasjdlksajd\n'], + ['SPECIAL', '#define'], + ['IDENTIFIER', 'MACRO'], + ['IDENTIFIER', 'arg'], + ['IDENTIFIER', 'arg'], + ['SPECIAL', '#if'], + ['IDENTIFIER', 'property'], + ['IDENTIFIER', 'MACRO'], + ['IDENTIFIER', 'test'], + ['SPECIAL', '#endif'], + ]) + + def test_dts_directives(self): + self.lex(r""" +/include/ "file.dtsi" +/dts-v1/; +/memreserve/ 0x100 0x2; +/ { + test_label: test-node { + test-prop2 = <3>; + }; + test-prop = <2>; + /delete-node/ test-node; + /delete-node/ &test_label; + /delete-property/ test-prop; +}; +""", [ + ['SPECIAL', '/include/'], + ['STRING', '"file.dtsi"'], + ['SPECIAL', '/dts-v1/'], + ['SPECIAL', '/memreserve/'], + ['IDENTIFIER', 'test_label'], + ['IDENTIFIER', 'test-node'], + ['IDENTIFIER', 'test-prop2'], + ['IDENTIFIER', 'test-prop'], + ['SPECIAL', '/delete-node/'], + ['IDENTIFIER', 'test-node'], + ['SPECIAL', '/delete-node/'], + ['IDENTIFIER', 'test_label'], + ['SPECIAL', '/delete-property/'], + ['IDENTIFIER', 'test-prop'], + ]) + + def test_dts_unusual_identifiers(self): + self.lex(r""" +/ { + _test_label: 5id,test._+asd-2 { + property,name = <2>; + 0p,r.o_p+e?r#t-y,name = [1,2,3]; + way_too_long_label_123219380921830218309218309213 : node@234 { + compatible = "asd,zxc"; + } + test = <&way_too_long_label_123219380921830218309218309213>; + }; +}; +""", [ + ['IDENTIFIER', '_test_label'], + ['IDENTIFIER', 'id,test._+asd-2'], + ['IDENTIFIER', 'property,name'], + ['IDENTIFIER', 'p,r.o_p+e?r#t-y,name'], + ['IDENTIFIER', 'way_too_long_label_123219380921830218309218309213'], + ['IDENTIFIER', 'node'], + ['IDENTIFIER', '234'], + ['IDENTIFIER', 'compatible'], + ['STRING', '"asd,zxc"'], + ['IDENTIFIER', 'test'], + ['IDENTIFIER', 'way_too_long_label_123219380921830218309218309213'], + ]) + + def test_non_numeric_unit_address(self): + self.lex(r""" +/ { + test: node@test_address { + }; + test2: node@MACRO_ADDRESS(123) { + }; +}; +""", [ + 
['IDENTIFIER', 'test'], + ['IDENTIFIER', 'node'], + ['IDENTIFIER', 'test_address'], + ['IDENTIFIER', 'test2'], + ['IDENTIFIER', 'node'], + ['IDENTIFIER', 'MACRO_ADDRESS'], + ]) + + def test_values_with_labels(self): + self.lex(r""" +/ { + prop1 = label1: <0 label2: 0x21323>; + prop2 = [1 2 3 label3: 4]; + prop3 = label4: "val" label5: ; +}; +""", [ + ['PUNCTUATION', '/'], + ['PUNCTUATION', '{'], + ['IDENTIFIER', 'prop1'], + ['PUNCTUATION', '='], + ['IDENTIFIER', 'label1'], + ['PUNCTUATION', ':'], + ['PUNCTUATION', '<'], + ['NUMBER', '0'], + ['IDENTIFIER', 'label2'], + ['PUNCTUATION', ':'], + ['NUMBER', '0x21323'], + ['PUNCTUATION', '>'], + ['PUNCTUATION', ';'], + ['IDENTIFIER', 'prop2'], + ['PUNCTUATION', '='], + ['PUNCTUATION', '['], + ['NUMBER', '1'], + ['NUMBER', '2'], + ['NUMBER', '3'], + ['IDENTIFIER', 'label3'], + ['PUNCTUATION', ':'], + ['NUMBER', '4'], + ['PUNCTUATION', ']'], + ['PUNCTUATION', ';'], + ['IDENTIFIER', 'prop3'], + ['PUNCTUATION', '='], + ['IDENTIFIER', 'label4'], + ['PUNCTUATION', ':'], + ['STRING', '"val"'], + ['IDENTIFIER', 'label5'], + ['PUNCTUATION', ':'], + ['PUNCTUATION', ';'], + ['PUNCTUATION', '}'], + ['PUNCTUATION', ';'], + ], self.default_filtered_tokens + ('PUNCTUATION', 'NUMBER')) + + def test_references(self): + self.lex(r""" +/ { + interrupt-parent = < &{/node@c2342/another_node@address(2)/node3} >; + property2 = <&{/node@c2342/another_node@address(2)}>; + power-domains = <&power DEVICE_DOMAIN>; +}; +""", [ + ['IDENTIFIER', 'interrupt-parent'], + ['IDENTIFIER', 'node'], + ['IDENTIFIER', 'c2342'], + ['IDENTIFIER', 'another_node'], + ['IDENTIFIER', 'address'], + ['IDENTIFIER', 'node3'], + ['IDENTIFIER', 'property2'], + ['IDENTIFIER', 'node'], + ['IDENTIFIER', 'c2342'], + ['IDENTIFIER', 'another_node'], + ['IDENTIFIER', 'address'], + ['IDENTIFIER', 'power-domains'], + ['IDENTIFIER', 'power'], + ['IDENTIFIER', 'DEVICE_DOMAIN'], + ]) + + def test_property_types(self): + self.lex(r""" +/ { + prop1 = <0 0x21323>; + prop2 = [1 2 3 4]; + prop3 = "val", "val4" ; + prop4 = <~1+2-3*4/5%6&7|8^9<<10>>11>; + prop5; +}; +""", [ + ['PUNCTUATION', '/'], + ['PUNCTUATION', '{'], + ['IDENTIFIER', 'prop1'], + ['PUNCTUATION', '='], + ['PUNCTUATION', '<'], + ['NUMBER', '0'], + ['NUMBER', '0x21323'], + ['PUNCTUATION', '>'], + ['PUNCTUATION', ';'], + ['IDENTIFIER', 'prop2'], + ['PUNCTUATION', '='], + ['PUNCTUATION', '['], + ['NUMBER', '1'], + ['NUMBER', '2'], + ['NUMBER', '3'], + ['NUMBER', '4'], + ['PUNCTUATION', ']'], + ['PUNCTUATION', ';'], + ['IDENTIFIER', 'prop3'], + ['PUNCTUATION', '='], + ['STRING', '"val"'], + ['PUNCTUATION', ','], + ['STRING', '"val4"'], + ['PUNCTUATION', ';'], + ['IDENTIFIER', 'prop4'], + ['PUNCTUATION', '='], + ['PUNCTUATION', '<'], + ['PUNCTUATION', '~'], + ['NUMBER', '1'], + ['PUNCTUATION', '+'], + ['NUMBER', '2'], + ['PUNCTUATION', '-'], + ['NUMBER', '3'], + ['PUNCTUATION', '*'], + ['NUMBER', '4'], + ['PUNCTUATION', '/'], + ['NUMBER', '5'], + ['PUNCTUATION', '%'], + ['NUMBER', '6'], + ['PUNCTUATION', '&'], + ['NUMBER', '7'], + ['PUNCTUATION', '|'], + ['NUMBER', '8'], + ['PUNCTUATION', '^'], + ['NUMBER', '9'], + ['PUNCTUATION', '<'], + ['PUNCTUATION', '<'], + ['NUMBER', '10'], + ['PUNCTUATION', '>'], + ['PUNCTUATION', '>'], + ['NUMBER', '11'], + ['PUNCTUATION', '>'], + ['PUNCTUATION', ';'], + ['IDENTIFIER', 'prop5'], + ['PUNCTUATION', ';'], + ['PUNCTUATION', '}'], + ['PUNCTUATION', ';'], + ], self.default_filtered_tokens + ('PUNCTUATION', 'NUMBER')) + + def test_comments(self): + self.lex(r""" +//license info +/ { + interrupts = , /* comment 1 */ + ; 
// comemnt2 + /* long + * coment + * asdasd + */ +}; +""", [ + ['COMMENT', '//license info\n'], + ['IDENTIFIER', 'interrupts'], + ['IDENTIFIER', 'NAME'], + ['IDENTIFIER', 'TYPE'], + ['COMMENT', '/* comment 1 */'], + ['IDENTIFIER', 'NAME'], + ['IDENTIFIER', 'TYPE'], + ['COMMENT', '// comemnt2\n'], + ['COMMENT', '/* long\n * coment\n * asdasd\n */'], + ], self.default_filtered_tokens) + diff --git a/elixir/lexers/tests/test_gas.py b/elixir/lexers/tests/test_gas.py new file mode 100644 index 00000000..3c541f22 --- /dev/null +++ b/elixir/lexers/tests/test_gas.py @@ -0,0 +1,282 @@ +from ..lexers import GasLexer +from .base import LexerTest + +class GasLexerTest(LexerTest): + lexer_cls = GasLexer + default_filtered_tokens = ("SPECIAL", "COMMENT", "STRING", "IDENTIFIER", "SPECIAL", "ERROR") + + def test_comments_m68k(self): + self.lex(r""" +# comment 1 +#comment 2 + clrl d1 | comment 3 + clrl d0 |comment 4 +| comment 4 + + clrl d2 # comment 3 + +#if defined(C1) || !defined(C2) + addql #4,%sp +label: + movel #-IDNENT,%sp@(IDENT)| comment 5 +// /linux/v6.10.7/source/arch/m68k/ifpsp060/src/fplsp.S + test # comment 6 +# endif + +#define macro(x) inst &IDENT,%pc@(ident); inst x +""", [ + ['COMMENT', '# comment 1\n'], + ['COMMENT', '#comment 2\n'], + ['IDENTIFIER', 'clrl'], + ['IDENTIFIER', 'd1'], + ['COMMENT', '| comment 3\n'], + ['IDENTIFIER', 'clrl'], + ['IDENTIFIER', 'd0'], + ['COMMENT', '|comment 4\n'], + ['COMMENT', '| comment 4\n'], + ['IDENTIFIER', 'clrl'], + ['IDENTIFIER', 'd2'], + ['COMMENT', '# comment 3\n'], + ['SPECIAL', '#if'], + ['IDENTIFIER', 'defined'], + ['IDENTIFIER', 'C1'], + ['IDENTIFIER', 'defined'], + ['IDENTIFIER', 'C2'], + ['IDENTIFIER', 'addql'], + ['IDENTIFIER', 'sp'], + ['IDENTIFIER', 'label'], + ['IDENTIFIER', 'movel'], + ['IDENTIFIER', 'IDNENT'], + ['IDENTIFIER', 'sp'], + ['IDENTIFIER', 'IDENT'], + ['COMMENT', '| comment 5\n'], + ['COMMENT', '// /linux/v6.10.7/source/arch/m68k/ifpsp060/src/fplsp.S\n'], + ['IDENTIFIER', 'test'], + ['COMMENT', '# comment 6\n'], + ['SPECIAL', '# endif'], + ['SPECIAL', '#define'], + ['IDENTIFIER', 'macro'], + ['IDENTIFIER', 'x'], + ['IDENTIFIER', 'inst'], + ['IDENTIFIER', 'IDENT'], + ['IDENTIFIER', 'pc'], + ['IDENTIFIER', 'ident'], + ['IDENTIFIER', 'inst'], + ['IDENTIFIER', 'x'], + ], lexer_options={"arch": "m68k"}) + + def test_comments_sparc(self): + self.lex(r""" +#define F(i) \ + .type i,@function; + + std t1, [0x00]; + +/*comment default */ +//comment default2 + .type identifier,#function +label: + sethi %hi(IDENT), %g0 !test comment + wrpr %g1, %sp ! test comment +# comment +#comment + sethi %hi(IDENT_1 | IDENT_2), %l0 +""", [ + ['SPECIAL', '#define'], + ['IDENTIFIER', 'F'], + ['IDENTIFIER', 'i'], + ['IDENTIFIER', 'type'], + ['IDENTIFIER', 'i'], + ['IDENTIFIER', 'function'], + ['IDENTIFIER', 'std'], + ['IDENTIFIER', 't1'], + ['COMMENT', '/*comment default */'], + ['COMMENT', '//comment default2\n'], + ['IDENTIFIER', 'type'], + ['IDENTIFIER', 'identifier'], + ['IDENTIFIER', 'function'], + ['IDENTIFIER', 'label'], + ['IDENTIFIER', 'sethi'], + ['IDENTIFIER', 'hi'], + ['IDENTIFIER', 'IDENT'], + ['IDENTIFIER', 'g0'], + ['COMMENT', '!test comment\n'], + ['IDENTIFIER', 'wrpr'], + ['IDENTIFIER', 'g1'], + ['IDENTIFIER', 'sp'], + ['COMMENT', '! 
test comment\n'], + ['COMMENT', '# comment\n'], + ['COMMENT', '#comment\n'], + ['IDENTIFIER', 'sethi'], + ['IDENTIFIER', 'hi'], + ['IDENTIFIER', 'IDENT_1'], + ['IDENTIFIER', 'IDENT_2'], + ['IDENTIFIER', 'l0'], + ], lexer_options={"arch": "sparc"}) + + def test_comments_arm32(self): + self.lex(r""" +// comment default +/* comment default2 */ +test: + bic r0, r1, #10 + # comment 1 + #comment 1 +""" ++ "\t# comment 1" + r""" + moveq r0, #IDENTIFIER @ Comment +# comment 2 +#comment 2 + push {r0} + add \addr, \addr, \tmp @comment3 + ldr r1, =TEST3 + ldr TEST, [sp, IDENT(i)]; + .long PMD_TYPE_SECT | \ + PMD_BIT4 + stmfd sp!, {r0, r1, r2, r3} + eor RT0, d, b; +""", [ + ['COMMENT', '// comment default\n'], + ['COMMENT', '/* comment default2 */'], + ['IDENTIFIER', 'test'], + ['IDENTIFIER', 'bic'], + ['IDENTIFIER', 'r0'], + ['IDENTIFIER', 'r1'], + ['NUMBER', '10'], + ['COMMENT', '# comment 1\n'], + ['COMMENT', '#comment 1\n'], + ['COMMENT', '# comment 1\n'], + ['IDENTIFIER', 'moveq'], + ['IDENTIFIER', 'r0'], + ['IDENTIFIER', 'IDENTIFIER'], + ['COMMENT', '@ Comment\n'], + ['COMMENT', '# comment 2\n'], + ['COMMENT', '#comment 2\n'], + ['IDENTIFIER', 'push'], + ['IDENTIFIER', 'r0'], + ['IDENTIFIER', 'add'], + ['IDENTIFIER', 'addr'], + ['IDENTIFIER', 'addr'], + ['IDENTIFIER', 'tmp'], + ['COMMENT', '@comment3\n'], + ['IDENTIFIER', 'ldr'], + ['IDENTIFIER', 'r1'], + ['IDENTIFIER', 'TEST3'], + ['IDENTIFIER', 'ldr'], + ['IDENTIFIER', 'TEST'], + ['IDENTIFIER', 'sp'], + ['IDENTIFIER', 'IDENT'], + ['IDENTIFIER', 'i'], + ['IDENTIFIER', 'long'], + ['IDENTIFIER', 'PMD_TYPE_SECT'], + ['IDENTIFIER', 'PMD_BIT4'], + ['IDENTIFIER', 'stmfd'], + ['IDENTIFIER', 'sp'], + ['IDENTIFIER', 'r0'], + ['IDENTIFIER', 'r1'], + ['IDENTIFIER', 'r2'], + ['IDENTIFIER', 'r3'], + ['IDENTIFIER', 'eor'], + ['IDENTIFIER', 'RT0'], + ['IDENTIFIER', 'd'], + ['IDENTIFIER', 'b'], + ], self.default_filtered_tokens + ("NUMBER",), {"arch": "arm32"}) + + def test_comments_generic(self): + self.lex(r""" +/* comment + * more comment + * more comment + */ + mov r0, r1 //test + mov x0, #IDENT + stp x1, x2, [sp, #-4]! 
+#if defined(IDENT1) || defined(IDENT2) +#endif +""", [ + ['COMMENT', '/* comment\n * more comment\n * more comment\n */'], + ['IDENTIFIER', 'mov'], + ['IDENTIFIER', 'r0'], + ['PUNCTUATION', ','], + ['IDENTIFIER', 'r1'], + ['COMMENT', '//test\n'], + ['IDENTIFIER', 'mov'], + ['IDENTIFIER', 'x0'], + ['PUNCTUATION', ','], + ['PUNCTUATION', '#'], + ['IDENTIFIER', 'IDENT'], + ['IDENTIFIER', 'stp'], + ['IDENTIFIER', 'x1'], + ['PUNCTUATION', ','], + ['IDENTIFIER', 'x2'], + ['PUNCTUATION', ','], + ['PUNCTUATION', '['], + ['IDENTIFIER', 'sp'], + ['PUNCTUATION', ','], + ['PUNCTUATION', '#'], + ['PUNCTUATION', '-'], + ['NUMBER', '4'], + ['PUNCTUATION', ']'], + ['PUNCTUATION', '!'], + ['SPECIAL', '#if'], + ['IDENTIFIER', 'defined'], + ['PUNCTUATION', '('], + ['IDENTIFIER', 'IDENT1'], + ['PUNCTUATION', ')'], + ['PUNCTUATION', '||'], + ['IDENTIFIER', 'defined'], + ['PUNCTUATION', '('], + ['IDENTIFIER', 'IDENT2'], + ['PUNCTUATION', ')'], + ['SPECIAL', '#endif'], + ], self.default_filtered_tokens + ("PUNCTUATION", "NUMBER")) + + def test_comments_preproc(self): + self.lex(r""" + # error "test" +#warning "test" +#include "test.h" +#include +#if defined(T1) || defined(T2) +#endif +""", [ + ['SPECIAL', '# error "test"\n'], + ['SPECIAL', '#warning "test"\n'], + ['SPECIAL', '#include "test.h"'], + ['SPECIAL', '#include '], + ['SPECIAL', '#if'], + ['IDENTIFIER', 'defined'], + ['IDENTIFIER', 'T1'], + ['IDENTIFIER', 'defined'], + ['IDENTIFIER', 'T2'], + ['SPECIAL', '#endif'], + ]) + + def test_comments_literals(self): + self.lex(r""" +.byte 12, 0b1010, 0B1010, 0x34, 0123, 0X45, 'a, '\b +.ascii "asdsad\"zxczc" +.float 0f-12321321030982394324\ + 21321432432.234324324E-14 +.float 0f-123.123213e+13 +.float 0e-123.123213e+13 +""", [ + ['IDENTIFIER', 'byte'], + ['NUMBER', '12'], + ['NUMBER', '0b1010'], + ['NUMBER', '0B1010'], + ['NUMBER', '0x34'], + ['NUMBER', '0123'], + ['NUMBER', '0X45'], + ['STRING', "'a"], + ['STRING', "'\\b"], + ['IDENTIFIER', 'ascii'], + ['STRING', '"asdsad\\"zxczc"'], + ['IDENTIFIER', 'float'], + ['NUMBER', '0f-12321321030982394324\\\n 21321432432.234324324E-14'], + ['IDENTIFIER', 'float'], + ['NUMBER', '0f-123.123213e+13'], + ['IDENTIFIER', 'float'], + ['NUMBER', '0e-123.123213e+13'], + ], self.default_filtered_tokens + ("NUMBER",)) + diff --git a/elixir/lexers/tests/test_kconfig.py b/elixir/lexers/tests/test_kconfig.py new file mode 100644 index 00000000..e0adf379 --- /dev/null +++ b/elixir/lexers/tests/test_kconfig.py @@ -0,0 +1,372 @@ +from ..lexers import KconfigLexer +from .base import LexerTest + +class KconfigLexerTest(LexerTest): + lexer_cls = KconfigLexer + default_filtered_tokens = ("SPECIAL", "COMMENT", "STRING", "IDENTIFIER", "SPECIAL", "ERROR") + + # TODO improve macro calls + + def test_comments(self): + self.lex(r""" +# comment1 +config 64BIT # comment2 + bool # comment3 + default "# asd" + default $(shell, \#) + help + asdasdsajdlakjd # not a comment + + asdasdsajdlakjd # not a comment + + # comment 5 + + # comment 6 +""", [ + ['COMMENT', '# comment1\n'], + ['SPECIAL', 'config'], + ['IDENTIFIER', '64BIT'], + ['COMMENT', '# comment2\n'], + ['SPECIAL', 'bool'], + ['COMMENT', '# comment3\n'], + ['SPECIAL', 'default'], + ['STRING', '"# asd"'], + ['SPECIAL', 'default'], + ['SPECIAL', 'shell'], + ['SPECIAL', '\\#)'], + ['SPECIAL', 'help'], + ['COMMENT', ' asdasdsajdlakjd # not a comment\n\n asdasdsajdlakjd # not a comment\n\n # comment 5\n\n'], + ['COMMENT', '# comment 6\n'], + ]) + + + def test_keywords(self): + self.lex(r""", +menu "menu name" + +visible if y + +choice + prompt 
"test prompt" + default y + +config 86CONIFG + bool "text" + prompt "prompt" + default y + tristate "test" + def_bool TEST_bool + depends on TEST + select TEST2 + imply TEST3 + range 5 512 if CONFIG_512 + help + help text + + more help text + +endmenu +""", [ + ['SPECIAL', 'menu'], + ['STRING', '"menu name"'], + ['SPECIAL', 'visible'], + ['SPECIAL', 'if'], + ['SPECIAL', 'y'], + ['SPECIAL', 'choice'], + ['SPECIAL', 'prompt'], + ['STRING', '"test prompt"'], + ['SPECIAL', 'default'], + ['SPECIAL', 'y'], + ['SPECIAL', 'config'], + ['IDENTIFIER', '86CONIFG'], + ['SPECIAL', 'bool'], + ['STRING', '"text"'], + ['SPECIAL', 'prompt'], + ['STRING', '"prompt"'], + ['SPECIAL', 'default'], + ['SPECIAL', 'y'], + ['SPECIAL', 'tristate'], + ['STRING', '"test"'], + ['SPECIAL', 'def_bool'], + ['IDENTIFIER', 'TEST_bool'], + ['SPECIAL', 'depends'], + ['SPECIAL', 'on'], + ['IDENTIFIER', 'TEST'], + ['SPECIAL', 'select'], + ['IDENTIFIER', 'TEST2'], + ['SPECIAL', 'imply'], + ['IDENTIFIER', 'TEST3'], + ['SPECIAL', 'range'], + ['SPECIAL', 'if'], + ['IDENTIFIER', 'CONFIG_512'], + ['SPECIAL', 'help'], + ['COMMENT', ' help text\n\n more help text\n\n'], + ['SPECIAL', 'endmenu'], + ]) + + def test_conditions(self): + self.lex(r""" +config TEST + select TEST1 if TEST2 = TEST3 + select TEST2 if TEST5 != TEST6 + select TEST7 if TEST8 < TEST9 + select TEST10 if TEST11 > TEST12 + select TEST13 if TEST14 <= TEST15 +""", [ + ['SPECIAL', 'config'], + ['IDENTIFIER', 'TEST'], + ['SPECIAL', 'select'], + ['IDENTIFIER', 'TEST1'], + ['SPECIAL', 'if'], + ['IDENTIFIER', 'TEST2'], + ['PUNCTUATION', '='], + ['IDENTIFIER', 'TEST3'], + ['SPECIAL', 'select'], + ['IDENTIFIER', 'TEST2'], + ['SPECIAL', 'if'], + ['IDENTIFIER', 'TEST5'], + ['PUNCTUATION', '!'], + ['PUNCTUATION', '='], + ['IDENTIFIER', 'TEST6'], + ['SPECIAL', 'select'], + ['IDENTIFIER', 'TEST7'], + ['SPECIAL', 'if'], + ['IDENTIFIER', 'TEST8'], + ['PUNCTUATION', '<'], + ['IDENTIFIER', 'TEST9'], + ['SPECIAL', 'select'], + ['IDENTIFIER', 'TEST10'], + ['SPECIAL', 'if'], + ['IDENTIFIER', 'TEST11'], + ['PUNCTUATION', '>'], + ['IDENTIFIER', 'TEST12'], + ['SPECIAL', 'select'], + ['IDENTIFIER', 'TEST13'], + ['SPECIAL', 'if'], + ['IDENTIFIER', 'TEST14'], + ['PUNCTUATION', '<'], + ['PUNCTUATION', '='], + ['IDENTIFIER', 'TEST15'], + ], self.default_filtered_tokens + ("PUNCTUATION",)) + + def test_conditions2(self): + self.lex(r""" +config TEST + select TEST16 if TEST17 >= TEST3 + select TEST17 if (TEST18 = TEST19) + + select TEST20 if !(TEST21 = TEST22) + select TEST23 if TEST24 && TEST25 + select TEST26 if TEST27 || TEST28 +""", [ + ['SPECIAL', 'config'], + ['IDENTIFIER', 'TEST'], + ['SPECIAL', 'select'], + ['IDENTIFIER', 'TEST16'], + ['SPECIAL', 'if'], + ['IDENTIFIER', 'TEST17'], + ['PUNCTUATION', '>'], + ['PUNCTUATION', '='], + ['IDENTIFIER', 'TEST3'], + ['SPECIAL', 'select'], + ['IDENTIFIER', 'TEST17'], + ['SPECIAL', 'if'], + ['PUNCTUATION', '('], + ['IDENTIFIER', 'TEST18'], + ['PUNCTUATION', '='], + ['IDENTIFIER', 'TEST19'], + ['PUNCTUATION', ')'], + ['SPECIAL', 'select'], + ['IDENTIFIER', 'TEST20'], + ['SPECIAL', 'if'], + ['PUNCTUATION', '!'], + ['PUNCTUATION', '('], + ['IDENTIFIER', 'TEST21'], + ['PUNCTUATION', '='], + ['IDENTIFIER', 'TEST22'], + ['PUNCTUATION', ')'], + ['SPECIAL', 'select'], + ['IDENTIFIER', 'TEST23'], + ['SPECIAL', 'if'], + ['IDENTIFIER', 'TEST24'], + ['PUNCTUATION', '&'], + ['PUNCTUATION', '&'], + ['IDENTIFIER', 'TEST25'], + ['SPECIAL', 'select'], + ['IDENTIFIER', 'TEST26'], + ['SPECIAL', 'if'], + ['IDENTIFIER', 'TEST27'], + ['PUNCTUATION', '|'], + ['PUNCTUATION', 
'|'], + ['IDENTIFIER', 'TEST28'], + ], self.default_filtered_tokens + ("PUNCTUATION",)) + + def test_macros(self): + self.lex(r""" +config TEST + depends on $(shell,cat file | grep -vi "option 2") + depends on $(info,info to print) + depends on $(warning-if,a != b,warning to print) +""", [ + ['SPECIAL', 'config'], + ['IDENTIFIER', 'TEST'], + ['SPECIAL', 'depends'], + ['SPECIAL', 'on'], + ['PUNCTUATION', '$'], + ['PUNCTUATION', '('], + ['SPECIAL', 'shell'], + ['PUNCTUATION', ','], + ['SPECIAL', 'cat'], + ['SPECIAL', 'file'], + ['PUNCTUATION', '|'], + ['SPECIAL', 'grep'], + ['PUNCTUATION', '-'], + ['SPECIAL', 'vi'], + ['STRING', '"option 2"'], + ['PUNCTUATION', ')'], + ['SPECIAL', 'depends'], + ['SPECIAL', 'on'], + ['PUNCTUATION', '$'], + ['PUNCTUATION', '('], + ['SPECIAL', 'info'], + ['PUNCTUATION', ','], + ['SPECIAL', 'info'], + ['SPECIAL', 'to'], + ['SPECIAL', 'print'], + ['PUNCTUATION', ')'], + ['SPECIAL', 'depends'], + ['SPECIAL', 'on'], + ['PUNCTUATION', '$'], + ['PUNCTUATION', '('], + ['SPECIAL', 'warning-if'], + ['PUNCTUATION', ','], + ['SPECIAL', 'a'], + ['PUNCTUATION', '!'], + ['PUNCTUATION', '='], + ['SPECIAL', 'b'], + ['PUNCTUATION', ','], + ['SPECIAL', 'warning'], + ['SPECIAL', 'to'], + ['SPECIAL', 'print'], + ['PUNCTUATION', ')'], + ], self.default_filtered_tokens + ("PUNCTUATION",)) + +def test_macros2(self): + self.lex(r""" +config TEST + depends on $(error-if,a != b,warning to print) + depends on $(filename) + depends on $(lineno) +""", [ + ['SPECIAL', 'config'], + ['IDENTIFIER', 'TEST'], + ['SPECIAL', 'depends'], + ['SPECIAL', 'on'], + ['PUNCTUATION', '$'], + ['PUNCTUATION', '('], + ['SPECIAL', 'error-if'], + ['PUNCTUATION', ','], + ['SPECIAL', 'a'], + ['PUNCTUATION', '!'], + ['PUNCTUATION', '='], + ['SPECIAL', 'b'], + ['PUNCTUATION', ','], + ['SPECIAL', 'warning'], + ['SPECIAL', 'to'], + ['SPECIAL', 'print'], + ['PUNCTUATION', ')'], + ['SPECIAL', 'depends'], + ['SPECIAL', 'on'], + ['PUNCTUATION', '$'], + ['PUNCTUATION', '('], + ['SPECIAL', 'filename'], + ['PUNCTUATION', ')'], + ['SPECIAL', 'depends'], + ['SPECIAL', 'on'], + ['PUNCTUATION', '$'], + ['PUNCTUATION', '('], + ['SPECIAL', 'lineno'], + ['PUNCTUATION', ')'], + ], self.default_filtered_tokens + ("PUNCTUATION",)) + + def test_help(self): + self.lex(r""" +config + help + help test lasdlkajdk sadlksajd + lsajdlad + + salkdjaldlksajd + + " + asdlkajsdlkjsadlajdsk + + salkdjlsakdj' +config + select TEST +config + ---help--- + help test lasdlkajdk sadlksajd + lsajdlad + + salkdjaldlksajd + +config + select TEST +""", [ + ['SPECIAL', 'config'], + ['SPECIAL', 'help'], + ['COMMENT', ' help test lasdlkajdk sadlksajd\n lsajdlad\n\n salkdjaldlksajd\n\n "\n asdlkajsdlkjsadlajdsk\n\n salkdjlsakdj\'\n'], + ['SPECIAL', 'config'], + ['SPECIAL', 'select'], + ['IDENTIFIER', 'TEST'], + ['SPECIAL', 'config'], + ['SPECIAL', '---help---'], + ['COMMENT', ' help test lasdlkajdk sadlksajd\n lsajdlad\n\n salkdjaldlksajd\n \n'], + ['SPECIAL', 'config'], + ['SPECIAL', 'select'], + ['IDENTIFIER', 'TEST'], + ]) + + def test_types(self): + self.lex(r""" +config + bool + default y + +config + tristate + default m + +config + hex + default 0xdfffffff00000000 + +config + string + default "string \" test # \# zxc" + +config + int + default 21312323 +""", [ + ['SPECIAL', 'config'], + ['SPECIAL', 'bool'], + ['SPECIAL', 'default'], + ['SPECIAL', 'y'], + ['SPECIAL', 'config'], + ['SPECIAL', 'tristate'], + ['SPECIAL', 'default'], + ['SPECIAL', 'm'], + ['SPECIAL', 'config'], + ['SPECIAL', 'hex'], + ['SPECIAL', 'default'], + ['IDENTIFIER', 
'0xdfffffff00000000'],
+    ['SPECIAL', 'config'],
+    ['SPECIAL', 'string'],
+    ['SPECIAL', 'default'],
+    ['STRING', '"string \\" test # \\# zxc"'],
+    ['SPECIAL', 'config'],
+    ['SPECIAL', 'int'],
+    ['SPECIAL', 'default'],
+    ])
diff --git a/elixir/lexers/utils.py b/elixir/lexers/utils.py
new file mode 100644
index 00000000..7b991dd8
--- /dev/null
+++ b/elixir/lexers/utils.py
@@ -0,0 +1,210 @@
+import re
+import enum
+from collections import namedtuple
+
+# Supported token types
+class TokenType(enum.Enum):
+    WHITESPACE = 'whitespace'
+    COMMENT = 'comment'
+    STRING = 'string'
+    NUMBER = 'number'
+    IDENTIFIER = 'identifier'
+    # may require extra parsing or context information
+    SPECIAL = 'special'
+    PUNCTUATION = 'punctuation'
+    # lexing failure - should be logged, at least until update jobs are preemptible
+    ERROR = 'error'
+
+Token = namedtuple('Token', 'token_type, token, span, line')
+
+def match_regex(regex):
+    rule = re.compile(regex, flags=re.MULTILINE)
+    return lambda code, pos, _: rule.match(code, pos)
+
+def match_token(ctx, pattern, token_type):
+    match = re.compile(pattern).match(ctx.code, ctx.pos)
+    if match is None:
+        return None, ctx
+    else:
+        span = match.span()
+        result = Token(token_type, ctx.code[span[0]:span[1]], span, ctx.line)
+        ctx.pos = span[1]
+        ctx.line += result.token.count('\n')
+        return result, ctx
+
+def split_by_groups(*token_types):
+    def split(ctx, match):
+        pos = ctx.pos
+        line = ctx.line
+        for gi in range(len(match.groups())):
+            token = match.group(gi+1)
+            if len(token) != 0:
+                action = token_types[gi]
+                yield Token(action, token, (pos, pos+len(token)), line)
+                line += token.count("\n")
+                pos += len(token)
+
+    return split
+
+def token_from_match(ctx, match, token_type):
+    span = match.span()
+    result = Token(token_type, ctx.code[span[0]:span[1]], span, ctx.line)
+    ctx.pos = span[1]
+    ctx.line = ctx.line+result.token.count('\n')
+    return result, ctx
+
+def token_from_string(ctx, match, token_type):
+    span = (ctx.pos, ctx.pos+len(match))
+    result = Token(token_type, ctx.code[span[0]:span[1]], span, ctx.line)
+    ctx.pos = span[1]
+    ctx.line = ctx.line+result.token.count('\n')
+    return result, ctx
+
+# Interface class that allows matching only if certain conditions,
+# hard to express in regex, are true
+class Matcher:
+    def update_after_match(self, code: str, pos: int, line: int, token: Token) -> None:
+        pass
+
+    def match(self, code: str, pos: int, line: int) -> None | re.Match:
+        pass
+
+# Match token only if it's the first token in line (skipping whitespace)
+class FirstInLine(Matcher):
+    whitespace = re.compile(r'\s*')
+
+    def __init__(self, regex):
+        self.rule = re.compile(regex, flags=re.MULTILINE)
+        self.first_in_line = True
+
+    def update_after_match(self, code, pos, line, token):
+        # first token is always first in line
+        if pos == 0:
+            self.first_in_line = True
+            return
+
+        # check if matched token contains a newline
+        newline_pos = code.rfind('\n', token.span[0], token.span[1])
+
+        # if it does contain a newline, check the part after the newline
+        if newline_pos != -1:
+            post_newline_tok = code[newline_pos+1:token.span[1]]
+
+            # if part after newline contains only whitespace (or nothing), the next token is first in line
+            if self.whitespace.fullmatch(post_newline_tok):
+                self.first_in_line = True
+        # if currently matched is the first in line, and only contains whitespace,
+        # the next token also counts as first in line
+        elif self.first_in_line and self.whitespace.fullmatch(code, token.span[0], token.span[1]):
+            self.first_in_line = True
+        # otherwise
reset first in line marker + else: + self.first_in_line = False + + def match(self, code, pos, line): + if self.first_in_line: + return self.rule.match(code, pos) + +class LexerContext: + def __init__(self, code, pos, line, filter_tokens): + self.code = code + self.pos = pos + self.line = line + self.filter_tokens = filter_tokens + +def simple_lexer(rules, code, filter_tokens=None): + if len(code) == 0: + return + + # to avoid dealing with files without trailing newlines + if code[-1] != '\n': + code += '\n' + + rules_compiled = [] + after_match_hooks = [] + + # compile rules + for rule, action in rules: + # string rules are actually match regex rules + if type(rule) is str: + rules_compiled.append((match_regex(rule), action)) + # rules can also be callables + elif callable(rule): + rules_compiled.append((rule, action)) + # rules can also be matchers - matchers get more information during parsing, + # that information can stored in their state + elif isinstance(rule, Matcher): + rules_compiled.append((rule.match, action)) + after_match_hooks.append(rule.update_after_match) + + # helper function that calls hooks before yielding + def yield_token(to_yield): + for hook in after_match_hooks: + hook(code, pos, line, to_yield) + return to_yield + + pos = 0 + line = 1 + while pos < len(code): + rule_matched = False + for rule, action in rules_compiled: + match = rule(code, pos, line) + + if match is not None: + span = match.span() + # if match is empty - continue + if span[0] == span[1]: + continue + + rule_matched = True + + if isinstance(action, TokenType): + # only parse tokens of interest - slices apparently copy + if filter_tokens is None or action in filter_tokens: + token = code[span[0]:span[1]] + else: + token = None + + token_obj = Token(action, token, span, line) + yield yield_token(token_obj) + line += code.count('\n', span[0], span[1]) + pos = span[1] + break + elif callable(action): + last_token = None + for token in action(LexerContext(code, pos, line, filter_tokens), match): + last_token = token + yield yield_token(token) + + if last_token is not None: + pos = last_token.span[1] + line = last_token.line + last_token.token.count('\n') + + break + else: + raise Exception(f"invalid action {action}") + + # if no rules match, an error token with a single character is produced. 
+ # this isn't always a big problem, hence it's the decision of the caller + # to decide whether to quit or continue + if not rule_matched: + token = Token(TokenType.ERROR, code[pos], (pos, pos+1), line) + yield yield_token(token) + if code[pos] == '\n': + line += 1 + pos += 1 + +# Combines regexes passed as arguments with pipe operator +def regex_or(*regexes): + result = '(' + for r in regexes: + result += f'({ r })|' + return result[:-1] + ')' + +# Concatenates regexes, putting each in a separate group +def regex_concat(*regexes): + result = '' + for r in regexes: + result += f'({ r })' + return result + diff --git a/elixir/project_utils.py b/elixir/project_utils.py new file mode 100644 index 00000000..242a62c1 --- /dev/null +++ b/elixir/project_utils.py @@ -0,0 +1,47 @@ +import re +from typing import List + +from .filters.utils import Filter, FilterContext +from .filters import default_filters +from .projects import projects +from .lexers import default_lexers + +# Returns a list of applicable filters for project_name under provided filter context +def get_filters(ctx: FilterContext, project_name: str) -> List[Filter]: + project_config = projects.get(project_name) + if project_config is None or 'filters' not in project_config: + filter_classes = default_filters + else: + filter_classes = project_config['filters'] + + filters = [] + + for filter_cls in filter_classes: + if type(filter_cls) == tuple and len(filter_cls) == 2: + cls, kwargs = filter_cls + filters.append(cls(**kwargs)) + elif type(filter_cls) == type: + filters.append(filter_cls()) + else: + raise ValueError(f"Invalid filter: {filter_cls}, " \ + "should be either a two element tuple or a type. " \ + "Make sure project_filters in project.py is valid.") + + return [f for f in filters if f.check_if_applies(ctx)] + +def get_lexer(path: str, project_name: str): + project_config = projects.get(project_name) + if project_config is None or 'lexers' not in project_config: + lexers = default_lexers + else: + lexers = project_config['lexers'] + + path = path.lower() + for regex, lexer in lexers.items(): + if re.match(regex, path): + if type(lexer) == tuple: + lexer_cls, kwargs = lexer + return lambda code: lexer_cls(code, **kwargs) + else: + return lambda code: lexer(code) + diff --git a/elixir/projects.py b/elixir/projects.py new file mode 100644 index 00000000..53d4065e --- /dev/null +++ b/elixir/projects.py @@ -0,0 +1,126 @@ +from .filters import * +from collections import OrderedDict +from .filters import * +from .lexers import * + +# Dictionary of custom per-projects settings. +# filters: +# Projects not present in this dictionary only use default_filters. +# Use `*` to unpack filter lists defined above, +# you can pass additional options to filters by putting a Filter +# class and a dictionary with options in a tuple, like this: +# (FilterCls, {"option": True}). 
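As an illustration of the conventions described in the comment above, a hypothetical project entry could look like the sketch below. The project name, the path_exceptions pattern and the lexer table contents are invented; default_filters, common_kconfig_filters, CppPathIncFilter, CLexer, KconfigLexer and OrderedDict are the names imported at the top of this file, and (class, kwargs) tuples are instantiated by get_filters() as CppPathIncFilter(path_exceptions=...).

example_projects = {
    'some-project': {
        'filters': [
            *default_filters,
            *common_kconfig_filters,
            # tuple form: filter class plus keyword arguments
            (CppPathIncFilter, {"path_exceptions": {'^/include/generated/.*'}}),
        ],
        'lexers': OrderedDict({
            r'.*\.(c|h|cpp|hpp|c++|cxx|cc)': CLexer,
            r'kconfig.*': KconfigLexer,
        }),
    },
}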
+# Check filter files and utils.py for information about available options +projects = { + 'amazon-freertos': { + 'filters': [ + *default_filters, + MakefileSubdirFilter, + ], + }, + 'arm-trusted-firmware': { + 'filters': [ + *default_filters, + CppPathIncFilter, + ], + }, + 'barebox': { + 'filters': [ + *default_filters, + DtsiFilter, + *common_kconfig_filters, + CppPathIncFilter, + *common_makefile_filters, + ], + }, + 'coreboot': { + 'filters': [ + *default_filters, + DtsiFilter, + *common_kconfig_filters, + *common_makefile_filters, + ], + }, + 'linux': { + 'filters': [ + *default_filters, + DtsiFilter, + *common_kconfig_filters, + *common_makefile_filters, + # include/uapi contains includes to user headers under #ifndef __KERNEL__ + # Our solution is to ignore all includes in such paths + (CppPathIncFilter, {"path_exceptions": {'^/include/uapi/.*'}}), + ], + 'lexers': OrderedDict({ + r'.*\.(c|h|cpp|hpp|c++|cxx|cc)': CLexer, + r'makefile\..*': MakefileLexer, + r'.*\.dts(i)?': DTSLexer, + r'kconfig.*': KconfigLexer, #TODO negative lookahead for .rst + + r'/arch/alpha/.*\.s': (GasLexer, {"arch": "alpha"}), + r'/arch/arc/.*\.s': (GasLexer, {"arch": "arc"}), + r'/arch/arm/.*\.s': (GasLexer, {"arch": "arm32"}), + r'/arch/csky/.*\.s': (GasLexer, {"arch": "csky"}), + r'/arch/m68k/.*\.s': (GasLexer, {"arch": "m68k"}), + r'/arch/microblaze/.*\.s': (GasLexer, {"arch": "microblaze"}), + r'/arch/mips/.*\.s': (GasLexer, {"arch": "mips"}), + r'/arch/openrisc/.*\.s': (GasLexer, {"arch": "openrisc"}), + r'/arch/parisc/.*\.s': (GasLexer, {"arch": "parisc"}), + r'/arch/s390/.*\.s': (GasLexer, {"arch": "s390"}), + r'/arch/sh/.*\.s': (GasLexer, {"arch": "sh"}), + r'/arch/sparc/.*\.s': (GasLexer, {"arch": "sparc"}), + r'/arch/um/.*\.s': (GasLexer, {"arch": "x86"}), + r'/arch/x86/.*\.s': (GasLexer, {"arch": "x86"}), + r'/arch/xtensa/.*\.s': (GasLexer, {"arch": "xtensa"}), + r'.*\.s': GasLexer, + }), + }, + 'qemu': { + 'filters': [ + *default_filters, + *common_kconfig_filters, + ], + }, + 'u-boot': { + 'filters': [ + *default_filters, + DtsiFilter, + *common_kconfig_filters, + CppPathIncFilter, + *common_makefile_filters, + ], + 'lexers': OrderedDict({ + r'.*\.(c|h|cpp|hpp|c++|cxx|cc)': CLexer, + r'makefile\..*': MakefileLexer, + r'.*\.dts(i)?': DTSLexer, + r'kconfig.*': KconfigLexer, #TODO negative lookahead for .rst + + r'/arch/arc/.*\.s': (GasLexer, {"arch": "arc"}), + r'/arch/arm/.*\.s': (GasLexer, {"arch": "arm32"}), + r'/arch/m68k/.*\.s': (GasLexer, {"arch": "m68k"}), + r'/arch/microblaze/.*\.s': (GasLexer, {"arch": "microblaze"}), + r'/arch/mips/.*\.s': (GasLexer, {"arch": "mips"}), + r'/arch/riscv/.*\.s': (GasLexer, {"arch": "riscv"}), + r'/arch/sh/.*\.s': (GasLexer, {"arch": "sh"}), + r'/arch/x86/.*\.s': (GasLexer, {"arch": "x86"}), + r'/arch/sandbox/.*\.s': (GasLexer, {"arch": "x86"}), + r'/arch/xtensa/.*\.s': (GasLexer, {"arch": "xtensa"}), + r'.*\.s': GasLexer, + }), + }, + 'uclibc-ng': { + 'filters': [ + *default_filters, + ConfigInFilter, + ], + }, + 'zephyr': { + 'filters': [ + *default_filters, + DtsiFilter, + *common_kconfig_filters, + CppPathIncFilter, + ], + }, +} + diff --git a/elixir/query.py b/elixir/query.py index ff14d4b1..5476dc6d 100755 --- a/elixir/query.py +++ b/elixir/query.py @@ -21,7 +21,8 @@ from .lib import script, scriptLines, decode from . import lib from . 
import data -import os +from .lexers import TokenType +import os, sys from collections import OrderedDict from urllib import parse @@ -172,29 +173,38 @@ def query(self, cmd, *args): version = args[0] path = args[1] + lexer = args[2] filename = os.path.basename(path) family = lib.getFileFamily(filename) - if family != None: + if family is not None and lexer is not None: buffer = BytesIO() - tokens = self.scriptLines('tokenize-file', version, path, family) - even = True + code = self.get_file_raw(version, path) prefix = b'' if family == 'K': prefix = b'CONFIG_' - for tok in tokens: - even = not even - tok2 = prefix + tok - if (even and self.db.defs.exists(tok2) and - (lib.compatibleFamily(self.db.defs.get(tok2).get_families(), family) or - lib.compatibleMacro(self.db.defs.get(tok2).get_macros(), family))): - tok = b'\033[31m' + tok2 + b'\033[0m' - else: - tok = lib.unescape(tok) - buffer.write(tok) + for token_type, token, _, line in lexer(code).lex(): + token = token.encode() + + if token_type == TokenType.ERROR: + print("error token: ", token, token_type, filename, line, file=sys.stderr) + elif token_type == TokenType.IDENTIFIER: + token_with_prefix = prefix + token + token_in_db = self.db.defs.exists(token_with_prefix) + if token_in_db: + compatible = \ + lib.compatibleFamily(self.db.defs.get(token_with_prefix).get_families(), family) or \ + lib.compatibleMacro(self.db.defs.get(token_with_prefix).get_macros(), family) + + if compatible: + buffer.write(b'\033[31m' + token_with_prefix + b'\033[0m') + continue + + buffer.write(token) + return decode(buffer.getvalue()) else: return decode(self.script('get-file', version, path)) diff --git a/elixir/web.py b/elixir/web.py index 2a0cbbbb..d25745b0 100755 --- a/elixir/web.py +++ b/elixir/web.py @@ -33,7 +33,7 @@ from .lib import validFamily from .query import Query, SymbolInstance -from .filters import get_filters +from .project_utils import get_filters, get_lexer from .filters.utils import FilterContext from .autocomplete import AutocompleteResource from .api import ApiIdentGetterResource @@ -485,7 +485,8 @@ def format_code(filename, code): # version: requested version of the project # path: path to the file in the repository def generate_source(q, project, version, path): - code = q.query('file', version, path) + lexer = get_lexer(path, project) + code = q.query('file', version, path, lexer) _, fname = os.path.split(path) _, extension = os.path.splitext(fname) diff --git a/update.py b/update.py index 79cb4dcf..3d14e8ce 100755 --- a/update.py +++ b/update.py @@ -22,13 +22,16 @@ # Throughout, an "idx" is the sequential number associated with a blob. # This is different from that blob's Git hash. +import sys from sys import argv from threading import Thread, Lock, Event, Condition +from elixir.lexers import TokenType import elixir.lib as lib from elixir.lib import script, scriptLines import elixir.data as data from elixir.data import PathList +from elixir.project_utils import get_lexer from find_compatible_dts import FindCompatibleDTS verbose = False @@ -56,6 +59,7 @@ bindings_idxes = [] # DT bindings documentation files idx_key_mod = 1000000 defs_idxes = {} # Idents definitions stored with (idx*idx_key_mod + line) as the key. 
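Taken together, the web.py and query.py hunks above mean that file rendering now resolves a lexer per path and walks its tokens. A rough standalone sketch of that loop follows; it is illustrative only, assumes the patched elixir package is importable, and the path, project and code string are invented, while get_lexer and TokenType are the ones introduced by this patch.

from elixir.project_utils import get_lexer
from elixir.lexers import TokenType

code = "static int foo(void)\n{\n\treturn CONFIG_BAR;\n}\n"

lexer = get_lexer("drivers/foo/bar.c", "linux")   # C/C++ pattern -> CLexer factory
if lexer is not None:
    for token_type, token, span, line in lexer(code).lex():
        if token_type == TokenType.IDENTIFIER:
            # query.py looks these up in db.defs and colors known identifiers
            print("identifier", token, "on line", line)
        elif token_type == TokenType.ERROR:
            print("lexing error on line", line, repr(token))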
+file_paths = {} tags_done = False # True if all tags have been added to new_idxes @@ -163,7 +167,7 @@ def run(self): progress('vers: Thread finished', index) def update_versions(self, tag): - global blobs_lock + global blobs_lock, file_paths # Get blob hashes and associated file paths blobs = scriptLines('list-blobs', '-p', tag) @@ -174,12 +178,14 @@ def update_versions(self, tag): with blobs_lock: idx = db.blob.get(hash) buf.append((idx, path)) + file_paths[idx] = path buf = sorted(buf) obj = PathList() for idx, path in buf: obj.append(idx, path) + # Store DT bindings documentation files to parse them later if path[:33] == b'Documentation/devicetree/bindings': bindings_idxes.append(idx) @@ -275,6 +281,7 @@ def run(self): new_idxes[self.index][1].wait() # Make sure the tag is ready new_idxes[self.index][2].wait() # Make sure UpdateDefs processed the tag + new_idxes[self.index][4].wait() # Tell that UpdateVersions processed the tag with tags_refs_lock: tags_refs[0] += 1 @@ -288,45 +295,53 @@ def run(self): progress('refs: Thread ' + str(tags_refs[1]) + '/' + str(self.inc) + ' finished', tags_refs[0]) def update_references(self, idxes): - global hash_file_lock, defs_lock, refs_lock, tags_refs + global hash_file_lock, defs_lock, refs_lock, tags_refs, file_paths for idx in idxes: if idx % 1000 == 0: progress('refs: ' + str(idx), tags_refs[0]) with hash_file_lock: hash = db.hash.get(idx) - filename = db.file.get(idx) + filename = file_paths[idx].decode() family = lib.getFileFamily(filename) if family == None: continue + lexer = get_lexer(filename, project) + if lexer is None: + continue + + try: + code = script('get-blob', hash).decode() + except UnicodeDecodeError: + code = script('get-blob', hash).decode('raw_unicode_escape') + prefix = b'' # Kconfig values are saved as CONFIG_ if family == 'K': prefix = b'CONFIG_' - tokens = scriptLines('tokenize-file', '-b', hash, family) - even = True - line_num = 1 idents = {} with defs_lock: - for tok in tokens: - even = not even - if even: - tok = prefix + tok - - if (db.defs.exists(tok) and - not ( (idx*idx_key_mod + line_num) in defs_idxes and - defs_idxes[idx*idx_key_mod + line_num] == tok ) and - (family != 'M' or tok.startswith(b'CONFIG_'))): - # We only index CONFIG_??? in makefiles - if tok in idents: - idents[tok] += ',' + str(line_num) - else: - idents[tok] = str(line_num) + for token_type, token, _, line in lexer(code).lex(): + if token_type == TokenType.ERROR: + print("error token: ", token, token_type, filename, line, file=sys.stderr) + continue - else: - line_num += tok.count(b'\1') + token = prefix + token.encode() + + if token_type != TokenType.IDENTIFIER: + continue + + if (db.defs.exists(token) and + not ( (idx*idx_key_mod + line) in defs_idxes and + defs_idxes[idx*idx_key_mod + line] == token ) and + (family != 'M' or token.startswith(b'CONFIG_'))): + # We only index CONFIG_??? in makefiles + if token in idents: + idents[token] += ',' + str(line) + else: + idents[token] = str(line) with refs_lock: for ident, lines in idents.items(): @@ -579,6 +594,7 @@ def progress(msg, current): for tag in scriptLines('list-tags'): if not db.vers.exists(tag): tag_buf.append(tag) + break num_tags = len(tag_buf) project = lib.currentProject()
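To close, the reworked update_references loop boils down to the following condensed, illustrative sketch. It is not code from the patch: collect_references, the known_defs set and the sample Makefile line are invented, it assumes the patched elixir package is importable, and the real code additionally skips the line on which an identifier is defined (the defs_idxes check).

from elixir.lexers import TokenType
from elixir.project_utils import get_lexer

def collect_references(filename, code, project, family, known_defs):
    lexer = get_lexer(filename, project)
    if lexer is None:
        return {}

    # Kconfig symbols are stored in the definitions database with a CONFIG_ prefix
    prefix = b'CONFIG_' if family == 'K' else b''

    idents = {}
    for token_type, token, _, line in lexer(code).lex():
        if token_type != TokenType.IDENTIFIER:
            continue
        token = prefix + token.encode()
        # as in the patch, Makefiles only index CONFIG_* identifiers
        if token in known_defs and (family != 'M' or token.startswith(b'CONFIG_')):
            idents.setdefault(token, []).append(str(line))
    return {ident: ','.join(lines) for ident, lines in idents.items()}

# Toy run: one Makefile line and a single known definition
print(collect_references("Makefile.build", "obj-$(CONFIG_FOO) += foo.o\n",
                         "linux", 'M', {b'CONFIG_FOO'}))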