1
votes

First of all: I am aware of the post "Cannot infer appropriate lifetime for autoref in Iterator impl", and my problem is probably similar to it. However, I could not get my code working with what I learned from that thread.

The code:

use std::str::Chars;

use super::token::*;
use super::token_stream::TokenStream;

/// Lexer state over a borrowed character stream.
pub struct Lexer<'a> {
    /// Remaining, not yet consumed input characters.
    input: Chars<'a>,
    /// Consumed characters accumulated since the buffer was last cleared.
    buffer: String,
    /// The most recently consumed character; `'\0'` is stored once the input
    /// is exhausted.
    cur_char: char,
}

impl<'a> Lexer<'a> {
    /// Creates a lexer over `iterator` and immediately consumes the first
    /// character, so `current_char` already reflects the start of the input.
    pub fn new(iterator: Chars<'a>) -> Lexer {
        let mut primed = Lexer {
            input: iterator,
            buffer: String::new(),
            cur_char: '\0',
        };
        primed.consume_next();
        primed
    }

    /// Convenience constructor: lexes the characters of `content`.
    pub fn new_from_str(content: &str) -> Lexer {
        Lexer::new(content.chars())
    }

    /// Advances to the next input character and returns it.
    ///
    /// A consumed character is appended to `buffer`. When the input is
    /// exhausted the current character becomes `'\0'` and nothing is buffered.
    fn consume_next(&mut self) -> char {
        match self.input.next() {
            Some(c) => {
                self.buffer.push(c);
                self.cur_char = c;
            }
            None => self.cur_char = '\0',
        }
        self.current_char()
    }

    /// Empties the buffer of consumed characters.
    fn clear_buffer(&mut self) {
        self.buffer.clear();
    }

    /// Returns the character the lexer is currently positioned on.
    fn current_char(&self) -> char {
        self.cur_char
    }

    // The scanners below are stubs that always report `Token::EndOfFile`.
    // Their return lifetime is elided and therefore tied to the `&self` borrow.

    /// Stub for scanning a `//` comment.
    fn scan_line_comment(&self) -> Token {
        Token::EndOfFile
    }

    /// Stub for scanning a `/* ... */` comment.
    fn scan_multi_line_comment(&self) -> Token {
        Token::EndOfFile
    }

    /// Stub for scanning an identifier.
    fn scan_identifier(&self) -> Token {
        Token::EndOfFile
    }

    /// Stub for scanning a character literal.
    fn scan_char_literal(&self) -> Token {
        Token::EndOfFile
    }

    /// Stub for scanning a string literal.
    fn scan_string_literal(&self) -> Token {
        Token::EndOfFile
    }

    /// Stub for scanning an integer or float literal.
    fn scan_number_literal(&self) -> Token {
        Token::EndOfFile
    }

    /// Consumes the current character and hands `token` back unchanged.
    ///
    /// Lets a match arm finish a token and advance in a single expression.
    fn consume_and_return<'b>(&mut self, token: Token<'b>) -> Token<'b> {
        let _ = self.consume_next();
        token
    }
}

impl<'a> TokenStream for Lexer<'a> {
    /// Scans one token starting at the current character and returns it.
    ///
    /// Multi-character operators are matched greedily: after each consumed
    /// character the next one decides whether the token continues (`<`, `<<`,
    /// `<<=`). When the following character does not extend the token, it is
    /// left as the current character for the next call.
    ///
    /// Any character without an arm of its own yields `Token::EndOfFile`.
    /// That includes `'\0'`, which the lexer stores once the input is
    /// exhausted.
    fn next_token(&mut self) -> Token {
        match self.current_char() {
            /* Skip whitespace */
            ' '  |
            '\r' |
            '\n' |
            '\t' => {
                self.clear_buffer();
                self.consume_and_return(Token::Whitespace)
            }

            /* Opening delimiters */
            '(' => self.consume_and_return(Token::OpenDelim(DelimitToken::Paren)),
            '[' => self.consume_and_return(Token::OpenDelim(DelimitToken::Bracket)),
            '{' => self.consume_and_return(Token::OpenDelim(DelimitToken::Brace)),

            /* Closing delimiters */
            ')' => self.consume_and_return(Token::CloseDelim(DelimitToken::Paren)),
            ']' => self.consume_and_return(Token::CloseDelim(DelimitToken::Bracket)),
            '}' => self.consume_and_return(Token::CloseDelim(DelimitToken::Brace)),

            /* Special tokens which aren't the beginning
               of any other token */
            '?' => self.consume_and_return(Token::Question),
            ';' => self.consume_and_return(Token::SemiColon),
            ',' => self.consume_and_return(Token::Comma),

            /* Colon and ColonColon tokens */
            ':' => match self.consume_next() {
                ':' => self.consume_and_return(Token::ColonColon),
                _   => Token::Colon
            },

            /* Dot, DotDot and DotDotDot tokens */
            '.' => match self.consume_next() {
                '.' => match self.consume_next() {
                    '.' => self.consume_and_return(Token::DotDotDot),
                    _   => Token::DotDot
                },
                _ => Token::Dot
            },

            /* Tokens starting with '+' */
            '+' => match self.consume_next() {
                '=' => self.consume_and_return(Token::BinOpEq(BinOpToken::Plus)),
                _   => Token::BinOp(BinOpToken::Plus)
            },

            /* Tokens starting with '-' */
            '-' => match self.consume_next() {
                '=' => self.consume_and_return(Token::BinOpEq(BinOpToken::Minus)),
                '>' => self.consume_and_return(Token::Arrow),
                _   => Token::BinOp(BinOpToken::Minus)
            },

            /* Tokens starting with '*' */
            '*' => match self.consume_next() {
                '=' => self.consume_and_return(Token::BinOpEq(BinOpToken::Star)),
                _   => Token::BinOp(BinOpToken::Star)
            },

            /* Tokens starting with '/' */
            '/' => match self.consume_next() {
                '=' => self.consume_and_return(Token::BinOpEq(BinOpToken::Slash)),
                '/' => self.scan_line_comment(),
                '*' => self.scan_multi_line_comment(),
                _ => Token::BinOp(BinOpToken::Slash)
            },

            /* Tokens starting with '%' */
            '%' => match self.consume_next() {
                '=' => self.consume_and_return(Token::BinOpEq(BinOpToken::Percent)),
                _   => Token::BinOp(BinOpToken::Percent)
            },

            /* Tokens starting with '^' */
            '^' => match self.consume_next() {
                '=' => self.consume_and_return(Token::BinOpEq(BinOpToken::Caret)),
                _   => Token::BinOp(BinOpToken::Caret)
            },

            /* Tokens starting with '!' */
            '!' => match self.consume_next() {
                '=' => self.consume_and_return(Token::RelOp(RelOpToken::NotEq)),
                _   => Token::Exclamation
            },

            /* Tokens starting with '=' */
            '=' => match self.consume_next() {
                '=' => self.consume_and_return(Token::RelOp(RelOpToken::EqEq)),
                '>' => self.consume_and_return(Token::FatArrow),
                _   => Token::Eq
            },

            /* Tokens starting with '&' */
            '&' => match self.consume_next() {
                '&' => self.consume_and_return(Token::LogicalOp(LogicalOpToken::AndAnd)),
                '=' => self.consume_and_return(Token::BinOpEq(BinOpToken::And)),
                _   => Token::BinOp(BinOpToken::And)
            },

            /* Tokens starting with '|' */
            '|' => match self.consume_next() {
                '|' => self.consume_and_return(Token::LogicalOp(LogicalOpToken::OrOr)),
                '=' => self.consume_and_return(Token::BinOpEq(BinOpToken::Or)),
                _   => Token::BinOp(BinOpToken::Or)
            },

            /* Tokens starting with '<' */
            '<' => match self.consume_next() {
                '<' => match self.consume_next() {
                    '=' => self.consume_and_return(Token::BinOpEq(BinOpToken::Shl)),
                    _   => Token::BinOp(BinOpToken::Shl)
                },
                '=' => self.consume_and_return(Token::RelOp(RelOpToken::LessEq)),
                _   => Token::RelOp(RelOpToken::LessThan)
            },

            /* Tokens starting with '>' */
            '>' => match self.consume_next() {
                '>' => match self.consume_next() {
                    '=' => self.consume_and_return(Token::BinOpEq(BinOpToken::Shr)),
                    _   => Token::BinOp(BinOpToken::Shr)
                },
                '=' => self.consume_and_return(Token::RelOp(RelOpToken::GreaterEq)),
                _   => Token::RelOp(RelOpToken::GreaterThan)
            },

            /* Char and string literals */
            '\'' => self.scan_char_literal(),
            '\"' => self.scan_string_literal(),

            /* Integer- and float literals and identifiers */
            '0' ..= '9' => self.scan_number_literal(),
            'a' ..= 'z' |
            'A' ..= 'Z' => self.scan_identifier(),

            /* When end of iterator has been reached.
               NOTE(review): every character not handled above (e.g. '_' or
               '#') also lands here and is reported as end of file. */
            _ => Token::EndOfFile
        }
    }
}

// Iterator adapter over `next_token`: yields tokens until `Token::EndOfFile`.
impl<'a> Iterator for Lexer<'a> {
    // Items are required to live as long as the lexer's input (`'a`).
    type Item = Token<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        // `TokenStream::next_token` is declared with an elided return
        // lifetime, i.e. one tied to this `&mut self` borrow, whereas `Item`
        // demands `Token<'a>`.
        let token = self.next_token();
        match token {
            // End of file terminates the iteration instead of being yielded.
            Token::EndOfFile => None,
            _                => Some(token)
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use super::super::token::*;
    use super::super::token_stream::TokenStream;

    /// Lexes every single-character delimiter and punctuation token once and
    /// expects the stream to finish with `Token::EndOfFile`.
    #[test]
    fn simple_tokens() {
        let expected_tokens = [
            // "()"
            Token::OpenDelim(DelimitToken::Paren),
            Token::CloseDelim(DelimitToken::Paren),
            // "[]"
            Token::OpenDelim(DelimitToken::Bracket),
            Token::CloseDelim(DelimitToken::Bracket),
            // "{}"
            Token::OpenDelim(DelimitToken::Brace),
            Token::CloseDelim(DelimitToken::Brace),
            // "?;,"
            Token::Question,
            Token::SemiColon,
            Token::Comma,
            // input exhausted
            Token::EndOfFile,
        ];

        let mut lexer = Lexer::new_from_str("()[]{}?;,");
        for expected in expected_tokens.iter() {
            assert_eq!(lexer.next_token(), *expected);
        }
    }
}

Playground

And its dependent module 'Token':

/// Binary operator kinds, carried by `Token::BinOp` and `Token::BinOpEq`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BinOpToken {
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `^`
    Caret,
    /// `&`
    And,
    /// `|`
    Or,
    /// `<<`
    Shl,
    /// `>>`
    Shr,
}

/// Relational (comparison) operator kinds, carried by `Token::RelOp`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RelOpToken {
    /// `==`
    EqEq,
    /// `!=`
    NotEq,
    /// `<`
    LessThan,
    /// `<=`
    LessEq,
    /// `>`
    GreaterThan,
    /// `>=`
    GreaterEq,
}

/// Short-circuiting logical operator kinds, carried by `Token::LogicalOp`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LogicalOpToken {
    /// `&&`
    AndAnd,
    /// `||`
    OrOr,
}

/// Delimiter kinds; whether a delimiter opens or closes is expressed by the
/// wrapping `Token::OpenDelim` / `Token::CloseDelim` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DelimitToken {
    /// `(` or `)`
    Paren,
    /// `[` or `]`
    Bracket,
    /// `{` or `}`
    Brace,
}

/// Literal kinds; each variant carries the literal's text as a borrowed
/// string slice.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LiteralToken<'a> {
    /// Character literal, e.g. `'a'`.
    Char(&'a str),
    /// Integer literal, e.g. `5`, `42`, `1337`, `0`.
    Integer(&'a str),
    /// Float literal, e.g. `0.1`, `5.0`, `13.37`, `0.0`.
    Float(&'a str),
    /// String literal, e.g. `"Hello, World!"`.
    String(&'a str),
}

/// A lexical token; identifiers and literals borrow their text for `'a`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Token<'a> {
    /// Logical operators, e.g. `&&` or `||`.
    LogicalOp(LogicalOpToken),
    /// Binary operators compatible with assignment, e.g. `+`, `-`.
    BinOp(BinOpToken),
    /// Binary assignment operators, e.g. `+=`, `-=`.
    BinOpEq(BinOpToken),
    /// Relational operators, e.g. `<`, `<=`, `>`, `>=`, `==`, `!=`.
    RelOp(RelOpToken),

    /// An opening delimiter, e.g. `{` or `(` or `[`.
    OpenDelim(DelimitToken),

    /// A closing delimiter, e.g. `}` or `)` or `]`.
    CloseDelim(DelimitToken),

    /// Identifiers with their given name.
    Identifier(&'a str),
    /// Literal token, e.g. an integer, float or string literal.
    Literal(LiteralToken<'a>),

    // Special tokens
    /// `=`
    Eq,
    /// `:`
    Colon,
    /// `;`
    SemiColon,
    /// `::`
    ColonColon,
    /// `.`
    Dot,
    /// `..`
    DotDot,
    /// `...`
    DotDotDot,
    /// `,`
    Comma,
    /// `!`
    Exclamation,
    /// `?`
    Question,
    /// `->`
    Arrow,
    /// `=>`
    FatArrow,

    // Junk tokens which the parser doesn't require in order to parse the program.
    /// Whitespace.
    Whitespace,
    /// A comment.
    Comment,

    /// End of file (EOF) token indicating the end of stream for parsing.
    EndOfFile,
}

Playground

As well as the trait 'TokenStream':

pub use super::token::Token;

/// A source of tokens that is pulled one token at a time.
pub trait TokenStream {
    /// Produces the next token of the stream.
    ///
    /// The lifetime of the returned `Token` is elided, so it is tied to the
    /// `&mut self` borrow of this call.
    fn next_token(&mut self) -> Token;
}

I am getting the following error:

src/parser/lexer.rs:202:20: 202:32 error: cannot infer an appropriate lifetime for autoref due to conflicting requirements [E0495]
src/parser/lexer.rs:202         let token = self.next_token();
                                                 ^~~~~~~~~~~~

I guess that it is a lifetime problem. My next_token() method is supposed to return a Token whose lifetime is independent of Self; however, I am not sure whether I annotated it correctly.

I also tried adding more lifetime annotations to the next() method of the Iterator implementation, but every attempt failed.

I get this error when I add a lifetime to the &mut self parameter of the next() method in the implementation of the Iterator trait:

src/parser/lexer.rs:201:2: 207:3 error: method `next` has an incompatible type for trait:
 expected bound lifetime parameter ,
    found concrete lifetime [E0053]
1
Your code is really, really large. Please take some time to produce an MCVE, emphasis on the M. Delete all methods that don't directly contribute to the error, replace function bodies with unimplemented!, remove parameters, struct members, enum variants. Combine all your code into one file. Ideally, produce an example that reproduces your code on the Playground. Taking the time to produce a clear example will help you understand where the problem occurs and makes it more likely that you will get a helpful answer. – Shepmaster
Thank you for the reformatting! I have found a solution to my problem with the help of a friend and I am going to update the initial post with the solution. ;) However, sometimes - as in this case - the errors are everywhere in the code, even in methods where you wouldn't expect them. – robbepop
Done! ;) I hope everything is okay with that answer. – robbepop

1 Answer

3
votes

I found a solution to my problems and now everything compiles fine.

The problem was in fact a lifetime problem but not only within the TokenStream trait. I had lifetime issues in several places across the entire code.

Some notable places from the long code in the initial post:

lexer.rs: line 46 - 58

/// Stub for scanning a line comment; always returns `Token::EndOfFile`.
/// `'b` is a free lifetime, so the token is not tied to the `&self` borrow.
fn scan_line_comment<'b>(&self) -> Token<'b> {
    Token::EndOfFile
}
/// Stub for scanning a multi-line comment; always returns `Token::EndOfFile`.
/// `'b` is a free lifetime, so the token is not tied to the `&self` borrow.
fn scan_multi_line_comment<'b>(&self) -> Token<'b> {
    Token::EndOfFile
}


/// Stub for scanning an identifier; always returns `Token::EndOfFile`.
/// `'b` is a free lifetime, so the token is not tied to the `&self` borrow.
fn scan_identifier<'b>(&self) -> Token<'b> {
    Token::EndOfFile
}
/// Stub for scanning a character literal; always returns `Token::EndOfFile`.
/// `'b` is a free lifetime, so the token is not tied to the `&self` borrow.
fn scan_char_literal<'b>(&self) -> Token<'b> {
    Token::EndOfFile
}
/// Stub for scanning a string literal; always returns `Token::EndOfFile`.
/// `'b` is a free lifetime, so the token is not tied to the `&self` borrow.
fn scan_string_literal<'b>(&self) -> Token<'b> {
    Token::EndOfFile
}
/// Stub for scanning a number literal; always returns `Token::EndOfFile`.
/// `'b` is a free lifetime, so the token is not tied to the `&self` borrow.
fn scan_number_literal<'b>(&self) -> Token<'b> {
    Token::EndOfFile
}

/// Advances the lexer by one character and hands `token` back unchanged.
///
/// The token keeps its own lifetime `'b`; it is not tied to the `&mut self`
/// borrow taken for the call.
fn consume_and_return<'b>(&mut self, token: Token<'b>) -> Token<'b> {
    let _ = self.consume_next();
    token
}

I had to insert the lifetime 'b to specify that the Token may outlive the Lexer instance.

The TokenStream required a new lifetime parameter so that it can specify that extended lifetime as well:

/// A source of tokens whose lifetime `'a` is a parameter of the trait.
pub trait TokenStream<'a> {
    /// Produces the next token of the stream.
    ///
    /// The returned `Token<'a>` uses the trait's lifetime parameter, so it is
    /// not tied to the `&mut self` borrow of this call.
    fn next_token(&mut self) -> Token<'a>;
}

The TokenStream implementation for Lexer had to be adjusted for this change:

// `'a` is the lifetime of the lexer's input; `'b` is the separate lifetime
// given to the tokens handed out through `TokenStream<'b>`.
impl<'a, 'b> TokenStream<'b> for Lexer<'a> {
    // Body elided in this excerpt; only the signature is shown.
    fn next_token(&mut self) -> Token<'b> {
        ...
    }
    ...
}

As well as the Iterator implementation for Lexer

/// Iterates over the lexer's tokens, stopping at the first `Token::EndOfFile`.
impl<'a> Iterator for Lexer<'a> {
    type Item = Token<'a>;

    /// Returns the next token, or `None` once `Token::EndOfFile` is reached.
    fn next(&mut self) -> Option<Self::Item> {
        match self.next_token() {
            Token::EndOfFile => None,
            token => Some(token),
        }
    }
}

That's it!