-- Packrat parser for Pappy grammar definition files
module ReadGrammar (pappyParse, pappyParseFile) where

import Char
import List

import PappyPos
import PappyParse
import Pappy


-------------------- Top-level tie-up --------------------

data PappyDerivs = PappyDerivs {

		-- Grammar definition structure
		pdGrammar	:: Result PappyDerivs Grammar,
		pdNonterminal	:: Result PappyDerivs Nonterminal,

		-- Grammar rule expressions
		pdAltRule	:: Result PappyDerivs Rule,
		pdSeqRule	:: Result PappyDerivs Rule,
		pdSeqMatch	:: Result PappyDerivs Match,
		pdUnaryRule	:: Result PappyDerivs Rule,
		pdPrimRule	:: Result PappyDerivs Rule,

		-- Lexical structure
		pdIdentifier	:: Result PappyDerivs String,
		pdWord		:: Result PappyDerivs String,
		pdSymbol	:: Result PappyDerivs String,
		pdRawCode	:: Result PappyDerivs String,
		pdCodeBlock	:: Result PappyDerivs String,
		pdCodeChars	:: Result PappyDerivs String,
		pdCodeSQChars	:: Result PappyDerivs String,
		pdCodeDQChars	:: Result PappyDerivs String,
		pdCharLit	:: Result PappyDerivs Char,
		pdStringLit	:: Result PappyDerivs String,
		pdWhitespace	:: Result PappyDerivs String,

		-- Input text
		pdChar		:: Result PappyDerivs Char,
		pdPos		:: Pos
	}

instance Derivs PappyDerivs where
	dvPos d = pdPos d
	dvChar d = pdChar d

grammar		= Parser pdGrammar
nonterminal	= Parser pdNonterminal
altRule		= Parser pdAltRule
seqRule		= Parser pdSeqRule
seqMatch	= Parser pdSeqMatch
unaryRule	= Parser pdUnaryRule
primRule	= Parser pdPrimRule
identifier	= Parser pdIdentifier
word		= Parser pdWord
symbol		= Parser pdSymbol
rawCode		= Parser pdRawCode
codeBlock	= Parser pdCodeBlock
codeChars	= Parser pdCodeChars
codeSQChars	= Parser pdCodeSQChars
codeDQChars	= Parser pdCodeDQChars
charLit		= Parser pdCharLit
stringLit	= Parser pdStringLit
whitespace	= Parser pdWhitespace

pappyParse :: String -> String -> Either Grammar String
pappyParse name text =
	let	initpos = Pos name 1 1
		d = pappyDerivs initpos text
	in case pdGrammar d of
		Parsed g _ _ -> Left g
		NoParse e -> Right (show e)

pappyParseFile :: String -> IO Grammar
pappyParseFile filename =
	do	text <- readFile filename
		let	initpos = Pos filename 1 1
			d = pappyDerivs initpos text
		case pdGrammar d of
			Parsed g _ _ -> return g
			NoParse e -> fail (show e)

pappyDerivs :: Pos -> String -> PappyDerivs
pappyDerivs pos text = d
	where	d = PappyDerivs
			(pGrammar d)
			(pNonterminal d)
			(pAltRule d)
			(pSeqRule d)
			(pSeqMatch d)
			(pUnaryRule d)
			(pPrimRule d)
			(pIdentifier d)
			(pWord d)
			(pSymbol d)
			(pRawCode d)
			(pCodeBlock d)
			(pCodeChars d)
			(pCodeSQChars d)
			(pCodeDQChars d)
			(pCharLit d)
			(pStringLit d)
			(pWhitespace d)
			chr pos
		chr = case text of
			[] -> NoParse (eofError d)
			(c:cs) -> Parsed c (pappyDerivs (nextPos pos c) cs)
					 (nullError d)

-------------------- Grammar Definition Structure --------------------



mids = do
    sym "("
    xs <- sepBy (identifier </> string "..") (sym ",")
    sym ")"
    return . concat $ ["("] ++ intersperse "," xs ++ [")"]

mexports = do
    sym "("
    xs <- sepBy (do x <- identifier; y <- mids </> return ""; return $ x ++ y) (sym ",")
    sym ")"
    return (Just xs)

pGrammar :: PappyDerivs -> Result PappyDerivs Grammar
Parser pGrammar =
	do	whitespace
		keyword "parser"; name <- identifier; exps <- (mexports </> return Nothing); sym ":"
                lp1 <- getLinePragma
		c1 <- rawCode </> return []
		imports <- (keyword "import" >>  sepBy1 stringLit (sym ",")) </> return []
                token <- fmap Just (keyword "token" >> identifier) </> return Nothing
		keyword "top"; tops <- sepBy1 identifier (sym ",")
		ns <- many1 nonterminal
                lp2 <- getLinePragma
		c2 <- rawCode </> return []
		let code = lp1 ++ c1 ++ "\n" ++ lp2 ++ c2 ++ "\n"
		notFollowedBy anyChar
		return Grammar {
                    grammarName = name,
                    grammarToken = token,
                    grammarImports = imports,
                    grammarRawCode = code,
                    grammarExports = exps,
                    grammarTops = tops,
                    grammarOptions = [],
                    grammarNonterminals = ns
                    }

pNonterminal :: PappyDerivs -> Result PappyDerivs Nonterminal
Parser pNonterminal =
	do	n <- identifier
		sym "::"
		t <- sym "()" </> identifier </> rawCode
		sym "="
		r <- altRule
                let r' | t == "()" = discardRule r
                       | otherwise = r
		return (n, t, r')

discardRule r = f r where
    f r = case r of
        RuleSeq ms _ -> RuleSeq ms (ProdCode "()")
        RuleAlt [a] -> f a
        RuleAlt as -> RuleAlt (map f as)
        r -> RuleSeq [MatchAnon r] (ProdCode "()")



-------------------- Grammar Rule Expressions --------------------

pAltRule :: PappyDerivs -> Result PappyDerivs Rule
Parser pAltRule =
	do alts <- sepBy1 seqRule (sym "/")
	   case alts of
		[a] -> return a
		_ -> return (RuleAlt alts)

pSeqRule :: PappyDerivs -> Result PappyDerivs Rule
Parser pSeqRule =
	    (do ms <- many seqMatch
		sym "->"
		id <- identifier
		return (RuleSeq ms (ProdName id)))
	</> (do ms <- many seqMatch
		sym "->"
		c <- sym "()" </> rawCode
		return (RuleSeq ms (ProdCode c)))
	</> unaryRule


pSeqMatch :: PappyDerivs -> Result PappyDerivs Match
Parser pSeqMatch =
	    (do i <- identifier
		sym ":"
		r <- unaryRule
		return (MatchName r i))
	</> (do p <- rawCode
		sym ":"
		r <- unaryRule
		return (MatchPat r p))
	</> (do c <- charLit
		sym ":"
		r <- unaryRule
		return (MatchString r [c]))
	</> (do s <- stringLit
		sym ":"
		r <- unaryRule
		return (MatchString r s))
	</> (do sym "&"
		r <- unaryRule
		return (MatchAnd r))
	</> (do sym "!"
		r <- unaryRule
		return (MatchNot r))
	</> (do sym "&"
		p <- rawCode
		return (MatchPred p))
	</> (do r <- unaryRule
		return (MatchAnon r))

pUnaryRule :: PappyDerivs -> Result PappyDerivs Rule
Parser pUnaryRule =
	    (do	r <- primRule
		sym "?"
		return (RuleOpt r))
	</> (do	r <- primRule
		sym "+"
		return (RulePlus r))
	</> (do	r <- primRule
		sym "*"
		return (RuleStar r))
	</> primRule

pPrimRule :: PappyDerivs -> Result PappyDerivs Rule
Parser pPrimRule =
            (do "Pos" <- identifier
                return RulePos)
	</> (do n <- identifier
		return (RulePrim n))
	</> (do c <- charLit
		return (RuleChar c))
	</> (do s <- stringLit
		return (RuleString s))
	</> (do sym "("
		r <- altRule
		sym ")"
		return r)
	</> (do sym "("
		as <- sepBy1 (many seqMatch >>= \ms -> return $ RuleSeq ms (ProdCode "()")) (sym "/")
		sym ")"
		return $ discardRule (RuleAlt as))

-------------------- Lexical Structure --------------------


-- Keywords and identifiers

keyword :: String -> Parser PappyDerivs String
keyword w =
	(do	s <- word
		if s == w then return w else fail "")
	<?!> show w

pIdentifier :: PappyDerivs -> Result PappyDerivs String
Parser pIdentifier =
	(do	s <- word
		if s `elem` keywords then fail "" else return s)
	<?!> "identifier"

pWord :: PappyDerivs -> Result PappyDerivs String
Parser pWord =
	do	c <- satisfy anyChar isIdentStart
		cs <- many (satisfy anyChar isIdentCont)
		whitespace
		return (c : cs)

isIdentStart c = isAlpha c || c == '_'
isIdentCont c = isIdentStart c || isDigit c || c == '\''

keywords = ["parser", "top", "import"]


-- Symbols: operators, parentheses, etc.

sym :: String -> Parser PappyDerivs String
sym s = (do	s' <- symbol
		if s' == s then return s else fail "")
	    <?!> show s

pSymbol :: PappyDerivs -> Result PappyDerivs String
Parser pSymbol =
	do	s <- stringFrom symbols
		whitespace
		return s

symbols = [
	"->",
	"::",
	"()",
	":",
	"=",
	",",
	"(",
	")",
	"/",
	"+",
	"*",
	"?",
	"&",
	"!"]

getLinePragma = do
    Pos { posFile = fn, posLine = l } <- getPos
    return $ "\n{-# LINE " ++ show l ++ " " ++ show fn ++  " #-}"

-- Raw code block, ignoring following whitespace
pRawCode :: PappyDerivs -> Result PappyDerivs String
Parser pRawCode =
	do	c <- codeBlock
		whitespace
                return c

-- Raw code block, consuming only the braces and contents
pCodeBlock :: PappyDerivs -> Result PappyDerivs String
Parser pCodeBlock =
	do	char '{'
		c <- codeChars
		char '}'
		return c

-- Characters in a code block
pCodeChars :: PappyDerivs -> Result PappyDerivs String
Parser pCodeChars =
	    (do	b <- codeBlock		-- nested code blocks
		cs <- codeChars
		return ("{" ++ b ++ "}" ++ cs))
	</> (do char '\''		-- character literals
		lit <- codeSQChars
		char '\''
		cs <- codeChars
		return ("\'" ++ lit ++ "\'" ++ cs))
	</> (do char '\"'		-- string literals
		lit <- codeDQChars
		char '\"'
		cs <- codeChars
		return ("\"" ++ lit ++ "\"" ++ cs))
	</> (do ic <- satisfy anyChar isIdentStart	-- identifiers
		ics <- many (satisfy anyChar isIdentCont)
		cs <- codeChars
		return (ic : ics ++ cs))
	</> (do s <- lineComment	-- comments
		cs <- codeChars
		return (s ++ cs))
	</> (do c <- noneOf "{}\"\'"	-- other characters
		cs <- codeChars
		return (c : cs))
	</> return []

-- Characters in a single-quoted character literal in a code block
pCodeSQChars :: PappyDerivs -> Result PappyDerivs String
Parser pCodeSQChars =
	    (do char '\\'
		c <- anyChar
		cs <- codeSQChars
		return ('\\' : c : cs))
	</> (do c <- noneOf "\'\\\r\n"
		cs <- codeSQChars
		return (c : cs))
	</> return []

-- Characters in a double-quoted string literal in a code block
pCodeDQChars :: PappyDerivs -> Result PappyDerivs String
Parser pCodeDQChars =
	    (do char '\\'
		c <- anyChar
		cs <- codeDQChars
		return ('\\' : c : cs))
	</> (do c <- noneOf "\"\\\r\n"
		cs <- codeDQChars
		return (c : cs))
	</> return []

-- Character and string literals

quotedChar quote =
	(do char '\\'
	    c <- anyChar
	    case c of
		'n' -> return '\n'
		'r' -> return '\r'
		't' -> return '\t'
		'v' -> return '\v'
		'f' -> return '\f'
		'\\' -> return '\\'
		'\'' -> return '\''
		'\"' -> return '\"'
		-- XXX octal characters, other escapes
		_ -> fail "invalid escape sequence")
	</> satisfy anyChar (\c -> c /= quote)

pCharLit :: PappyDerivs -> Result PappyDerivs Char
Parser pCharLit = (do char '\''
		      c <- quotedChar '\''
		      char '\''
		      whitespace
		      return c)

pStringLit :: PappyDerivs -> Result PappyDerivs String
Parser pStringLit = (do char '"'
		        s <- many (quotedChar '"')
		        char '"'
			whitespace
		        return s)


-- Whitespace

pWhitespace :: PappyDerivs -> Result PappyDerivs String
Parser pWhitespace =
	do	ss <- many (many1 spaceChar </> lineComment)
		return (concat ss)

spaceChar :: Parser PappyDerivs Char
spaceChar = satisfy anyChar isSpace

--flowComment :: Parser PappyDerivs ()
--flowComment =
--	do	string "-*"
--		many (do notFollowedBy (string "*/"); anyChar)
--		string "*-"
--		return ()

lineComment :: Parser PappyDerivs String
lineComment =
	do	s1 <- string "--"
		s2 <- many (do notFollowedBy lineTerminator; anyChar)
		s3 <- lineTerminator
		return (s1 ++ s2 ++ s3)

lineTerminator :: Parser PappyDerivs String
lineTerminator =
	    (do string "\r\n")
	</> string "\r"
	</> string "\n"


