From b4d0525f64538f7e1af2f7657ff2a19fb81f4d5f Mon Sep 17 00:00:00 2001 From: Jed Barber Date: Sun, 4 May 2014 04:40:21 +1000 Subject: Started writing more complete DOT language parser --- src/Grasp/Parser.hs | 128 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 100 insertions(+), 28 deletions(-) diff --git a/src/Grasp/Parser.hs b/src/Grasp/Parser.hs index 129e08e..7b4f685 100644 --- a/src/Grasp/Parser.hs +++ b/src/Grasp/Parser.hs @@ -14,6 +14,7 @@ import Data.Graph.Inductive.Graph as Graph import Data.Graph.Inductive.Tree import Data.List import Data.Maybe +import Data.Char import qualified Data.Map as Map import Grasp.Types @@ -116,70 +117,141 @@ dup x = grasp = do string "digraph {" - whiteSpace - (n,e) <- stmtList ([],[]) + whiteSpac' + (n,e) <- stmtLis' ([],[]) string "}" - eol + eo' eof return (n,e) -stmtList (n,e) = - try (node >>= (\x -> stmtList (x:n,e)) ) - <|> try (edge >>= (\x -> stmtList (n,x:e)) ) +stmtLis' (n,e) = + try (nod' >>= (\x -> stmtLis' (x:n,e)) ) + <|> try (edg' >>= (\x -> stmtLis' (n,x:e)) ) <|> return (reverse n, reverse e) -node = do - i <- ident - l <- labelAttrib - whiteSpace +nod' = do + i <- iden' + l <- labelAttri' + whiteSpac' return (i,l) -edge = do - a <- ident - directedEdge - b <- ident - l <- labelAttrib - whiteSpace +edg' = do + a <- iden' + directedEdg' + b <- iden' + l <- labelAttri' + whiteSpac' return (a,b,l) -ident = do +iden' = do d <- some (noneOf " \t\r\n") - inLineWhSp + inLineWhS' return d -labelAttrib = do +labelAttri' = do char '[' - inLineWhSp + inLineWhS' string "label=\"" - l <- labelID + l <- labelI' char '\"' - inLineWhSp + inLineWhS' string "];" return l -labelID = some (noneOf "\"\r\n\\" <|> escapedChar) +labelI' = some (noneOf "\"\r\n\\" <|> escapedCha') -escapedChar = try (string "\\\"" >> return '\"') +escapedCha' = try (string "\\\"" >> return '\"') <|> try (string "\\\\" >> return '\\') -directedEdge = string "->" >> inLineWhSp +directedEdg' = string "->" >> inLineWhS' -inLineWhSp = many (oneOf "\t ") -whiteSpace = many (oneOf "\n\r\t ") +inLineWhS' = many (oneOf "\t ") +whiteSpac' = many (oneOf "\n\r\t ") -eol = try (string "\r\n") +eo' = try (string "\r\n") <|> try (string "\n\r") <|> try (string "\n") <|> try (string "\r") "end of line" + + + +-- work in progress more complete DOT language parser below this point + +graspDOT = do + optional strict + graphType + ident + openBrace + (n,e) <- stmtList ([],[]) + closeBrace + eof + return (n,e) + + +strict = caseInsensitiveString "strict" + + +graphType = try (caseInsensitiveString "digraph") "digraph" + + +ident = (try alphaNumString) + <|> (try numeral) + <|> (try quotedString) + "ID" + + +stmtList (n,e) = + try (node >>= (\x -> stmtList (x:n,e)) ) + <|> try (edge >>= (\x -> stmtList (n,x:e)) ) + <|> try (attr >> stmtList (n,e)) + <|> try (subgraph >>= (\(x,y) -> stmtList (x ++ n, y ++ e)) ) + <|> return (reverse n, reverse e) + + +-- todo +alphaNumString = return "a" + + +-- todo +numeral = return "0" + + +-- todo +quotedString = return "\"" + + +-- todo +node = return ("1","a") + + +-- todo +edge = return ("1","2","b") + + +-- todo +attr = return "" + + +-- todo +subgraph = return ([],[]) + + +openBrace = char '{' +closeBrace = char '}' + + +caseInsensitiveChar c = char (toLower c) <|> char (toUpper c) +caseInsensitiveString s = mapM caseInsensitiveChar s + -- cgit