# Incremental parser for PSYC-style packets (see `PsycPacket`).
#
# Provenance: recovered from the patch of commit
# 48af85e4a64b93da4271af54d10f3c2f326bd111 ("initial commit", author lurchi,
# Thu Aug 16 22:26:53 2018 +0200), which added this file as `parse.nim`.
#
# Known limitations of the state machine in `parse` (unchanged from the
# initial commit):
#   * exactly one routing modifier and one entity modifier are read before
#     the parser moves on to the next section;
#   * `parseData` does not advance the cursor past the data it returns.

import strutils, unittest

type
  # Section of the packet the parser will read on the next `parse` call.
  ParseState* = enum
    RoutingModifierName,
    RoutingModifierValue,
    ContentLength,
    EntityModifierName,
    EntityModifierValue,
    Method,
    Data,
    Complete

  # One header line: operator character, keyword and value.
  # `name` and `value` are empty strings when the line carries none.
  Modifier* = object
    op*: char
    name*: string
    value*: string

  # Parse result plus the bookkeeping needed to resume parsing.
  PsycPacket* = object
    routingHeader*: seq[Modifier]
    entityHeader*: seq[Modifier]
    entityHeaderLen*: int  # bytes consumed while reading the entity header
    contentLength*: int    # -1 when the content-length line is empty
    methodName*: string
    data*: string
    state*: ParseState
    cursor: int            # index into `input` of the next unread byte
    remainingPartLen: int  # announced length of a binary part, -1 if none

  Validator = proc(c: char): bool

proc validateTrue(c: char): bool = true

proc getUntil(input: string, delimiters: openArray[char],
              packet: var PsycPacket,
              validate: Validator = validateTrue): tuple[complete: bool,
                                                         value: string] =
  ## Scans `input` from `packet.cursor` up to the first byte contained in
  ## `delimiters`.
  ##
  ## On success `complete` is true, `value` holds the bytes in front of the
  ## delimiter (empty if there are none) and the cursor is left ON the
  ## delimiter. If no delimiter is found the cursor is untouched and
  ## `complete` is false.
  ##
  ## Raises `ValueError` when `validate` rejects a byte that precedes the
  ## delimiter.
  assert(packet.cursor < input.len())
  result.complete = false
  for i in packet.cursor .. input.high():
    if input[i] in delimiters:
      result.complete = true
      if i > packet.cursor:
        result.value = input[packet.cursor .. i - 1]
      packet.cursor = i
      break
    if not validate(input[i]):
      raise newException(ValueError, "invalid character in input")

proc parseName(input: string, delimiters: openArray[char],
               packet: var PsycPacket): tuple[complete: bool,
                                              value: string] =
  ## Reads a keyword made of '_' and alphanumeric characters that is
  ## terminated by one of `delimiters`, and moves the cursor past the
  ## delimiter.
  ##
  ## Raises `ValueError` if the keyword contains another character, is
  ## empty, or does not start with '_'.
  proc validate(c: char): bool = c == '_' or c.isAlphaNumeric()
  result = input.getUntil(delimiters, packet, validate)
  if result.complete:
    packet.cursor.inc()
    # The name can only be judged once its delimiter has been seen.
    if result.value.len() == 0 or result.value[0] != '_':
      raise newException(ValueError, "keyword does not start with '_'")

proc parseKeyword(input: string,
                  packet: var PsycPacket): tuple[complete: bool,
                                                 value: string] =
  ## Reads a modifier keyword terminated by ' ' or '\t'. The consumed
  ## separator can be inspected at `input[packet.cursor - 1]` afterwards.
  assert(packet.cursor < input.len())
  result = parseName(input, [' ', '\t'], packet)

proc parseModifierValueLength(input: string,
                              packet: var PsycPacket): tuple[complete: bool,
                                                             value: int] =
  ## Reads the decimal length that precedes a binary modifier value and the
  ## '\t' that terminates it. Returns `(false, -1)` if the '\t' has not
  ## arrived yet.
  ##
  ## Raises `ValueError` if the length is missing, not a number or negative.
  let (complete, value) = input.getUntil(['\t'], packet)
  if not complete:
    return (false, -1)
  packet.cursor.inc()
  if value.len() == 0:
    raise newException(ValueError, "missing modifier value length")
  result = (true, value.parseInt()) # parseInt raises ValueError itself
  if result.value < 0:
    raise newException(ValueError, "negative modifier value length")

proc parseModifierName(input: string,
                       packet: var PsycPacket): tuple[complete: bool,
                                                      op: char,
                                                      name: string] =
  ## Reads the operator and keyword at the start of a modifier line.
  ##
  ## * A line starting with '\n' yields `(true, '\0', "")` and is consumed.
  ## * The operator '?' must be followed directly by '\n'; that '\n' is
  ##   left for the value parser.
  ## * A keyword followed by ' ' announces a binary value: its length is
  ##   read here and stored in `packet.remainingPartLen`.
  ## * A keyword followed by '\t' announces a simple value.
  ##
  ## Raises `ValueError` for an unknown operator or a malformed keyword or
  ## length.
  assert(packet.cursor < input.len())
  const operators = {'=', ':', '+', '-', '?'}
  if packet.cursor == input.high():
    # A single remaining byte is reported as incomplete.
    result.complete = false
    return
  if input[packet.cursor] == '\n':
    packet.cursor.inc()
    return (true, '\0', "")
  result.op = input[packet.cursor]
  if result.op notin operators:
    raise newException(ValueError, "invalid modifier operator")
  packet.cursor.inc()
  if result.op == '?':
    if input[packet.cursor] != '\n':
      raise newException(ValueError, "'?' must be on a line by itself")
    result.complete = true
    return
  let (nameDone, name) = parseKeyword(input, packet)
  result.complete = nameDone
  result.name = name
  if not nameDone:
    return
  # The consumed separator is either ' ' (binary-arg) or '\t' (simple-arg).
  if input[packet.cursor - 1] == ' ':
    let (lenDone, valueLen) = parseModifierValueLength(input, packet)
    result.complete = lenDone
    packet.remainingPartLen = valueLen
  elif input[packet.cursor - 1] != '\t':
    raise newException(ValueError, "invalid separator after keyword")

proc parseMethod(input: string,
                 packet: var PsycPacket): tuple[complete: bool,
                                                value: string] =
  ## Reads the method name. A '|' at the cursor means there is no method:
  ## the result is `(true, "")` and nothing is consumed.
  ##
  ## Otherwise the name runs up to the next '\n', which is consumed; this
  ## matches the single '\n' that `parseData` subtracts after the method
  ## name when it computes the data length.
  assert(packet.cursor < input.len())
  if input[packet.cursor] == '|':
    return (true, "")
  return parseName(input, ['\n'], packet)

proc parseModifierSimpleValue(input: string,
                              packet: var PsycPacket): tuple[complete: bool,
                                                             value: string] =
  ## Reads a value that extends to the next '\n' and consumes that '\n'.
  assert(packet.cursor < input.len())
  result = input.getUntil(['\n'], packet)
  if result.complete:
    packet.cursor.inc()

proc parseModifierValue(input: string,
                        packet: var PsycPacket): tuple[complete: bool,
                                                       value: string] =
  ## Reads a modifier value. Without an announced length
  ## (`remainingPartLen < 0`) this is a simple value; otherwise exactly
  ## `remainingPartLen` bytes are taken and must be followed by '\n'.
  ##
  ## Raises `ValueError` if the byte after a binary value is not '\n'.
  assert(packet.cursor < input.len())
  if packet.remainingPartLen < 0:
    return parseModifierSimpleValue(input, packet)
  # The value AND its trailing '\n' must be available, i.e. the index
  # `cursor + remainingPartLen` has to exist in `input`.
  result.complete = packet.remainingPartLen <= input.high() - packet.cursor
  if result.complete:
    let valueEnd = packet.cursor + packet.remainingPartLen
    result.value = input[packet.cursor .. valueEnd - 1]
    if input[valueEnd] != '\n':
      raise newException(ValueError, "missing '\\n' after binary value")
    packet.cursor = valueEnd + 1
    packet.remainingPartLen = -1

proc parseContentLength(input: string,
                        packet: var PsycPacket): tuple[complete: bool,
                                                       value: int] =
  ## Reads the content-length line up to and including its '\n'.
  ## An empty line yields -1.
  ##
  ## Raises `ValueError` if the line is not a number or is negative.
  assert(packet.cursor < input.len())
  let (complete, value) = input.getUntil(['\n'], packet)
  result.complete = complete
  if result.complete:
    packet.cursor.inc()
    if value.len() == 0:
      result.value = -1
    else:
      result.value = value.parseInt() # parseInt raises ValueError itself
      if result.value < 0:
        raise newException(ValueError, "negative content length")

proc parseData(input: string,
               packet: var PsycPacket): tuple[complete: bool,
                                              value: string] =
  ## Extracts the packet data starting at the cursor. The cursor itself is
  ## not moved.
  ##
  ## Without a content length the data ends in front of the first "\n|\n"
  ## found at or after the cursor; "|\n" directly at the cursor means empty
  ## data. If no terminator is present, everything from the cursor on is
  ## returned with `complete == false`.
  ##
  ## With a content length the data size is that length minus the entity
  ## header bytes, the method name and one '\n'.
  assert(packet.cursor < input.len())
  if packet.remainingPartLen < 0:
    if packet.contentLength < 0:
      if input.continuesWith("|\n", packet.cursor):
        return (true, "")
      # Start at the cursor: header values that were already parsed may
      # themselves contain "\n|\n".
      let terminator = input.find("\n|\n", packet.cursor)
      if terminator < 0:
        return (false, input[packet.cursor .. input.high()])
      return (true, input[packet.cursor .. terminator - 1])
    packet.remainingPartLen = packet.contentLength -
                              packet.entityHeaderLen -
                              packet.methodName.len() -
                              1 # the '\n' that follows the method name
  result.complete = packet.remainingPartLen <= input.high() - packet.cursor
  if result.complete:
    let dataEnd = packet.cursor + packet.remainingPartLen
    result.value = input[packet.cursor .. dataEnd - 1]
    packet.remainingPartLen = -1

proc newPacket*(): PsycPacket =
  ## Creates an empty packet that is ready to be passed to `parse`.
  PsycPacket(routingHeader: newSeq[Modifier](),
             entityHeader: newSeq[Modifier](),
             state: ParseState.RoutingModifierName,
             remainingPartLen: -1)

proc consumePart*(packet: var PsycPacket, slice: Slice[int]): string =
  ## Moves the value collected for the current state out of `packet`
  ## (last routing modifier value, last entity modifier value, or the
  ## data), leaves an empty string in its place and reduces
  ## `remainingPartLen` by the length of the returned string.
  ## In every other state the result is empty and nothing changes.
  assert(packet.remainingPartLen >= slice.len())
  case packet.state
  of ParseState.RoutingModifierValue:
    if packet.routingHeader.len() > 0:
      swap(result, packet.routingHeader[^1].value)
  of ParseState.EntityModifierValue:
    if packet.entityHeader.len() > 0:
      swap(result, packet.entityHeader[^1].value)
  of ParseState.Data:
    swap(result, packet.data)
  else:
    return
  packet.remainingPartLen -= result.len()

proc parse*(input: string,
            packet: var PsycPacket): tuple[needMoreInput: bool,
                                           unparsed: Slice[int]] =
  ## Performs ONE step of the state machine on `input`, continuing at the
  ## position remembered in `packet`. Call it repeatedly until
  ## `packet.state == ParseState.Complete`.
  ##
  ## `unparsed` is the range of `input` that has not been consumed yet.
  ## When `needMoreInput` is true the internal cursor is reset to 0 for the
  ## next call.
  ##
  ## Raises `ValueError` on malformed input. Must not be called on a
  ## packet that is already complete.
  result.needMoreInput = true
  case packet.state
  of ParseState.RoutingModifierName:
    let (complete, op, name) = parseModifierName(input, packet)
    if complete:
      result.needMoreInput = false
      packet.state = ParseState.RoutingModifierValue
      packet.routingHeader.add(Modifier(op: op, name: name))
  of ParseState.RoutingModifierValue:
    let (complete, value) = parseModifierSimpleValue(input, packet)
    result.needMoreInput = not complete
    if complete:
      # Advance for empty values as well; otherwise the following line
      # would be appended to this modifier.
      packet.state = ParseState.ContentLength
      packet.routingHeader[^1].value.add(value)
  of ParseState.ContentLength:
    let (complete, value) = parseContentLength(input, packet)
    if complete:
      result.needMoreInput = false
      packet.state = ParseState.EntityModifierName
      packet.contentLength = value
  of ParseState.EntityModifierName:
    let oldCursor = packet.cursor
    let (complete, op, name) = parseModifierName(input, packet)
    packet.entityHeaderLen += packet.cursor - oldCursor
    if complete:
      result.needMoreInput = false
      packet.state = ParseState.EntityModifierValue
      packet.entityHeader.add(Modifier(op: op, name: name))
  of ParseState.EntityModifierValue:
    let oldCursor = packet.cursor
    let (complete, value) = parseModifierValue(input, packet)
    result.needMoreInput = not complete
    packet.entityHeaderLen += packet.cursor - oldCursor
    if complete:
      packet.state = ParseState.Method
      packet.entityHeader[^1].value.add(value)
  of ParseState.Method:
    let (complete, value) = parseMethod(input, packet)
    if complete:
      result.needMoreInput = false
      packet.state = ParseState.Data
      packet.methodName = value
      packet.data = ""
  of ParseState.Data:
    let (complete, value) = parseData(input, packet)
    result.needMoreInput = not complete
    if complete:
      packet.state = ParseState.Complete
    # parseData also hands back partial data while it is incomplete.
    packet.data.add(value)
  of ParseState.Complete:
    assert(false, "parse() called on a completed packet")
  result.unparsed = packet.cursor .. input.high()
  if result.needMoreInput:
    packet.cursor = 0

suite "parser tests":
  setup:
    var packet = newPacket()

  test "state sync":
    let input = ":_target\tpsyc://ve.symlynx.com/@blog\n\n?\n|\n"
    while packet.state != ParseState.Complete:
      discard parse(input, packet)
    check(packet.routingHeader.len() == 1)
    check(packet.routingHeader[0] == Modifier(
      op: ':', name: "_target", value: "psyc://ve.symlynx.com/@blog"))
    check(packet.entityHeader.len() == 1)
    check(packet.entityHeader[0] == Modifier(op: '?', name: "", value: ""))

  test "simple-arg":
    let input = ":_target\tpsyc://ve.symlynx.com/@blog\n\n" &
                ":_test\thello\n|\n"
    while packet.state != ParseState.Complete:
      discard parse(input, packet)
    check(packet.entityHeader.len() == 1)
    check(packet.entityHeader[0] == Modifier(
      op: ':', name: "_test", value: "hello"))

  test "binary-arg":
    let input = ":_target\tpsyc://ve.symlynx.com/@blog\n\n" &
                ":_test 5\thello\n|\n"
    while packet.state != ParseState.Complete:
      discard parse(input, packet)
    check(packet.entityHeader.len() == 1)
    check(packet.entityHeader[0] == Modifier(
      op: ':', name: "_test", value: "hello"))

echo "parser tests completed"