# nimpsyc/parse.nim
# (494 lines, 18 KiB, Nim)

import strutils, unittest
type
  # Position of the incremental parser inside a packet; `parse` advances
  # through these states in declaration order (looping on the modifier states).
  ParseState = enum
    RoutingModifierName,
    RoutingModifierValue,
    ContentLength,
    EntityModifierName,
    EntityModifierValue,
    Method,
    Data,
    Complete
  # One header line: an operator character, a keyword and its value.
  Modifier = object
    op: char
    name: string
    value: string
  # Parse result plus the bookkeeping needed to resume parsing when the
  # input arrives in several chunks.
  PsycPacket = object
    routingHeader*: seq[Modifier]
    entityHeader*: seq[Modifier]
    # Sum of the cursor advances made while parsing entity modifiers.
    entityHeaderLen*: int
    # Value of the content-length line; -1 when the line is empty.
    contentLength*: int
    methodName*: string
    data*: string
    state*: ParseState
    # Index of the next unparsed byte in the current input; `parse` resets
    # it to 0 whenever it asks for more input.
    cursor: int
    # Bytes still expected for a length-prefixed part; -1 when no length
    # is pending.
    remainingPartLen: int
  # Base type of all errors raised by the parser.
  PsycSyntaxError = object of Exception
  InvalidModifierName = object of PsycSyntaxError
  InvalidModifierValue = object of PsycSyntaxError
  InvalidContentLength = object of PsycSyntaxError
  # Per-character predicate used by `getUntil`.
  Validator = proc(c: char): bool
proc validateTrue(c: char): bool =
  ## Default `Validator`: accepts every character.
  result = true
proc getUntil(input: string, delimiters: openArray[char], packet: var PsycPacket,
              validate: Validator = validateTrue): tuple[complete: bool,
                                                         value: string] =
  ## Scans `input` from `packet.cursor` for the first character contained in
  ## `delimiters`.
  ##
  ## When a delimiter is found, returns `complete = true` together with the
  ## text in front of it and leaves `packet.cursor` ON the delimiter. When
  ## the input ends first, returns `complete = false` and leaves the cursor
  ## untouched.
  ##
  ## Raises `InvalidModifierName` as soon as `validate` rejects a
  ## non-delimiter character.
  assert(packet.cursor < input.len())
  let first = packet.cursor
  var pos = first
  while pos <= input.high():
    let c = input[pos]
    if c in delimiters:
      packet.cursor = pos
      if pos > first:
        return (true, input[first .. pos - 1])
      return (true, "")
    if not validate(c):
      raise newException(InvalidModifierName, "invalid character in " & $packet.state)
    pos.inc()
  result.complete = false
proc parseKeyword(input: string,
                  packet: var PsycPacket): tuple[complete: bool, value: string] =
  ## Reads a keyword terminated by ' ', '\t' or '\n' and moves the cursor
  ## past the terminator.
  ##
  ## Returns `complete = false` (cursor unchanged) when no terminator is in
  ## the input yet. Raises `InvalidModifierName` when the keyword contains a
  ## character other than '_' or an alphanumeric, is empty, or does not
  ## start with '_'.
  assert(packet.cursor < input.len())
  proc isKeywordChar(c: char): bool =
    c.isAlphaNumeric() or c == '_'
  result = getUntil(input, [' ', '\t', '\n'], packet, isKeywordChar)
  if not result.complete:
    return
  packet.cursor.inc()
  let startsWithUnderscore = result.value.len() > 0 and result.value[0] == '_'
  if not startsWithUnderscore:
    raise newException(InvalidModifierName,
                       $packet.state & " does not start with '_'")
proc parseModifierValueLength(input: string,
                              packet: var PsycPacket): tuple[complete: bool,
                                                             value: int] =
  ## Reads the decimal length of a binary-arg, which is terminated by '\t',
  ## and moves the cursor past the tab.
  ##
  ## Returns `(false, -1)` when the tab has not arrived yet. Raises
  ## `InvalidModifierValue` when the length is empty, not an integer, or
  ## negative.
  let scanned = input.getUntil(['\t'], packet)
  if not scanned.complete:
    return (false, -1)
  packet.cursor.inc()
  if scanned.value.len() == 0:
    raise newException(InvalidModifierValue, "missing length of binary-arg")
  var length: int
  try:
    length = scanned.value.parseInt()
  except ValueError:
    raise newException(InvalidModifierValue, "invalid length of binary-arg")
  if length < 0:
    raise newException(InvalidModifierValue, "invalid length of binary-arg")
  result = (true, length)
proc parseModifierName(input: string,
                       packet: var PsycPacket): tuple[complete: bool,
                                                      op: char,
                                                      name: string] =
  ## Parses the operator and keyword of a modifier, and for a binary-arg
  ## also its length prefix (stored in `packet.remainingPartLen`).
  ##
  ## On success the cursor is left at the first byte of the value (for the
  ## state sync operator '?' it is left on the '\n' that follows it).
  ## Whenever the modifier head is not completely available, `complete` is
  ## false and the cursor is restored to the operator, so that the caller
  ## can resubmit the whole head together with the next chunk.
  ##
  ## Raises `InvalidModifierName` for a malformed keyword or a named '?'
  ## modifier, and `InvalidModifierValue` for a bad separator, a bad length,
  ## or a binary-arg outside of the entity header.
  assert(packet.cursor < input.len())
  let start = packet.cursor
  if start == input.high():
    # Only the operator is available; its keyword has not arrived yet.
    result.complete = false
    return
  result.op = input[start]
  packet.cursor.inc()
  if result.op == '?':
    if input[packet.cursor] != '\n':
      raise newException(InvalidModifierName,
                         "state sync modifier '?' must not have a name")
    result.complete = true
    return
  (result.complete, result.name) = parseKeyword(input, packet)
  if not result.complete:
    packet.cursor = start
    return
  # the following separator must either be ' ' (binary-arg) or '\t' (simple-arg)
  if input[packet.cursor - 1] == ' ':
    if packet.state != ParseState.EntityModifierName:
      raise newException(InvalidModifierValue,
                         "binary-arg not allowed in routing modifier")
    if packet.cursor > input.high():
      # The separator was the last byte: the length has not arrived yet.
      result.complete = false
      packet.cursor = start
      return
    (result.complete, packet.remainingPartLen) = parseModifierValueLength(input,
                                                                          packet)
    if not result.complete:
      # Length digits are cut off; rewind so the keyword is not lost when
      # the caller resubmits the unparsed rest of the input.
      packet.cursor = start
  elif input[packet.cursor - 1] != '\t':
    raise newException(InvalidModifierValue, "modifier has invalid separator")
proc parseMethod(input: string, packet: var PsycPacket): tuple[complete: bool,
                                                               value: string] =
  ## Parses the method keyword. A '|' at the cursor means the packet has no
  ## method: the result is then complete with a nil name and the cursor is
  ## not moved.
  assert(packet.cursor < input.len())
  if input[packet.cursor] != '|':
    return parseKeyword(input, packet)
  result.complete = true
  result.value = nil
proc parseModifierSimpleValue(input: string,
                              packet: var PsycPacket): tuple[complete: bool,
                                                             value: string] =
  ## Reads a modifier value that is terminated by '\n'.
  ##
  ## With the newline present, returns the text before it and moves the
  ## cursor past the newline. Otherwise returns the rest of the input as a
  ## partial value (`complete = false`) and moves the cursor to the end.
  assert(packet.cursor < input.len())
  result = input.getUntil(['\n'], packet)
  if not result.complete:
    result.value = input.substr(packet.cursor)
    packet.cursor += result.value.len()
    return
  packet.cursor.inc()
proc parseModifierValue(input: string,
                        packet: var PsycPacket): tuple[complete: bool,
                                                       value: string] =
  ## Reads the value of an entity modifier.
  ##
  ## Without a pending length (`remainingPartLen < 0`) the value is a
  ## simple-arg and is delegated to `parseModifierSimpleValue`. Otherwise
  ## exactly `remainingPartLen` bytes are read, followed by a mandatory
  ## '\n'. A partial value is returned with `complete = false`, and
  ## `remainingPartLen` is reduced by the bytes handed out.
  ##
  ## Raises `InvalidModifierValue` when the binary-arg is not followed by
  ## '\n'.
  assert(packet.cursor < input.len())
  if packet.remainingPartLen < 0:
    return parseModifierSimpleValue(input, packet)
  let available = input.len() - packet.cursor
  # The value is only complete once its trailing '\n' is available too,
  # hence the strict comparison.
  result.complete = packet.remainingPartLen < available
  if result.complete:
    let valueEnd = packet.cursor + packet.remainingPartLen
    result.value = input[packet.cursor .. valueEnd - 1]
    if input[valueEnd] != '\n':
      raise newException(InvalidModifierValue,
                         $packet.state & " is missing '\\n' after binary-arg")
    packet.cursor = valueEnd + 1
    packet.remainingPartLen = -1
  else:
    # Everything that is left belongs to the value; the rest (and the
    # newline) is expected in the next chunk.
    result.value = input[packet.cursor .. input.high()]
    packet.cursor += result.value.len()
    packet.remainingPartLen -= result.value.len()
proc parseContentLength(input: string,
                        packet: var PsycPacket): tuple[complete: bool, value: int] =
  ## Parses the content-length line, which is terminated by '\n', and moves
  ## the cursor past the newline.
  ##
  ## An empty line yields -1 ("no content length"). Returns
  ## `complete = false` (cursor unchanged) while the newline is missing.
  ##
  ## Raises `InvalidContentLength` when the line is not an integer or is
  ## negative; a negative number would otherwise be mistaken for the
  ## "no content length" marker.
  assert(packet.cursor < input.len())
  let (complete, value) = input.getUntil(['\n'], packet)
  result.complete = complete
  if not complete:
    return
  packet.cursor.inc()
  if value.len() == 0:
    result.value = -1
    return
  try:
    result.value = value.parseInt()
  except ValueError:
    raise newException(InvalidContentLength,
                       $packet.state & " is not a positive integer")
  if result.value < 0:
    raise newException(InvalidContentLength,
                       $packet.state & " is not a positive integer")
proc parseData(input: string,
               packet: var PsycPacket): tuple[complete: bool, value: string] =
  ## Reads the packet body up to (and including) the "\n|\n" terminator.
  ##
  ## Without a content length the body ends at the first terminator found
  ## at or after the cursor. A "|\n" directly at the cursor is accepted as
  ## the end of an empty body. While the terminator is missing, the
  ## available bytes are returned as a partial value; a trailing "\n" or
  ## "\n|" is left unparsed because it may be the beginning of a terminator
  ## that is split across chunks.
  ##
  ## With a content length the body size is derived from it and tracked in
  ## `packet.remainingPartLen`. The body is complete once all of its bytes
  ## plus the terminator are available; until then at most
  ## `remainingPartLen` bytes are handed out and terminator bytes are left
  ## unparsed.
  ##
  ## NOTE(review): the terminator bytes after a length-delimited body are
  ## skipped without being verified.
  const terminator = "\n|\n"
  assert(packet.cursor < input.len())
  if packet.contentLength < 0:
    if input.continuesWith("|\n", packet.cursor):
      packet.cursor += 2
      return (true, "")
    # Search from the cursor, not from the start of the input: earlier
    # bytes (e.g. a binary-arg) may contain the terminator sequence.
    let findResult = input.find(terminator, packet.cursor)
    if findResult >= 0:
      result.complete = true
      result.value = input[packet.cursor .. findResult - 1]
      packet.cursor = findResult + terminator.len()
      return
    # Hold back a possible terminator prefix at the end of the input.
    var stop = input.len()
    if input.endsWith("\n|"):
      stop -= 2
    elif input.endsWith("\n"):
      stop -= 1
    stop = max(stop, packet.cursor)
    result.complete = false
    result.value = input[packet.cursor .. stop - 1]
    packet.cursor = stop
    return
  elif packet.remainingPartLen < 0:
    packet.remainingPartLen = packet.contentLength -
                              packet.entityHeaderLen -
                              packet.methodName.len() -
                              sizeof('\n')
  let available = input.len() - packet.cursor
  result.complete = packet.remainingPartLen + terminator.len() <= available
  if result.complete:
    result.value = input[packet.cursor .. packet.cursor + packet.remainingPartLen - 1]
    packet.cursor += packet.remainingPartLen + terminator.len()
    packet.remainingPartLen = -1
  else:
    # Never hand out more than the body itself, and never move backwards.
    let take = max(0, min(available, packet.remainingPartLen))
    result.value = input[packet.cursor .. packet.cursor + take - 1]
    packet.cursor += take
    packet.remainingPartLen -= take
proc isModifierName(input: string, cursor: int): bool =
  ## True when the character at `cursor` is one of the modifier operators
  ## '=', ':', '+', '-' or '?'.
  result = input[cursor] in {'=', ':', '+', '-', '?'}
proc newPacket*(): PsycPacket =
  ## Creates an empty packet that is ready to parse a routing header:
  ## both headers are empty and no content length or part length is known.
  result.routingHeader = newSeq[Modifier]()
  result.entityHeader = newSeq[Modifier]()
  result.state = ParseState.RoutingModifierName
  result.contentLength = -1
  result.remainingPartLen = -1
proc consumePart*(packet: var PsycPacket, slice: Slice[int]): string =
  ## Moves the part that is currently being accumulated out of `packet`
  ## and returns it: the value of the last routing or entity modifier, or
  ## the data, depending on `packet.state`. The field is swapped with the
  ## (default-initialised) result. In any other state the default result is
  ## returned and the packet is not changed.
  ##
  ## NOTE(review): `slice` is only used by the assertion below; presumably
  ## it is the `unparsed` slice returned by `parse` - confirm with callers.
  assert(packet.remainingPartLen >= slice.len())
  case packet.state
  of ParseState.RoutingModifierValue:
    if packet.routingHeader.len() > 0: swap(result, packet.routingHeader[^1].value)
  of ParseState.EntityModifierValue:
    if packet.entityHeader.len() > 0: swap(result, packet.entityHeader[^1].value)
  of ParseState.Data:
    swap(result, packet.data)
  else:
    return
  # Account for the bytes that were handed out to the caller.
  packet.remainingPartLen -= result.len()
proc parse*(input: string,
            packet: var PsycPacket): tuple[needMoreInput: bool,
                                           unparsed: Slice[int]] =
  ## Performs ONE step of the packet state machine on `input`, starting at
  ## the packet's internal cursor, and stores what was parsed in `packet`.
  ##
  ## Returns `unparsed`, the slice of `input` that has not been consumed,
  ## and `needMoreInput`. When `needMoreInput` is true the internal cursor
  ## is reset to 0, so the next call must be given a new input that starts
  ## with the unparsed bytes (if any). When it is false, call `parse` again
  ## with the same input until `packet.state` is `ParseState.Complete`.
  ##
  ## Empty or exhausted input only reports `needMoreInput`; it never
  ## changes the parser state.
  ##
  ## Raises `InvalidModifierName`, `InvalidModifierValue` or
  ## `InvalidContentLength` (all `PsycSyntaxError`) on malformed input.
  ## Must not be called on a packet that is already complete.
  if packet.state != ParseState.Complete and packet.cursor > input.high():
    # Nothing to look at: indexing the input would be out of range and
    # could be mistaken for the end of a header section.
    result.needMoreInput = true
    result.unparsed = packet.cursor .. input.high()
    packet.cursor = 0
    return
  result.needMoreInput = true
  case packet.state:
  of ParseState.RoutingModifierName:
    if not isModifierName(input, packet.cursor):
      # No operator character: the routing header is finished.
      packet.state = ParseState.ContentLength
      result.needMoreInput = false
    else:
      let (complete, op, name) = parseModifierName(input, packet)
      if complete:
        result.needMoreInput = false
        packet.state = ParseState.RoutingModifierValue
        let modifier = Modifier(op: op, name: name)
        packet.routingHeader.add(modifier)
  of ParseState.RoutingModifierValue:
    let (complete, value) = parseModifierSimpleValue(input, packet)
    result.needMoreInput = not complete
    if value.len() > 0:
      # Values may arrive in pieces; append to what is already there.
      if packet.routingHeader[^1].value.isNil():
        packet.routingHeader[^1].value = ""
      packet.routingHeader[^1].value.add(value)
    if complete:
      packet.state = ParseState.RoutingModifierName
  of ParseState.ContentLength:
    let (complete, value) = parseContentLength(input, packet)
    if complete:
      result.needMoreInput = false
      packet.state = ParseState.EntityModifierName
      packet.contentLength = value
  of ParseState.EntityModifierName:
    if not input.isModifierName(packet.cursor):
      # No operator character: the entity header is finished.
      packet.state = ParseState.Method
      result.needMoreInput = false
    else:
      let oldCursor = packet.cursor
      let (complete, op, name) = parseModifierName(input, packet)
      packet.entityHeaderLen += packet.cursor - oldCursor
      if complete:
        result.needMoreInput = false
        packet.state = ParseState.EntityModifierValue
        let modifier = Modifier(op: op, name: name)
        packet.entityHeader.add(modifier)
  of ParseState.EntityModifierValue:
    let oldCursor = packet.cursor
    let (complete, value) = parseModifierValue(input, packet)
    result.needMoreInput = not complete
    packet.entityHeaderLen += (packet.cursor - oldCursor)
    if value.len() > 0:
      if packet.entityHeader[^1].value.isNil():
        packet.entityHeader[^1].value = ""
      packet.entityHeader[^1].value.add(value)
    if complete:
      packet.state = ParseState.EntityModifierName
  of ParseState.Method:
    let (complete, value) = parseMethod(input, packet)
    if complete:
      result.needMoreInput = false
      packet.state = ParseState.Data
      packet.methodName = value
      packet.data = ""
  of ParseState.Data:
    let (complete, value) = parseData(input, packet)
    result.needMoreInput = not complete
    if value.len() > 0:
      packet.data.add(value)
    if complete:
      packet.state = ParseState.Complete
  of ParseState.Complete:
    assert(false)
  result.unparsed = packet.cursor .. input.high()
  if packet.state != ParseState.Complete:
    # A step that used up the whole input also needs more input.
    result.needMoreInput = result.needMoreInput or packet.cursor > input.high()
    if result.needMoreInput:
      packet.cursor = 0
suite "parser tests":
  # Every test starts with a fresh packet.
  setup:
    var
      needMore: bool
      unparsed: Slice[int]
      packet = newPacket()
  # A lone '?' entity modifier has neither a name nor a value.
  test "state sync":
    let input = ":_target\talice\n\n?\n|\n"
    while packet.state != ParseState.Complete:
      (needMore, unparsed) = parse(input, packet)
      check(not needMore)
    check(unparsed == input.len()..input.high())
    check(packet.routingHeader.len() == 1)
    check(packet.routingHeader[0] == Modifier(op: ':',
                                              name: "_target",
                                              value: "alice"))
    check(packet.entityHeader.len() == 1)
    check(packet.entityHeader[0] == Modifier(op: '?', name: nil, value: nil))
  # Entity modifiers whose values are terminated by a newline.
  test "simple-arg":
    let input = ":_target\talice\n\n:_hello\tworld\n:_hallo\twelt\n|\n"
    while packet.state != ParseState.Complete:
      (needMore, unparsed) = parse(input, packet)
      check(not needMore)
    check(unparsed == input.len()..input.high())
    check(packet.entityHeader.len() == 2)
    check(packet.entityHeader[0] == Modifier(op: ':', name: "_hello", value: "world"))
    check(packet.entityHeader[1] == Modifier(op: ':', name: "_hallo", value: "welt"))
  # Entity modifiers whose values carry an explicit length prefix.
  test "binary-arg":
    let input = ":_target\talice\n\n:_hello 5\tworld\n:_hallo 4\twelt\n|\n"
    while packet.state != ParseState.Complete:
      (needMore, unparsed) = parse(input, packet)
      check(not needMore)
    check(unparsed == input.len()..input.high())
    check(packet.entityHeader.len() == 2)
    check(packet.entityHeader[0] == Modifier(op: ':', name: "_hello", value: "world"))
    check(packet.entityHeader[1] == Modifier(op: ':', name: "_hallo", value: "welt"))
  # Method name and body without a content length.
  test "method/data":
    let input = ":_target\talice\n\n_hello_world\nHello Alice!\n|\n"
    while packet.state != ParseState.Complete:
      (needMore, unparsed) = parse(input, packet)
      check(not needMore)
    check(unparsed == input.len()..input.high())
    check(packet.methodName == "_hello_world")
    check(packet.data == "Hello Alice!")
  # Body delimited by an explicit content length.
  test "content length":
    let input = ":_target\talice\n39\n:_hello\tworld\n_hello_world\nHello Alice!\n|\n"
    while packet.state != ParseState.Complete:
      (needMore, unparsed) = parse(input, packet)
      check(not needMore)
    check(unparsed == input.len()..input.high())
    check(packet.contentLength == 39)
    check(packet.data == "Hello Alice!")
  # Feeds one packet in small chunks and checks the state after each step.
  test "incomplete":
    let
      input1 = ":_target\t"
      input2 = "alice\n"
      input3 = "39\n"
      input4 = ":_hello\t"
      input5 = "world\n"
      input6 = "_hello_world\n"
      input7 = "Hello Alice!\n|\n"
    # routing modifier name, cut off inside the keyword
    (needMore, unparsed) = parse(input1[0..4], packet)
    check(needMore)
    check(unparsed == 0..4)
    check(packet.state == ParseState.RoutingModifierName)
    check(packet.routingHeader.len() == 0)
    (needMore, unparsed) = parse(input1[unparsed] & input1[5..8], packet)
    check(needMore)
    check(unparsed == 9..8)
    check(packet.state == ParseState.RoutingModifierValue)
    check(packet.routingHeader[0] == Modifier(op: ':', name: "_target", value: nil))
    # routing modifier value in two pieces
    (needMore, unparsed) = parse(input2[0..2], packet)
    check(needMore)
    check(unparsed == 3..2)
    check(packet.state == ParseState.RoutingModifierValue)
    check(packet.routingHeader[0] == Modifier(op: ':', name: "_target", value: "ali"))
    (needMore, unparsed) = parse(input2[3..5], packet)
    check(needMore)
    check(unparsed == 3..2)
    check(packet.state == ParseState.RoutingModifierName)
    check(packet.routingHeader[0] == Modifier(op: ':', name: "_target", value: "alice"))
    # content length without, then with, its newline
    (needMore, unparsed) = parse(input3[0..1], packet)
    check(not needMore)
    check(unparsed == 0..1)
    check(packet.state == ParseState.ContentLength)
    (needMore, unparsed) = parse(input3[0..1], packet)
    check(needMore)
    check(unparsed == 0..1)
    check(packet.state == ParseState.ContentLength)
    check(packet.contentLength == -1)
    (needMore, unparsed) = parse(input3[unparsed] & input3[2], packet)
    check(needMore)
    check(unparsed == 3..2)
    check(packet.state == ParseState.EntityModifierName)
    check(packet.contentLength == 39)
    # entity modifier name, cut off inside the keyword
    (needMore, unparsed) = parse(input4[0..3], packet)
    check(needMore)
    check(unparsed == 0..3)
    check(packet.state == ParseState.EntityModifierName)
    check(packet.entityHeader.len() == 0)
    (needMore, unparsed) = parse(input4[unparsed] & input4[4..7], packet)
    check(needMore)
    check(unparsed == 8..7)
    check(packet.state == ParseState.EntityModifierValue)
    check(packet.entityHeader[0] == Modifier(op: ':', name: "_hello", value: nil))
    # entity modifier value in two pieces
    (needMore, unparsed) = parse(input5[0..2], packet)
    check(needMore)
    check(unparsed == 3..2)
    check(packet.state == ParseState.EntityModifierValue)
    check(packet.entityHeader[0] == Modifier(op: ':', name: "_hello", value: "wor"))
    (needMore, unparsed) = parse(input5[unparsed] & input5[3..5], packet)
    check(needMore)
    check(unparsed == 3..2)
    check(packet.state == ParseState.EntityModifierName)
    check(packet.entityHeader[0] == Modifier(op: ':', name: "_hello", value: "world"))
    # method name, cut off inside the keyword
    (needMore, unparsed) = parse(input5[unparsed] & input6[0..6], packet)
    check(not needMore)
    check(unparsed == 0..6)
    check(packet.state == ParseState.Method)
    (needMore, unparsed) = parse(input6[0..6], packet)
    check(needMore)
    check(unparsed == 0..6)
    check(packet.state == ParseState.Method)
    (needMore, unparsed) = parse(input6[unparsed] & input6[7..12], packet)
    check(needMore)
    check(unparsed == 13..12)
    check(packet.state == ParseState.Data)
    check(packet.methodName == "_hello_world")
    # data in two pieces
    (needMore, unparsed) = parse(input7[0..5], packet)
    check(needMore)
    check(unparsed == 6..5)
    check(packet.state == ParseState.Data)
    check(packet.data == "Hello ")
    (needMore, unparsed) = parse(input7[6..14], packet)
    check(not needMore)
    check(unparsed == 9..8)
    check(packet.state == ParseState.Complete)
    check(packet.data == "Hello Alice!")
  test "invalid character in keyword":
    let input = ":\0\n"
    expect(InvalidModifierName):
      (needMore, unparsed) = parse(input, packet)
  test "empty keyword":
    let input = ":\n"
    expect(InvalidModifierName):
      (needMore, unparsed) = parse(input, packet)
  test "routing modifier with binary-arg":
    let input = ":_test 4\ttest\n"
    expect(InvalidModifierValue):
      (needMore, unparsed) = parse(input, packet)
  test "binary-arg without length":
    let input = "\n:_test \t"
    # two steps to get past the empty routing header and content length
    (needMore, unparsed) = parse(input, packet)
    (needMore, unparsed) = parse(input, packet)
    check(packet.state == ParseState.EntityModifierName)
    expect(InvalidModifierValue):
      (needMore, unparsed) = parse(input, packet)
  test "invalid content length":
    let input = "abc\n"
    # the first step only leaves the (empty) routing header
    (needMore, unparsed) = parse(input, packet)
    check(packet.state == ParseState.ContentLength)
    expect(InvalidContentLength):
      (needMore, unparsed) = parse(input, packet)
echo "parser tests completed"