Initial commit

This commit is contained in:
strNophix 2022-06-09 19:34:12 +02:00
parent 74e388dda7
commit 8b265077e4
10 changed files with 587 additions and 1 deletions

160
.gitignore vendored Normal file
View File

@ -0,0 +1,160 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

View File

@ -1,3 +1,15 @@
# json-py
Simple JSON-parser written in python.
A simple JSON parser written in Python.
Inspired by [Phil Eaton's article](https://notes.eatonphil.com/writing-a-simple-json-parser.html).
## Example usage
```python
import json_py
print(json_py.from_json('{"hello": ["world", "!", 69], 3: 9.1}'))
# {'hello': ['world', '!', 69], 3: 9.1}
```
## Todo
Implement `json_py.to_json()`

1
json_py/__init__.py Normal file
View File

@ -0,0 +1 @@
from json_py.json_py import *

19
json_py/json_py.py Normal file
View File

@ -0,0 +1,19 @@
from __future__ import annotations
import typing
import json_py.lexer as lexer
import json_py.parser as parser
def to_json(obj: typing.Any) -> str:
    """Serialize *obj* into a JSON document string.

    Not implemented yet (tracked in the README's Todo section).

    Raises:
        NotImplementedError: always, until serialization is written.
    """
    raise NotImplementedError("Not implemented yet")
def from_json(string: str) -> typing.Any:
    """Deserialize the JSON document in *string* into the equivalent Python value.

    The document is tokenized by the lexer, then the parser consumes the
    token list; only the parsed value (not the leftover tokens) is returned.
    """
    return parser.parse(lexer.lex(string))[0]
# Public API: the names exported by ``from json_py.json_py import *``
# (re-exported from the package's __init__.py).
__all__ = (
    "to_json",
    "from_json",
)

97
json_py/lexer.py Normal file
View File

@ -0,0 +1,97 @@
from __future__ import annotations
import typing
TypeLexer = typing.Tuple[typing.Optional[typing.Any], str]
def lex_string(string: str) -> TypeLexer:
    """Lex a leading double-quoted string off *string*.

    Returns ``(contents, rest)`` when *string* starts with a string token,
    or ``(None, string)`` when it does not.

    Fixes over the original:
    - A backslash-escaped quote (``\\"``) no longer terminates the string
      early; escape sequences are kept verbatim in the returned contents
      (decoding them is a possible future improvement).
    - An unterminated string returns the input *unchanged* (opening quote
      included), so :func:`lex` reports the stray quote instead of silently
      dropping it and re-lexing the contents.
    """
    if not string.startswith('"'):
        return None, string
    i = 1
    while i < len(string):
        ch = string[i]
        if ch == "\\":
            # Skip the escaped character so an escaped quote cannot close
            # the string.
            i += 2
            continue
        if ch == '"':
            return string[1:i], string[i + 1 :]
        i += 1
    # No closing quote found: leave the input untouched for the caller.
    return None, string
def lex_number(string: str) -> TypeLexer:
    """Lex a leading number (int or float) off *string*.

    Returns ``(value, rest)`` when *string* starts with a number, or
    ``(None, string)`` otherwise.

    Generalized over the original: an optional leading ``-`` is accepted
    (the original made ``lex`` raise on any negative number).  A bare
    ``-`` not followed by a digit is still not a number.

    Raises:
        ValueError: when the number contains more than one decimal point.
    """
    # Accept a leading minus only when a digit follows it.
    start = 1 if string[:1] == "-" and string[1:2].isdigit() else 0
    if not string[start : start + 1].isdigit():
        return None, string
    has_decimal = False
    for i in range(start, len(string)):
        ch = string[i]
        if ch == ".":
            if has_decimal:
                raise ValueError("Invalid number")
            has_decimal = True
            continue
        if not ch.isdigit():
            literal = string[:i]
            return (float(literal) if has_decimal else int(literal)), string[i:]
    # The whole input is one number token.
    return (float(string) if has_decimal else int(string)), ""
def lex_bool(string: str) -> TypeLexer:
    """Lex a leading ``true``/``false`` literal (case-insensitive).

    Returns ``(bool_value, rest)`` on a match, ``(None, string)`` otherwise.
    """
    # Cheap first-character reject before comparing whole literals.
    if string[0].lower() not in "tf":
        return None, string
    lowered = string.lower()
    if lowered.startswith("true"):
        return True, string[4:]
    if lowered.startswith("false"):
        return False, string[5:]
    return None, string
def lex_null(string: str) -> TypeLexer:
    """Lex a leading ``null`` literal (case-insensitive).

    Returns ``(True, rest)`` as a found-marker -- :func:`lex` is the one
    that appends the actual ``None`` token -- or ``(None, string)`` when
    the input does not start with ``null``.
    """
    prefix, rest = string[:4], string[4:]
    if prefix.lower() == "null":
        return True, rest
    return None, string
TokenList = typing.List[typing.Any]
def lex(string: str) -> TokenList:
    """Split a JSON document into a flat list of tokens.

    Value tokens are converted to Python values already (str, int, float,
    bool, and ``None`` for ``null``); structural characters (``:{},[]``)
    are kept as single-character strings.

    Fix: all JSON whitespace characters (space, tab, newline, carriage
    return) are skipped -- the original only skipped plain spaces, so any
    pretty-printed document raised.

    Raises:
        Exception: on the first character that starts no known token.
    """
    tokens: TokenList = []
    while len(string) > 0:
        json_string, string = lex_string(string)
        if json_string is not None:
            tokens.append(json_string)
            continue

        json_number, string = lex_number(string)
        if json_number is not None:
            tokens.append(json_number)
            continue

        json_bool, string = lex_bool(string)
        if json_bool is not None:
            tokens.append(json_bool)
            continue

        json_null, string = lex_null(string)
        if json_null is not None:
            # lex_null returns True purely as a found-marker; the token
            # itself is None.
            tokens.append(None)
            continue

        if string[0] in " \t\n\r":
            string = string[1:]
        elif string[0] in ":{},[]":
            tokens.append(string[0])
            string = string[1:]
        else:
            raise Exception("Unexpected character: {}".format(string[0]))
    return tokens

73
json_py/parser.py Normal file
View File

@ -0,0 +1,73 @@
from __future__ import annotations
import typing
import json_py.lexer as lexer
ParserResult = typing.Tuple[typing.Any, lexer.TokenList]
def parse_array(tokens: lexer.TokenList) -> ParserResult:
    """Parse the members of a JSON array.

    *tokens* starts just past the opening ``[`` (the caller consumed it).
    Returns ``(items, remaining_tokens)``.

    Fix: members are parsed recursively via :func:`parse`, so nested
    containers such as ``[[1, 2], {"a": 3}]`` now produce nested Python
    values -- the original appended raw structural tokens verbatim.

    Raises:
        ValueError: for a missing comma, a trailing comma, or an
            unterminated array.
    """
    json_array: typing.List[typing.Any] = []
    if tokens[0] == "]":
        return json_array, tokens[1:]
    while True:
        value, tokens = parse(tokens)
        json_array.append(value)
        if not tokens:
            raise ValueError("List not closed")
        if tokens[0] == "]":
            return json_array, tokens[1:]
        if tokens[0] != ",":
            raise ValueError("Expected comma but got item")
        tokens = tokens[1:]  # consume the separator
        if not tokens:
            raise ValueError("List not closed")
        if tokens[0] == "]":
            # A comma must be followed by another member.
            raise ValueError("Expected one more item")
def parse_object(tokens: lexer.TokenList) -> ParserResult:
    """Parse the members of a JSON object.

    *tokens* starts just past the opening ``{`` (the caller consumed it).
    Returns ``(mapping, remaining_tokens)``.

    Fixes over the original:
    - The key check rejected the empty string (``str(x) in ":"`` is True
      for ``""``), but ``""`` is a valid JSON key; now only structural
      tokens are rejected as keys.
    - Raises ValueError (a subclass of the Exception callers already
      caught) for consistency with :func:`parse_array`.
    - Truncated input raises ValueError instead of IndexError.
    """
    json_object: typing.Dict[typing.Any, typing.Any] = {}
    if tokens[0] == "}":
        return json_object, tokens[1:]
    while True:
        if len(tokens) < 2:
            raise ValueError("Object not closed")
        json_key = tokens[0]
        if isinstance(json_key, str) and json_key in (":", ",", "{", "}", "[", "]"):
            raise ValueError(f"Expected value before '{json_key}'")
        colon = tokens[1]
        if colon != ":":
            raise ValueError(f"Expected ':' but got '{colon}'")
        json_value, tokens = parse(tokens[2:])
        json_object[json_key] = json_value
        if not tokens:
            raise ValueError("Object not closed")
        next_token = tokens[0]
        if next_token == ",":
            tokens = tokens[1:]
        elif next_token == "}":
            return json_object, tokens[1:]
        else:
            raise ValueError(f"Expected ',' or '}}' but got '{next_token}'")
def parse(tokens: lexer.TokenList) -> typing.Any:
    """Parse one JSON value from *tokens*.

    Returns ``(value, remaining_tokens)``.  A leading ``[`` or ``{``
    dispatches to the matching container parser; any other token is a
    scalar already produced by the lexer and is returned as-is.
    """
    head, rest = tokens[0], tokens[1:]
    if head == "[":
        return parse_array(rest)
    if head == "{":
        return parse_object(rest)
    return head, rest

155
poetry.lock generated Normal file
View File

@ -0,0 +1,155 @@
[[package]]
name = "atomicwrites"
version = "1.4.0"
description = "Atomic file writes."
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]]
name = "attrs"
version = "21.4.0"
description = "Classes Without Boilerplate"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[package.extras]
dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"]
docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"]
tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"]
tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"]
[[package]]
name = "colorama"
version = "0.4.4"
description = "Cross-platform colored terminal text."
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[[package]]
name = "iniconfig"
version = "1.1.1"
description = "iniconfig: brain-dead simple config-ini parsing"
category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "packaging"
version = "21.3"
description = "Core utilities for Python packages"
category = "dev"
optional = false
python-versions = ">=3.6"
[package.dependencies]
pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"
[[package]]
name = "pluggy"
version = "1.0.0"
description = "plugin and hook calling mechanisms for python"
category = "dev"
optional = false
python-versions = ">=3.6"
[package.extras]
dev = ["pre-commit", "tox"]
testing = ["pytest", "pytest-benchmark"]
[[package]]
name = "py"
version = "1.11.0"
description = "library with cross-python path, ini-parsing, io, code, log facilities"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[[package]]
name = "pyparsing"
version = "3.0.9"
description = "pyparsing module - Classes and methods to define and execute parsing grammars"
category = "dev"
optional = false
python-versions = ">=3.6.8"
[package.extras]
diagrams = ["railroad-diagrams", "jinja2"]
[[package]]
name = "pytest"
version = "7.1.2"
description = "pytest: simple powerful testing with Python"
category = "dev"
optional = false
python-versions = ">=3.7"
[package.dependencies]
atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""}
attrs = ">=19.2.0"
colorama = {version = "*", markers = "sys_platform == \"win32\""}
iniconfig = "*"
packaging = "*"
pluggy = ">=0.12,<2.0"
py = ">=1.8.2"
tomli = ">=1.0.0"
[package.extras]
testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
[[package]]
name = "tomli"
version = "2.0.1"
description = "A lil' TOML parser"
category = "dev"
optional = false
python-versions = ">=3.7"
[metadata]
lock-version = "1.1"
python-versions = "^3.10"
content-hash = "0466c3c6aefd40e3134e296a4785f38fd969c0d33fe0e87f2d4bad83655a0a10"
[metadata.files]
atomicwrites = [
{file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"},
{file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"},
]
attrs = [
{file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"},
{file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"},
]
colorama = [
{file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
{file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
]
iniconfig = [
{file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"},
{file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
]
packaging = [
{file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
{file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
]
pluggy = [
{file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
{file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
]
py = [
{file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
{file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
]
pyparsing = [
{file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"},
{file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"},
]
pytest = [
{file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"},
{file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"},
]
tomli = [
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
]

15
pyproject.toml Normal file
View File

@ -0,0 +1,15 @@
[tool.poetry]
name = "json-py"
version = "0.1.0"
description = ""
authors = ["strNophix <nvdpoel01@gmail.com>"]
[tool.poetry.dependencies]
python = "^3.10"
[tool.poetry.dev-dependencies]
pytest = "^7.1.2"
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

0
tests/__init__.py Normal file
View File

54
tests/test_json.py Normal file
View File

@ -0,0 +1,54 @@
import pytest
import json_py
import json_py.lexer as lexer
import json_py.parser as parser
def test_parser():
    """End-to-end from_json checks: a small mixed document and a null."""
    document = '{"hello": ["world", "!", 69], 3: 9.1}'
    assert json_py.from_json(document) == {"hello": ["world", "!", 69], 3: 9.1}
    assert json_py.from_json('{"friends": null}') == {"friends": None}
def test_lex_string():
    """A quoted token yields its contents plus an empty remainder."""
    token, rest = lexer.lex_string('"hello"')
    assert (token, rest) == ("hello", "")
def test_lex_number():
    """Integers and floats lex fully; a double decimal point is rejected."""
    cases = [("1349", (1349, "")), ("6.9", (6.9, ""))]
    for text, expected in cases:
        assert lexer.lex_number(text) == expected
    with pytest.raises(ValueError):
        lexer.lex_number("6..9")
def test_lex_bool():
    """Booleans are matched case-insensitively; non-booleans pass through."""
    cases = [
        ("True", (True, "")),
        ("False", (False, "")),
        ("Sample", (None, "Sample")),
    ]
    for text, expected in cases:
        assert lexer.lex_bool(text) == expected
def test_lex_null():
    """lex_null signals a match with True; lex() maps that to None."""
    assert lexer.lex_null("null") == (True, "")
def test_lex_object():
    """A flat object lexes into values interleaved with syntax tokens."""
    expected = ["{", 12, ":", 2, ",", 3, ":", 4, "}"]
    assert lexer.lex("{12: 2, 3: 4}") == expected
def test_parse_array():
    """parse_array receives the tokens *after* '[' (the caller consumes it)."""
    assert parser.parse_array([12, ",", 2, "]"]) == ([12, 2], [])
    assert parser.parse_array(["]"]) == ([], [])
    with pytest.raises(ValueError):
        # Fix: was `[12, "," "]"]` -- implicit string concatenation made a
        # single ",]" token instead of a trailing comma before the bracket.
        parser.parse_array([12, ",", "]"])
    with pytest.raises(ValueError):
        parser.parse_array([12, 3, "]"])
    with pytest.raises(ValueError):
        parser.parse_array([12, ",", 3])
def test_parse_object():
    """parse_object receives the tokens *after* '{' and returns (dict, rest)."""
    # Skip first token 'cos parser consumes it.
    assert parser.parse_object(["}"]) == ({}, [])
    assert parser.parse_object(["age", ":", 21, "}"]) == ({"age": 21}, [])
    assert parser.parse_object(["12", ":", "[", 1, "]", "}"]) == ({"12": [1]}, [])