diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..68bc17f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,160 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/README.md b/README.md
index 7b79b62..c51b7d3 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,15 @@
 # json-py
 
-Simple JSON-parser written in python.
\ No newline at end of file
+A simple JSON parser written in Python.
+
+Inspired by [Phil Eaton's article](https://notes.eatonphil.com/writing-a-simple-json-parser.html).
+
+## Example usage
+```python
+import json_py
+print(json_py.from_json('{"hello": ["world", "!", 69], 3: 9.1}'))
+# {'hello': ['world', '!', 69], 3: 9.1}
+```
+
+## Todo
+Implement `json_py.to_json()`
\ No newline at end of file
diff --git a/json_py/__init__.py b/json_py/__init__.py
new file mode 100644
index 0000000..1e00f84
--- /dev/null
+++ b/json_py/__init__.py
@@ -0,0 +1 @@
+from json_py.json_py import *
diff --git a/json_py/json_py.py b/json_py/json_py.py
new file mode 100644
index 0000000..be957b7
--- /dev/null
+++ b/json_py/json_py.py
@@ -0,0 +1,19 @@
+from __future__ import annotations
+import typing
+import json_py.lexer as lexer
+import json_py.parser as parser
+
+
+def to_json(obj: typing.Any) -> str:
+    raise NotImplementedError("Not implemented yet")
+
+
+def from_json(string: str) -> typing.Any:
+    tokens = lexer.lex(string)
+    return parser.parse(tokens)[0]
+
+
+__all__ = (
+    "to_json",
+    "from_json",
+)
diff --git a/json_py/lexer.py b/json_py/lexer.py
new file mode 100644
index 0000000..c3de0c1
--- /dev/null
+++ b/json_py/lexer.py
@@ -0,0 +1,98 @@
+from __future__ import annotations
+import typing
+
+
+TypeLexer = typing.Tuple[typing.Optional[typing.Any], str]
+
+
+def lex_string(string: str) -> TypeLexer:
+    if not string.startswith('"'):
+        return None, string
+
+    string = string[1:]
+    for i in range(len(string)):
+        if string[i] == '"':
+            return string[:i], string[i + 1 :]
+
+    return None, string
+
+
+def lex_number(string: str) -> TypeLexer:
+    if not string[0].isdigit():
+        return None, string
+
+    has_decimal = False
+
+    for i in range(len(string)):
+        if string[i] == ".":
+            if has_decimal:
+                raise ValueError("Invalid number")
+
+            has_decimal = True
+            continue
+
+        if not string[i].isdigit():
+            if has_decimal:
+                return float(string[:i]), string[i:]
+            return int(string[:i]), string[i:]
+
+    if has_decimal:
+        return float(string), ""
+    return int(string), ""
+
+
+def lex_bool(string: str) -> TypeLexer:
+    if string[0].lower() not in "tf":
+        return None, string
+
+    if string[:4].lower() == "true":
+        return True, string[4:]
+    elif string[:5].lower() == "false":
+        return False, string[5:]
+
+    return None, string
+
+
+def lex_null(string: str) -> TypeLexer:
+    # "null" lexes to True here only as a found-marker; lex() appends None for it.
+    if string[:4].lower() == "null":
+        return True, string[4:]
+
+    return None, string
+
+
+TokenList = typing.List[typing.Any]
+
+
+def lex(string: str) -> TokenList:
+    tokens: TokenList = []
+    while len(string) > 0:
+        json_string, string = lex_string(string)
+        if json_string is not None:
+            tokens.append(json_string)
+            continue
+
+        json_number, string = lex_number(string)
+        if json_number is not None:
+            tokens.append(json_number)
+            continue
+
+        json_bool, string = lex_bool(string)
+        if json_bool is not None:
+            tokens.append(json_bool)
+            continue
+
+        json_null, string = lex_null(string)
+        if json_null is not None:
+            tokens.append(None)
+            continue
+
+        if string[0] in " \t\n\r":
+            string = string[1:]
+        elif string[0] in ":{},[]":
+            tokens.append(string[0])
+            string = string[1:]
+        else:
+            raise Exception("Unexpected character: {}".format(string[0]))
+
+    return tokens
diff --git a/json_py/parser.py b/json_py/parser.py
new file mode 100644
index 0000000..86fd68a
--- /dev/null
+++ b/json_py/parser.py
@@ -0,0 +1,73 @@
+from __future__ import annotations
+import typing
+import json_py.lexer as lexer
+
+ParserResult = typing.Tuple[typing.Any, lexer.TokenList]
+
+
+def parse_array(tokens: lexer.TokenList) -> ParserResult:
+    json_array: typing.List[typing.Any] = []
+
+    if tokens[0] == "]":
+        return json_array, tokens[1:]
+
+    expect_comma = False
+    for i in range(len(tokens)):
+        t = tokens[i]
+        if t == "]":
+            if not expect_comma:
+                raise ValueError("Expected one more item")
+
+            return json_array, tokens[i + 1 :]
+        elif t == ",":
+            if not expect_comma:
+                raise ValueError("Unexpected comma")
+
+            expect_comma = False
+        else:
+            if expect_comma:
+                raise ValueError("Expected comma but got item")
+
+            json_array.append(t)
+            expect_comma = True
+
+    raise ValueError("List not closed")
+
+
+def parse_object(tokens: lexer.TokenList) -> ParserResult:
+    json_object: typing.Any = {}
+
+    if tokens[0] == "}":
+        return json_object, tokens[1:]
+
+    is_syntax: typing.Callable[[str], bool] = lambda x: str(x) in ":"
+    while True:
+        json_key = tokens[0]
+
+        if is_syntax(json_key):
+            raise Exception(f"Expected value before '{json_key}'")
+
+        colon = tokens[1]
+        if colon != ":":
+            raise Exception(f"Expected ':' but got '{colon}'")
+
+        json_value, tokens = parse(tokens[2:])
+        json_object[json_key] = json_value
+
+        next_token = tokens[0]
+        if next_token == ",":
+            tokens = tokens[1:]
+        elif next_token == "}":
+            return json_object, tokens[1:]
+        else:
+            raise Exception(f"Expected ',' or '}}' but got '{next_token}'")
+
+
+def parse(tokens: lexer.TokenList) -> ParserResult:
+    t = tokens[0]
+    if t == "[":
+        return parse_array(tokens[1:])
+    elif t == "{":
+        return parse_object(tokens[1:])
+    else:
+        return t, tokens[1:]
diff --git a/poetry.lock b/poetry.lock
new file mode 100644
index 0000000..ca19d95
--- /dev/null
+++ b/poetry.lock
@@ -0,0 +1,155 @@
+[[package]]
+name = "atomicwrites"
+version = "1.4.0"
+description = "Atomic file writes."
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+
+[[package]]
+name = "attrs"
+version = "21.4.0"
+description = "Classes Without Boilerplate"
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+
+[package.extras]
+dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"]
+docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"]
+tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"]
+tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"]
+
+[[package]]
+name = "colorama"
+version = "0.4.4"
+description = "Cross-platform colored terminal text."
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+
+[[package]]
+name = "iniconfig"
+version = "1.1.1"
+description = "iniconfig: brain-dead simple config-ini parsing"
+category = "dev"
+optional = false
+python-versions = "*"
+
+[[package]]
+name = "packaging"
+version = "21.3"
+description = "Core utilities for Python packages"
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"
+
+[[package]]
+name = "pluggy"
+version = "1.0.0"
+description = "plugin and hook calling mechanisms for python"
+category = "dev"
+optional = false
+python-versions = ">=3.6"
+
+[package.extras]
+dev = ["pre-commit", "tox"]
+testing = ["pytest", "pytest-benchmark"]
+
+[[package]]
+name = "py"
+version = "1.11.0"
+description = "library with cross-python path, ini-parsing, io, code, log facilities"
+category = "dev"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+
+[[package]]
+name = "pyparsing"
+version = "3.0.9"
+description = "pyparsing module - Classes and methods to define and execute parsing grammars"
+category = "dev"
+optional = false
+python-versions = ">=3.6.8"
+
+[package.extras]
+diagrams = ["railroad-diagrams", "jinja2"]
+
+[[package]]
+name = "pytest"
+version = "7.1.2"
+description = "pytest: simple powerful testing with Python"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
+[package.dependencies]
+atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""}
+attrs = ">=19.2.0"
+colorama = {version = "*", markers = "sys_platform == \"win32\""}
+iniconfig = "*"
+packaging = "*"
+pluggy = ">=0.12,<2.0"
+py = ">=1.8.2"
+tomli = ">=1.0.0"
+
+[package.extras]
+testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
+
+[[package]]
+name = "tomli"
+version = "2.0.1"
+description = "A lil' TOML parser"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
+[metadata]
+lock-version = "1.1"
+python-versions = "^3.10"
+content-hash = "0466c3c6aefd40e3134e296a4785f38fd969c0d33fe0e87f2d4bad83655a0a10"
+
+[metadata.files]
+atomicwrites = [
+    {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"},
+    {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"},
+]
+attrs = [
+    {file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"},
+    {file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"},
+]
+colorama = [
+    {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
+    {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
+]
+iniconfig = [
+    {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"},
+    {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
+]
+packaging = [
+    {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
+    {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
+]
+pluggy = [
+    {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
+    {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
+]
+py = [
+    {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
+    {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
+]
+pyparsing = [
+    {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"},
+    {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"},
+]
+pytest = [
+    {file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"},
+    {file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"},
+]
+tomli = [
+    {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
+    {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
+]
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..1b425ff
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,15 @@
+[tool.poetry]
+name = "json-py"
+version = "0.1.0"
+description = ""
+authors = ["strNophix "]
+
+[tool.poetry.dependencies]
+python = "^3.10"
+
+[tool.poetry.dev-dependencies]
+pytest = "^7.1.2"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_json.py b/tests/test_json.py
new file mode 100644
index 0000000..f7ab77a
--- /dev/null
+++ b/tests/test_json.py
@@ -0,0 +1,54 @@
+import pytest
+import json_py
+import json_py.lexer as lexer
+import json_py.parser as parser
+
+
+def test_parser():
+    expected = {"hello": ["world", "!", 69], 3: 9.1}
+    assert json_py.from_json('{"hello": ["world", "!", 69], 3: 9.1}') == expected
+    assert json_py.from_json('{"friends": null}') == {"friends": None}
+
+
+def test_lex_string():
+    assert lexer.lex_string('"hello"') == ("hello", "")
+
+
+def test_lex_number():
+    assert lexer.lex_number("1349") == (1349, "")
+    assert lexer.lex_number("6.9") == (6.9, "")
+    with pytest.raises(ValueError):
+        lexer.lex_number("6..9")
+
+
+def test_lex_bool():
+    assert lexer.lex_bool("True") == (True, "")
+    assert lexer.lex_bool("False") == (False, "")
+    assert lexer.lex_bool("Sample") == (None, "Sample")
+
+
+def test_lex_null():
+    assert lexer.lex_null("null") == (True, "")
+
+
+def test_lex_object():
+    assert lexer.lex("{12: 2, 3: 4}") == ["{", 12, ":", 2, ",", 3, ":", 4, "}"]
+
+
+def test_parse_array():
+    # Skip the first token because the parser consumes it.
+    assert parser.parse_array([12, ",", 2, "]"]) == ([12, 2], [])
+    assert parser.parse_array(["]"]) == ([], [])
+    with pytest.raises(ValueError):
+        parser.parse_array([12, ",", "]"])
+    with pytest.raises(ValueError):
+        parser.parse_array([12, 3, "]"])
+    with pytest.raises(ValueError):
+        parser.parse_array([12, ",", 3])
+
+
+def test_parse_object():
+    # Skip the first token because the parser consumes it.
+    assert parser.parse_object(["}"]) == ({}, [])
+    assert parser.parse_object(["age", ":", 21, "}"]) == ({"age": 21}, [])
+    assert parser.parse_object(["12", ":", "[", 1, "]", "}"]) == ({"12": [1]}, [])
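
`json_py.from_json` in this diff is simply `lexer.lex` followed by `parser.parse`. For reference, this is the intermediate token stream the lexer produces for a small input, and what the parser turns it into:

```python
import json_py.lexer as lexer
import json_py.parser as parser

tokens = lexer.lex('{"a": [1, 2.5], "b": null}')
# Strings, numbers and null become Python values; punctuation stays as single-character tokens:
# ['{', 'a', ':', '[', 1, ',', 2.5, ']', ',', 'b', ':', None, '}']

value, remaining = parser.parse(tokens)
# value == {'a': [1, 2.5], 'b': None} and remaining == [] once the closing '}' is consumed
```

`from_json` returns only the first element of that pair.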
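
The README marks `json_py.to_json()` as a todo, and the module only stubs it out with `NotImplementedError`. A minimal sketch of one possible implementation, assuming it only has to serialize the value shapes `from_json` produces (dict, list, str, int, float, bool, None) and, like `lex_string`, ignoring escape sequences, could look like this; it is illustrative and not part of the diff:

```python
import typing


def to_json(obj: typing.Any) -> str:
    # Serialize the small set of Python shapes from_json can return.
    if obj is None:
        return "null"
    if isinstance(obj, bool):  # check bool before int/float, since bool subclasses int
        return "true" if obj else "false"
    if isinstance(obj, (int, float)):
        return str(obj)
    if isinstance(obj, str):
        return '"' + obj + '"'  # no escaping, mirroring the simple lex_string
    if isinstance(obj, list):
        return "[" + ", ".join(to_json(item) for item in obj) + "]"
    if isinstance(obj, dict):
        pairs = (to_json(key) + ": " + to_json(value) for key, value in obj.items())
        return "{" + ", ".join(pairs) + "}"
    raise TypeError("Cannot serialize {}".format(type(obj).__name__))
```

With something along these lines, `to_json(json_py.from_json(text))` would round-trip the simple documents used in the tests, e.g. `to_json({"age": 21})` gives `'{"age": 21}'`.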