From 0891000a4664d2e6d41c86787e8d797d9da3119c Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sun, 6 Nov 2022 12:16:57 +0800 Subject: [PATCH] init --- .gitattributes | 2 + .github/workflows/main.yml | 20 + .gitignore | 160 +++++++ LICENSE | 674 ++++++++++++++++++++++++++ README.md | 8 + amalgamate.py | 54 +++ build_cpp.sh | 1 + scripts/get_opcodes.py | 13 + scripts/loc.py | 19 + scripts/run_tests.py | 23 + src/builtins.h | 165 +++++++ src/codeobject.h | 147 ++++++ src/compiler.h | 809 ++++++++++++++++++++++++++++++++ src/error.h | 39 ++ src/iter.h | 49 ++ src/main.cpp | 122 +++++ src/obj.h | 86 ++++ src/opcodes.h | 56 +++ src/parser.h | 245 ++++++++++ src/pocketpy.h | 454 ++++++++++++++++++ src/str.h | 153 ++++++ src/vm.h | 598 +++++++++++++++++++++++ test_cpp.sh | 11 + tests/1.py | 16 + tests/2.py | 15 + tests/3.py | 5 + tests/class.pk | 6 + tests/mixedtype/basic.py | 39 ++ tests/singletype/basic.py | 75 +++ tests/singletype/builtin_ty.py | 158 +++++++ tests/singletype/controlflow.py | 67 +++ tests/singletype/functions.py | 19 + 32 files changed, 4308 insertions(+) create mode 100644 .gitattributes create mode 100644 .github/workflows/main.yml create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 amalgamate.py create mode 100644 build_cpp.sh create mode 100644 scripts/get_opcodes.py create mode 100644 scripts/loc.py create mode 100644 scripts/run_tests.py create mode 100644 src/builtins.h create mode 100644 src/codeobject.h create mode 100644 src/compiler.h create mode 100644 src/error.h create mode 100644 src/iter.h create mode 100644 src/main.cpp create mode 100644 src/obj.h create mode 100644 src/opcodes.h create mode 100644 src/parser.h create mode 100644 src/pocketpy.h create mode 100644 src/str.h create mode 100644 src/vm.h create mode 100644 test_cpp.sh create mode 100644 tests/1.py create mode 100644 tests/2.py create mode 100644 tests/3.py create mode 100644 tests/class.pk create mode 100644 
tests/mixedtype/basic.py create mode 100644 tests/singletype/basic.py create mode 100644 tests/singletype/builtin_ty.py create mode 100644 tests/singletype/controlflow.py create mode 100644 tests/singletype/functions.py diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..dfe07704 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Auto detect text files and perform LF normalization +* text=auto diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 00000000..715d4d82 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,20 @@ +name: build +on: [push, pull_request] +jobs: + build_win: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + - uses: ilammy/msvc-dev-cmd@v1 + - name: Compiling + shell: bash + run: | + CL -std:c++17 -utf-8 -O2 -EHsc -Fe:pocketpy src/main.cpp + mv src/pocketpy.h src/pocketpy.cpp + CL -std:c++17 -utf-8 -O2 -EHsc -LD -Fe:libpocketpy src/pocketpy.cpp + - uses: actions/upload-artifact@v3 + with: + name: pocketpy + path: | + D:\a\pocketpy\pocketpy\pocketpy.exe + D:\a\pocketpy\pocketpy\libpocketpy.dll \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..45406b13 --- /dev/null +++ b/.gitignore @@ -0,0 +1,160 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +.vscode + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ +src/main +src/test +gmon.out +gprof.txt +/pocketpy +amalgamated diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..e62ec04c --- /dev/null +++ b/LICENSE @@ -0,0 +1,674 @@ +GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too.
+ + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. 
Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. 
+ + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. 
+ + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. 
Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. 
+ + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. 
If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. 
+ + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. 
+ + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the 
material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. 
If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. 
+ + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>. 
diff --git a/README.md b/README.md new file mode 100644 index 00000000..d662fa9d --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ +# pocketpy + +## 参考 + ++ [cpython](https://github.com/python/cpython) + ++ [byterun](http://qingyunha.github.io/taotao/) + diff --git a/amalgamate.py b/amalgamate.py new file mode 100644 index 00000000..f6a45b0b --- /dev/null +++ b/amalgamate.py @@ -0,0 +1,54 @@ +with open("src/opcodes.h", "rt", encoding='utf-8') as f: + OPCODES_TEXT = f.read() + +pipeline = [ + ["str.h", "builtins.h"], + ["obj.h", "iter.h", "parser.h", "codeobject.h"], + ["error.h", "vm.h", "compiler.h"], + ["pocketpy.h"] +] + +copied = set() + +text = "" + +import re +import shutil +import os +import time + +if os.path.exists("amalgamated"): + shutil.rmtree("amalgamated") + time.sleep(1) +os.mkdir("amalgamated") + +def remove_copied_include(text): + text = text.replace("#pragma once", "") + text = re.sub( + r'#include\s+"(.+)"\s*', + lambda m: "" if m.group(1) in copied else m.group(0), + text + ) + text = text.replace('#include "opcodes.h"', OPCODES_TEXT) + return text + +for seq in pipeline: + for j in seq: + with open("src/"+j, "rt", encoding='utf-8') as f: + text += remove_copied_include(f.read()) + '\n' + copied.add(j) + +with open("amalgamated/pocketpy.h", "wt", encoding='utf-8') as f: + final_text = \ +r'''/* + * Copyright (c) 2022 blueloveTH + * Distributed Under The GNU General Public License v3.0 + */ + +#ifndef POCKETPY_H +#define POCKETPY_H +''' + text + '\n#endif // POCKETPY_H' + f.write(final_text) + +shutil.copy("src/main.cpp", "amalgamated/main.cpp") +os.system("g++ -o pocketpy amalgamated/main.cpp --std=c++17 -O1") \ No newline at end of file diff --git a/build_cpp.sh b/build_cpp.sh new file mode 100644 index 00000000..50a9e701 --- /dev/null +++ b/build_cpp.sh @@ -0,0 +1 @@ +g++ -o pocketpy src/main.cpp --std=c++17 -O1 diff --git a/scripts/get_opcodes.py b/scripts/get_opcodes.py new file mode 100644 index 00000000..bc407f19 --- /dev/null +++ 
b/scripts/get_opcodes.py @@ -0,0 +1,13 @@ +import os +import re + +with open("src/opcodes.h", "rt", encoding='utf-8') as f: + text = f.read() + +# opcodes = re.findall(r"OP_(\w+)", text) + +# print('\n'.join([f"OPCODE({o})" + o for o in opcodes])) + +text = re.sub(r"OP_(\w+)", lambda m: f"OPCODE({m.group(1)})", text) + +print(text.replace(',', '')) \ No newline at end of file diff --git a/scripts/loc.py b/scripts/loc.py new file mode 100644 index 00000000..d121863d --- /dev/null +++ b/scripts/loc.py @@ -0,0 +1,19 @@ +import os + +def get_loc(path): + loc = 0 + with open(path, "rt", encoding='utf-8') as f: + loc += len(f.readlines()) + return loc + +def get_loc_for_dir(path): + loc = 0 + for root, dirs, files in os.walk(path): + for file in files: + if file.endswith('.h'): + _i = get_loc(os.path.join(root, file)) + print(f"{file}: {_i}") + loc += _i + return loc + +print(get_loc_for_dir('src')) \ No newline at end of file diff --git a/scripts/run_tests.py b/scripts/run_tests.py new file mode 100644 index 00000000..4f92d957 --- /dev/null +++ b/scripts/run_tests.py @@ -0,0 +1,23 @@ +import os + +singletypepath = 'tests/singletype' +mixedtypepath = 'tests/mixedtype' + +def test_file(filepath): + return os.system("./pocketpy " + filepath) == 0 + #return os.system("python3 " + filepath) == 0 + +def test_dir(path): + print("=" * 50) + for filename in os.listdir(path): + if filename.endswith('.py'): + filepath = os.path.join(path, filename) + code = test_file(filepath) + if not code: + print("[x] " + filepath) + else: + print("[√] " + filepath) + +if __name__ == '__main__': + test_dir(singletypepath) + test_dir(mixedtypepath) \ No newline at end of file diff --git a/src/builtins.h b/src/builtins.h new file mode 100644 index 00000000..28d7d1fa --- /dev/null +++ b/src/builtins.h @@ -0,0 +1,165 @@ +#pragma once + +const char* __BUILTINS_CODE = R"( +def len(x): + return x.__len__() + +def __str4join(self, seq): + s = "" + for i in seq: + s += str(i) + self # in Python3, it 
uses 'i' instead of 'str(i)' + if len(self) > 0: + s = s[:-len(self)] + return s +str.join = __str4join + +def __str4__mul__(self, n): + s = "" + for i in range(n): + s += self + return s +str.__mul__ = __str4__mul__ + +def __str4split(self, sep): + if sep == "": + return list(self) + res = [] + i = 0 + while i < len(self): + if self[i:i+len(sep)] == sep: + res.append(self[:i]) + self = self[i+len(sep):] + i = 0 + else: + i += 1 + res.append(self) + return res +str.split = __str4split + +def __list4__str__(self): + a = [] + for i in self: + a.append(str(i)) + return "[" + ", ".join(a) + "]" +list.__str__ = __list4__str__ + +def __list4extend(self, other): + for i in other: + self.append(i) +list.extend = __list4extend + +def __list4__mul__(self, n): + a = [] + for i in range(n): + a.extend(self) + return a +list.__mul__ = __list4__mul__ + +def __iterable4__eq__(self, other): + if len(self) != len(other): + return False + for i in range(len(self)): + if self[i] != other[i]: + return False + return True +list.__eq__ = __iterable4__eq__ +tuple.__eq__ = __iterable4__eq__ + +def __iterable4__contains__(self, item): + for i in self: + if i == item: + return True + return False +list.__contains__ = __iterable4__contains__ +tuple.__contains__ = __iterable4__contains__ + +# https://github.com/python/cpython/blob/main/Objects/dictobject.c +class dict: + def __init__(self, tuples): + self._capacity = 8 + self._a = [None] * self._capacity + self._len = 0 + for i in tuples: + self[i[0]] = i[1] + + def __len__(self): + return self._len + + def __probe(self, key): + i = hash(key) % self._capacity + while self._a[i] is not None: + if self._a[i][0] == key: + return [True, i] + i = ((5*i) + 1) % self._capacity + return [False, i] + + def __getitem__(self, key): + ret = self.__probe(key) + ok = ret[0]; i = ret[1] + if not ok: + raise KeyError(key) + return self._a[i][1] + + def __contains__(self, key): + ret = self.__probe(key) + ok = ret[0]; i = ret[1] + return ok + + def 
__setitem__(self, key, value): + ret = self.__probe(key) + ok = ret[0]; i = ret[1] + if ok: + self._a[i][1] = value + else: + self._a[i] = [key, value] + self._len += 1 + if self._len > self._capacity * 0.6: + self.__resize_2x() + + def __delitem__(self, key): + ret = self.__probe(key) + ok = ret[0]; i = ret[1] + if not ok: + raise KeyError(key) + self._a[i] = None + self._len -= 1 + + def __resize_2x(self): + old_a = self._a + self._capacity *= 2 + self._a = [None] * self._capacity + self._len = 0 + for kv in old_a: + if kv is not None: + self[kv[0]] = kv[1] + + def keys(self): + ret = [] + for kv in self._a: + if kv is not None: + ret.append(kv[0]) + return ret + + def values(self): + ret = [] + for kv in self._a: + if kv is not None: + ret.append(kv[1]) + return ret + + def items(self): + ret = [] + for kv in self._a: + if kv is not None: + ret.append(kv) + return ret + + def __str__(self): + ret = '{' + for kv in self.items(): + ret += str(kv[0]) + ': ' + str(kv[1]) + ', ' + if ret[-2:] == ', ': + ret = ret[:-2] + return ret + '}' + +)"; \ No newline at end of file diff --git a/src/codeobject.h b/src/codeobject.h new file mode 100644 index 00000000..0cbda99c --- /dev/null +++ b/src/codeobject.h @@ -0,0 +1,147 @@ +#pragma once + +#include "obj.h" + +enum Opcode { + #define OPCODE(name) OP_##name, + #include "opcodes.h" + #undef OPCODE +}; + +static const char* OP_NAMES[] = { + #define OPCODE(name) #name, + #include "opcodes.h" + #undef OPCODE +}; + +struct ByteCode{ + uint8_t op; + int arg; + uint16_t line; +}; + +_Str pad(const _Str& s, const int n){ + return s + _Str(n - s.size(), ' '); +} + +class CodeObject { +public: + std::vector co_code; + _Str co_filename; + _Str co_name; + + PyVarList co_consts; + std::vector<_Str> co_names; + + int addConst(PyVar v){ + co_consts.push_back(v); + return co_consts.size() - 1; + } + + int addName(const _Str& name){ + auto iter = std::find(co_names.begin(), co_names.end(), name); + if(iter == co_names.end()){ + 
co_names.push_back(name); + return co_names.size() - 1; + } + return iter - co_names.begin(); + } + + int getNameIndex(const _Str& name){ + auto iter = std::find(co_names.begin(), co_names.end(), name); + if(iter == co_names.end()) return -1; + return iter - co_names.begin(); + } + + _Str toString(){ + _StrStream ss; + int prev_line = -1; + for(int i=0; igetTypeName(); + if(i != co_consts.size() - 1) consts << ", "; + } + + _StrStream names; + names << "co_names: "; + for(int i=0; i(&co_consts[i]->_native); + if(fn) ss << '\n' << fn->code->co_name << ":\n" << fn->code->toString(); + } + return _Str(ss); + } +}; + +class Frame { +private: + std::stack s_data; + int ip = 0; +public: + StlDict* f_globals; + StlDict f_locals; + + const CodeObject* code; + + Frame(const CodeObject* code, StlDict locals, StlDict* globals) + : code(code), f_locals(locals), f_globals(globals) {} + + inline const ByteCode& readCode() { + return code->co_code[ip++]; + } + + int currentLine(){ + if(isEnd()) return -1; + return code->co_code[ip].line; + } + + inline bool isEnd() const { + return ip >= code->co_code.size(); + } + + inline PyVar popValue(){ + PyVar v = s_data.top(); + s_data.pop(); + return v; + } + + inline const PyVar& topValue() const { + return s_data.top(); + } + + inline void pushValue(PyVar v){ + s_data.push(v); + } + + inline int valueCount() const { + return s_data.size(); + } + + inline void jumpTo(int i){ + this->ip = i; + } + + inline PyVarList popNReversed(int n){ + PyVarList v(n); + for(int i=n-1; i>=0; i--) v[i] = popValue(); + return v; + } +}; \ No newline at end of file diff --git a/src/compiler.h b/src/compiler.h new file mode 100644 index 00000000..7349d4b7 --- /dev/null +++ b/src/compiler.h @@ -0,0 +1,809 @@ +#pragma once + +#include +#include +#include + +#include "parser.h" +#include "error.h" +#include "vm.h" + +class Compiler; + +typedef void (Compiler::*GrammarFn)(); +typedef void (Compiler::*CompilerAction)(); + +struct GrammarRule{ + GrammarFn prefix; 
+ GrammarFn infix; + Precedence precedence; +}; + +struct Loop { + bool forLoop; + int start; + std::vector breaks; + Loop(bool forLoop, int start) : forLoop(forLoop), start(start) {} +}; + +#define ExprCommaSplitArgs(end) \ + int ARGC = 0; \ + do { \ + matchNewLines(); \ + if (peek() == TK(end)) break; \ + compileExpression(); \ + ARGC++; \ + matchNewLines(); \ + } while (match(TK(","))); \ + matchNewLines(); \ + consume(TK(end)); + + +class Compiler { +public: + std::unique_ptr parser; + bool repl_mode; + bool l_value; + + std::stack<_Code> codes; + std::stack loops; + + bool isCompilingClass = false; + + _Str path = ""; + VM* vm; + + std::unordered_map<_TokenType, GrammarRule> rules; + + _Code getCode() { + return codes.top(); + } + + Loop& getLoop() { + return loops.top(); + } + + Compiler(VM* vm, const char* source, _Code code, bool repl_mode){ + this->vm = vm; + this->codes.push(code); + this->repl_mode = repl_mode; + if (!code->co_filename.empty()) path = code->co_filename; + this->parser = std::make_unique(source); + +// http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ +#define METHOD(name) &Compiler::name +#define NO_INFIX PREC_NONE + for(_TokenType i=0; i<__TOKENS_LEN; i++) rules[i] = { nullptr, nullptr, PREC_NONE }; + rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB }; + rules[TK("(")] = { METHOD(exprGrouping), METHOD(exprCall), PREC_CALL }; + rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscript), PREC_SUBSCRIPT }; + rules[TK("{")] = { METHOD(exprMap), nullptr, NO_INFIX }; + rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM }; + rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM }; + rules[TK("*")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK("**")] = { 
nullptr, METHOD(exprBinaryOp), PREC_EXPONENT }; + rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY }; + rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY }; + rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("lambda")] = { METHOD(exprLambda), nullptr, NO_INFIX }; + rules[TK("None")] = { METHOD(exprValue), nullptr, NO_INFIX }; + rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; + rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; + rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; + rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; + rules[TK("and")] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND }; + rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR }; + rules[TK("not")] = { METHOD(exprUnaryOp), nullptr, PREC_UNARY }; + rules[TK("True")] = { METHOD(exprValue), nullptr, NO_INFIX }; + rules[TK("False")] = { METHOD(exprValue), nullptr, NO_INFIX }; + rules[TK("@id")] = { METHOD(exprName), nullptr, NO_INFIX }; + rules[TK("@num")] = { METHOD(exprLiteral), nullptr, NO_INFIX }; + rules[TK("@str")] = { METHOD(exprLiteral), nullptr, NO_INFIX }; +#undef METHOD +#undef NO_INFIX + } + + void eatString(bool single_quote) { + std::vector buff; + char quote = (single_quote) ? 
'\'' : '"'; + while (true) { + char c = parser->eatChar(); + if (c == quote) break; + if (c == '\0') + throw SyntaxError(path, parser->makeErrToken(), "EOL while scanning string literal"); + if (c == '\\') { + switch (parser->eatCharIncludeNewLine()) { + case '"': buff.push_back('"'); break; + case '\'': buff.push_back('\''); break; + case '\\': buff.push_back('\\'); break; + case 'n': buff.push_back('\n'); break; + case 'r': buff.push_back('\r'); break; + case 't': buff.push_back('\t'); break; + case '\n': break; // Just ignore the next line. + case '\r': if (parser->matchChar('\n')) break; + default: throw SyntaxError(path, parser->makeErrToken(), "invalid syntax"); + } + } else { + buff.push_back(c); + } + } + + parser->setNextToken(TK("@str"), vm->PyStr(_Str(buff.data(), buff.size()))); + } + + void eatNumber() { + char c = *(parser->token_start); + bool is_float = false; + while (isdigit(parser->peekChar())) parser->eatChar(); + + if (parser->peekChar() == '.' && isdigit(parser->peekNextChar())) { + parser->matchChar('.'); + is_float = true; + while (isdigit(parser->peekChar())) parser->eatChar(); + } + + errno = 0; + PyVar value = vm->None; + if(is_float){ + value = vm->PyFloat(atof(parser->token_start)); + } else { + value = vm->PyInt(atoi(parser->token_start)); + } + if (errno == ERANGE) { + const char* start = parser->token_start; + int len = (int)(parser->current_char - start); + throw SyntaxError(path, parser->makeErrToken(), "number literal too large: %.*s", len, start); + } + parser->setNextToken(TK("@num"), value); + } + + // Lex the next token and set it as the next token. 
+ void lexToken() { + parser->previous = parser->current; + parser->current = parser->nextToken(); + //printf("<%s> ", TK_STR(peek())); + + while (parser->peekChar() != '\0') { + parser->token_start = parser->current_char; + char c = parser->eatCharIncludeNewLine(); + switch (c) { + case '"': eatString(false); return; + case '\'': eatString(true); return; + case '#': parser->skipLineComment(); break; + case '{': parser->setNextToken(TK("{")); return; + case '}': parser->setNextToken(TK("}")); return; + case ',': parser->setNextToken(TK(",")); return; + case ':': parser->setNextToken(TK(":")); return; + case ';': parser->setNextToken(TK(";")); return; + case '(': parser->setNextToken(TK("(")); return; + case ')': parser->setNextToken(TK(")")); return; + case '[': parser->setNextToken(TK("[")); return; + case ']': parser->setNextToken(TK("]")); return; + case '%': parser->setNextToken(TK("%")); return; + case '.': parser->setNextToken(TK(".")); return; + case '=': parser->setNextTwoCharToken('=', TK("="), TK("==")); return; + case '>': parser->setNextTwoCharToken('=', TK(">"), TK(">=")); return; + case '<': parser->setNextTwoCharToken('=', TK("<"), TK("<=")); return; + case '+': parser->setNextTwoCharToken('=', TK("+"), TK("+=")); return; + case '-': parser->setNextTwoCharToken('=', TK("-"), TK("-=")); return; + case '!': + if(parser->matchChar('=')) parser->setNextToken(TK("!=")); + else SyntaxError(path, parser->makeErrToken(), "expected '=' after '!'"); + break; + case '*': + if (parser->matchChar('*')) { + parser->setNextToken(TK("**")); // '**' + } else { + parser->setNextTwoCharToken('=', TK("*"), TK("*=")); + } + return; + case '/': + if(parser->matchChar('/')) { + parser->setNextTwoCharToken('=', TK("//"), TK("//=")); + } else { + parser->setNextTwoCharToken('=', TK("/"), TK("/=")); + } + return; + case '\r': break; // just ignore '\r' + case ' ': case '\t': parser->eatSpaces(); break; + case '\n': { + parser->setNextToken(TK("@eol")); + 
while(parser->matchChar('\n')); + if(!parser->eatIndentation()) + throw SyntaxError(path, parser->makeErrToken(), "unindent does not match any outer indentation level"); + return; + } + default: { + if (isdigit(c)) { + eatNumber(); + } else if (isalpha(c) || c=='_') { + parser->eatName(); + } else { + throw SyntaxError(path, parser->makeErrToken(), "unknown character: %c", c); + } + return; + } + } + } + + parser->token_start = parser->current_char; + parser->setNextToken(TK("@eof")); + } + + _TokenType peek() { + return parser->current.type; + } + + bool match(_TokenType expected) { + if (peek() != expected) return false; + lexToken(); + return true; + } + + void consume(_TokenType expected) { + lexToken(); + Token prev = parser->previous; + if (prev.type != expected){ + throw SyntaxError(path, prev, "expected '%s', but got '%s'", TK_STR(expected), TK_STR(prev.type)); + } + } + + bool matchNewLines(bool repl_throw=false) { + bool consumed = false; + if (peek() == TK("@eol")) { + while (peek() == TK("@eol")) lexToken(); + consumed = true; + } + if (repl_throw && peek() == TK("@eof")){ + throw NeedMoreLines(); + } + return consumed; + } + + bool matchEndStatement() { + if (match(TK(";"))) { + matchNewLines(); + return true; + } + if (matchNewLines() || peek() == TK("@eof")) + return true; + if (peek() == TK("@dedent")) return true; + return false; + } + + void consumeEndStatement() { + if (!matchEndStatement()) + throw SyntaxError(path, parser->current, "expected statement end"); + } + + bool matchAssignment() { + if (match(TK("="))) return true; + if (match(TK("+="))) return true; + if (match(TK("-="))) return true; + if (match(TK("*="))) return true; + if (match(TK("/="))) return true; + if (match(TK("//="))) return true; + return false; + } + +#define OP_STORE_AUTO (codes.size()==1) ? 
OP_STORE_NAME : OP_STORE_FAST + + void exprLiteral() { + PyVar value = parser->previous.value; + int index = getCode()->addConst(value); + emitCode(OP_LOAD_CONST, index); + } + + void exprLambda() { + + } + + void exprName() { + Token tkname = parser->previous; + _Str name(tkname.start, tkname.length); + int index = getCode()->addName(name); + + if (l_value && matchAssignment()) { + _TokenType assignment = parser->previous.type; + matchNewLines(); + if (assignment == TK("=")) { // name = (expr); + compileExpression(); + } else { // name += / -= / *= ... = (expr); + emitCode(OP_LOAD_NAME, index); + compileExpression(); + emitAssignOp(assignment); + } + emitCode(OP_STORE_AUTO, index); + } else { // Just the name and no assignment followed by. + emitCode(OP_LOAD_NAME, index); + } + } + + void emitAssignOp(_TokenType assignment){ + switch (assignment) { + case TK("+="): emitCode(OP_BINARY_OP, 0); break; + case TK("-="): emitCode(OP_BINARY_OP, 1); break; + case TK("*="): emitCode(OP_BINARY_OP, 2); break; + case TK("/="): emitCode(OP_BINARY_OP, 3); break; + case TK("//="): emitCode(OP_BINARY_OP, 4); break; + default: UNREACHABLE(); + } + } + + void exprOr() { + int patch = emitCode(OP_JUMP_IF_TRUE_OR_POP); + matchNewLines(); + parsePrecedence(PREC_LOGICAL_OR); + patchJump(patch); + } + + void exprAnd() { + int patch = emitCode(OP_JUMP_IF_FALSE_OR_POP); + matchNewLines(); + parsePrecedence(PREC_LOGICAL_AND); + patchJump(patch); + } + + void exprBinaryOp() { + _TokenType op = parser->previous.type; + matchNewLines(); + parsePrecedence((Precedence)(rules[op].precedence + 1)); + + switch (op) { + case TK("+"): emitCode(OP_BINARY_OP, 0); break; + case TK("-"): emitCode(OP_BINARY_OP, 1); break; + case TK("*"): emitCode(OP_BINARY_OP, 2); break; + case TK("/"): emitCode(OP_BINARY_OP, 3); break; + case TK("//"): emitCode(OP_BINARY_OP, 4); break; + case TK("%"): emitCode(OP_BINARY_OP, 5); break; + case TK("**"): emitCode(OP_BINARY_OP, 6); break; + + case TK("<"): 
emitCode(OP_COMPARE_OP, 0); break; + case TK("<="): emitCode(OP_COMPARE_OP, 1); break; + case TK("=="): emitCode(OP_COMPARE_OP, 2); break; + case TK("!="): emitCode(OP_COMPARE_OP, 3); break; + case TK(">"): emitCode(OP_COMPARE_OP, 4); break; + case TK(">="): emitCode(OP_COMPARE_OP, 5); break; + case TK("in"): emitCode(OP_CONTAINS_OP, 0); break; + case TK("not in"): emitCode(OP_CONTAINS_OP, 1); break; + case TK("is"): emitCode(OP_IS_OP, 0); break; + case TK("is not"): emitCode(OP_IS_OP, 1); break; + default: UNREACHABLE(); + } + } + + void exprUnaryOp() { + _TokenType op = parser->previous.type; + matchNewLines(); + parsePrecedence((Precedence)(PREC_UNARY + 1)); + + switch (op) { + case TK("-"): emitCode(OP_UNARY_NEGATIVE); break; + case TK("not"): emitCode(OP_UNARY_NOT); break; + default: UNREACHABLE(); + } + } + + void exprGrouping() { + matchNewLines(); + compileExpression(); + matchNewLines(); + consume(TK(")")); + } + + void exprList() { + ExprCommaSplitArgs("]"); + emitCode(OP_BUILD_LIST, ARGC); + } + + void exprMap() { + int size = 0; + do { + matchNewLines(); + if (peek() == TK("}")) break; + compileExpression();consume(TK(":"));compileExpression(); + emitCode(OP_BUILD_TUPLE, 2); + size++; + matchNewLines(); + } while (match(TK(","))); + matchNewLines(); + consume(TK("}")); + emitCode(OP_BUILD_MAP, size); + } + + void exprCall() { + ExprCommaSplitArgs(")"); + emitCode(OP_CALL, ARGC); + } + + void exprAttrib() { + consume(TK("@id")); + const _Str& name = parser->previous.str(); + int index = getCode()->addName(name); + + if (match(TK("("))) { + emitCode(OP_LOAD_ATTR, index); + exprCall(); + return; + } + + if (l_value && matchAssignment()) { + _TokenType assignment = parser->previous.type; + matchNewLines(); + if (assignment != TK("=")) { // name += / -= / *= ... 
= (expr); + emitCode(OP_DUP_TOP); + emitCode(OP_LOAD_ATTR, index); + compileExpression(); + emitAssignOp(assignment); + } else { + compileExpression(); + } + emitCode(OP_STORE_ATTR, index); + } else { + emitCode(OP_LOAD_ATTR, index); + } + } + + // [:], [:b] + // [a], [a:], [a:b] + void exprSubscript() { + bool slice = false; + if(match(TK(":"))){ + emitCode(OP_LOAD_NONE); + if(match(TK("]"))){ + emitCode(OP_LOAD_NONE); + }else{ + compileExpression(); + consume(TK("]")); + } + emitCode(OP_BUILD_SLICE); + slice = true; + }else{ + compileExpression(); + if(match(TK(":"))){ + if(match(TK("]"))){ + emitCode(OP_LOAD_NONE); + }else{ + compileExpression(); + consume(TK("]")); + } + emitCode(OP_BUILD_SLICE); + slice = true; + }else{ + consume(TK("]")); + } + } + + if (l_value && matchAssignment()) { + if(slice) throw SyntaxError(path, parser->previous, "can't assign to slice"); + _TokenType assignment = parser->previous.type; + matchNewLines(); + + if (assignment != TK("=")) { + UNREACHABLE(); + } else { + compileExpression(); + } + emitCode(OP_STORE_SUBSCR); + } else { + emitCode(OP_BINARY_SUBSCR); + } + } + + void exprValue() { + _TokenType op = parser->previous.type; + switch (op) { + case TK("None"): emitCode(OP_LOAD_NONE); break; + case TK("True"): emitCode(OP_LOAD_TRUE); break; + case TK("False"): emitCode(OP_LOAD_FALSE); break; + default: UNREACHABLE(); + } + } + + void parsePrecedence(Precedence precedence) { + lexToken(); + GrammarFn prefix = rules[parser->previous.type].prefix; + + if (prefix == nullptr) { + throw SyntaxError(path, parser->previous, "expected an expression"); + } + + // Make a "backup" of the l value before parsing next operators to + // reset once it done. 
+ bool l_value = this->l_value; + + this->l_value = precedence <= PREC_LOWEST; + (this->*prefix)(); + + while (rules[peek()].precedence >= precedence) { + lexToken(); + _TokenType op = parser->previous.type; + GrammarFn infix = rules[op].infix; + (this->*infix)(); + } + + this->l_value = l_value; + } + + void keepOpcodeLine(){ + int i = getCode()->co_code.size() - 1; + getCode()->co_code[i].line = getCode()->co_code[i-1].line; + } + + int emitCode(Opcode opcode, int arg=-1) { + int line = parser->previous.line; + getCode()->co_code.push_back( + ByteCode{(uint8_t)opcode, arg, (uint16_t)line} + ); + return getCode()->co_code.size() - 1; + } + + void patchJump(int addr_index) { + int target = getCode()->co_code.size(); + getCode()->co_code[addr_index].arg = target; + } + + void compileBlockBody(){ + __compileBlockBody(&Compiler::compileStatement); + } + + void __compileBlockBody(CompilerAction action) { + consume(TK(":")); + if(!matchNewLines(repl_mode)){ + throw SyntaxError(path, parser->previous, "expected a new line after ':'"); + } + consume(TK("@indent")); + while (peek() != TK("@dedent")) { + (this->*action)(); + matchNewLines(); + } + consume(TK("@dedent")); + } + + Token compileImportPath() { + consume(TK("@id")); + Token tkmodule = parser->previous; + int index = getCode()->addName(tkmodule.str()); + emitCode(OP_IMPORT_NAME, index); + return tkmodule; + } + + // import module1 [as alias1 [, module2 [as alias2 ...]] + void compileRegularImport() { + do { + Token tkmodule = compileImportPath(); + if (match(TK("as"))) { + consume(TK("@id")); + tkmodule = parser->previous; + } + int index = getCode()->addName(tkmodule.str()); + emitCode(OP_STORE_NAME, index); + } while (match(TK(",")) && (matchNewLines(), true)); + consumeEndStatement(); + } + + // Compiles an expression. An expression will result a value on top of the stack. 
+ void compileExpression() { + parsePrecedence(PREC_LOWEST); + } + + void compileIfStatement() { + matchNewLines(); + compileExpression(); //< Condition. + + int ifpatch = emitCode(OP_POP_JUMP_IF_FALSE); + compileBlockBody(); + + if (match(TK("elif"))) { + int exit_jump = emitCode(OP_JUMP_ABSOLUTE); + patchJump(ifpatch); + compileIfStatement(); + patchJump(exit_jump); + } else if (match(TK("else"))) { + int exit_jump = emitCode(OP_JUMP_ABSOLUTE); + patchJump(ifpatch); + compileBlockBody(); + patchJump(exit_jump); + } else { + patchJump(ifpatch); + } + } + + Loop& enterLoop(bool forLoop){ + Loop lp(forLoop, (int)getCode()->co_code.size()); + loops.push(lp); + return loops.top(); + } + + void exitLoop(){ + Loop& lp = loops.top(); + for(int addr : lp.breaks) patchJump(addr); + loops.pop(); + } + + void compileWhileStatement() { + Loop& loop = enterLoop(false); + compileExpression(); + int patch = emitCode(OP_POP_JUMP_IF_FALSE); + compileBlockBody(); + emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine(); + patchJump(patch); + exitLoop(); + } + + void compileForStatement() { + consume(TK("@id")); + const _Str& iterName = parser->previous.str(); + int iterIndex = getCode()->addName(iterName); + consume(TK("in")); + compileExpression(); + emitCode(OP_GET_ITER); + Loop& loop = enterLoop(true); + int patch = emitCode(OP_FOR_ITER); + emitCode(OP_STORE_AUTO, iterIndex); + compileBlockBody(); + emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine(); + patchJump(patch); + exitLoop(); + } + + void compileStatement() { + if (match(TK("break"))) { + if (loops.empty()) throw SyntaxError(path, parser->previous, "'break' outside loop"); + consumeEndStatement(); + if(getLoop().forLoop) emitCode(OP_POP_TOP); // pop the iterator of for loop. 
+ int patch = emitCode(OP_JUMP_ABSOLUTE); + getLoop().breaks.push_back(patch); + } else if (match(TK("continue"))) { + if (loops.empty()) { + throw SyntaxError(path, parser->previous, "'continue' not properly in loop"); + } + consumeEndStatement(); + emitCode(OP_JUMP_ABSOLUTE, getLoop().start); + } else if (match(TK("return"))) { + if (codes.size() == 1) + throw SyntaxError(path, parser->previous, "'return' outside function"); + if(matchEndStatement()){ + emitCode(OP_LOAD_NONE); + }else{ + compileExpression(); + consumeEndStatement(); + } + emitCode(OP_RETURN_VALUE); + } else if (match(TK("if"))) { + compileIfStatement(); + } else if (match(TK("while"))) { + compileWhileStatement(); + } else if (match(TK("for"))) { + compileForStatement(); + } else if(match(TK("assert"))){ + compileExpression(); + emitCode(OP_ASSERT); + consumeEndStatement(); + } else if(match(TK("raise"))){ + consume(TK("@id")); // dummy exception type + emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(parser->previous.str()))); + consume(TK("("));compileExpression();consume(TK(")")); + emitCode(OP_RAISE_ERROR); + consumeEndStatement(); + } else if(match(TK("del"))){ + // TODO: The del implementation is problematic in some cases. + compileExpression(); + ByteCode& lastCode = getCode()->co_code.back(); + if(lastCode.op == OP_BINARY_SUBSCR){ + lastCode.op = OP_DELETE_SUBSCR; + lastCode.arg = -1; + }else{ + throw SyntaxError(path, parser->previous, "you should use 'del a[b]' syntax"); + } + consumeEndStatement(); + } else if(match(TK("pass"))){ + consumeEndStatement(); + } else { + compileExpression(); + consumeEndStatement(); + + // If last op is not an assignment, pop the result. 
+ uint8_t lastOp = getCode()->co_code.back().op; + if( lastOp != OP_STORE_NAME && lastOp != OP_STORE_FAST && lastOp != OP_STORE_SUBSCR && lastOp != OP_STORE_ATTR){ + if(repl_mode && parser->indents.top() == 0){ + emitCode(OP_PRINT_EXPR); + } + emitCode(OP_POP_TOP); + } + } + } + + void compileClass(){ + consume(TK("@id")); + int clsNameIdx = getCode()->addName(parser->previous.str()); + int superClsNameIdx = -1; + if(match(TK("("))){ + consume(TK("@id")); + superClsNameIdx = getCode()->addName(parser->previous.str()); + consume(TK(")")); + } + emitCode(OP_LOAD_NONE); + isCompilingClass = true; + __compileBlockBody(&Compiler::compileFunction); + isCompilingClass = false; + + if(superClsNameIdx == -1) emitCode(OP_LOAD_NONE); + else emitCode(OP_LOAD_NAME, superClsNameIdx); + emitCode(OP_BUILD_CLASS, clsNameIdx); + } + + void compileFunction(){ + if(isCompilingClass){ + if(match(TK("pass"))) return; + consume(TK("def")); + } + consume(TK("@id")); + const _Str& name = parser->previous.str(); + + std::vector<_Str> argNames; + if (match(TK("(")) && !match(TK(")"))) { + do { + matchNewLines(); + consume(TK("@id")); + const _Str& argName = parser->previous.str(); + if (std::find(argNames.begin(), argNames.end(), argName) != argNames.end()) { + throw SyntaxError(path, parser->previous, "duplicate argument in function definition"); + } + argNames.push_back(argName); + } while (match(TK(","))); + consume(TK(")")); + } + + _Code fnCode = std::make_shared(); + fnCode->co_name = name; + fnCode->co_filename = path; + this->codes.push(fnCode); + compileBlockBody(); + this->codes.pop(); + PyVar fn = vm->PyFunction(_Func{name, fnCode, argNames}); + emitCode(OP_LOAD_CONST, getCode()->addConst(fn)); + if(!isCompilingClass) emitCode(OP_STORE_FUNCTION); + } + + void compileTopLevelStatement() { + if (match(TK("class"))) { + compileClass(); + } else if (match(TK("def"))) { + compileFunction(); + } else if (match(TK("import"))) { + compileRegularImport(); + } else { + compileStatement(); + 
} + } + +}; + + +_Code compile(VM* vm, const char* source, _Str filename, bool repl_mode=false) { + // Skip utf8 BOM if there is any. + if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3; + + _Code code = std::make_shared(); + code->co_filename = filename; + Compiler compiler(vm, source, code, repl_mode); + + // Lex initial tokens. current <-- next. + compiler.lexToken(); + compiler.lexToken(); + compiler.matchNewLines(); + + while (!compiler.match(TK("@eof"))) { + compiler.compileTopLevelStatement(); + compiler.matchNewLines(); + } + + return code; +} \ No newline at end of file diff --git a/src/error.h b/src/error.h new file mode 100644 index 00000000..3231238f --- /dev/null +++ b/src/error.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include +#include +#include + +#include "parser.h" + +class NeedMoreLines : public std::exception {}; + +class SyntaxError : public std::exception { +private: + _Str _what; + +public: + char message[100]; + _Str path; + int lineno; + + SyntaxError(const _Str& path, Token tk, const char* msg, ...) 
{ + va_list args; + va_start(args, msg); + vsnprintf(message, 100, msg, args); + va_end(args); + + this->path = path; + lineno = tk.line; + + _StrStream ss; + ss << " File '" << path << "', line " << std::to_string(lineno) << std::endl; + ss << _Str("SyntaxError: ") << message; + _what = ss.str(); + } + + const char* what() const noexcept override { + return _what.str().c_str(); + } +}; \ No newline at end of file diff --git a/src/iter.h b/src/iter.h new file mode 100644 index 00000000..d7b32353 --- /dev/null +++ b/src/iter.h @@ -0,0 +1,49 @@ +#pragma once + +#include "obj.h" + +typedef std::function _PyIntFn; + +class RangeIterator : public _Iterator { +private: + int current; + _Range r; + _PyIntFn fn; +public: + RangeIterator(PyVar _ref, _PyIntFn fn) : _Iterator(_ref), fn(fn) { + this->r = std::get<_Range>(_ref->_native); + this->current = r.start; + } + + PyVar next() override { + PyVar val = fn(current); + current += r.step; + return val; + } + + bool hasNext() override { + if(r.step > 0){ + return current < r.stop; + }else{ + return current > r.stop; + } + } +}; + +class VectorIterator : public _Iterator { +private: + int index = 0; + const PyVarList* vec; +public: + VectorIterator(PyVar _ref) : _Iterator(_ref) { + vec = &std::get(_ref->_native); + } + + bool hasNext(){ + return index < vec->size(); + } + + PyVar next(){ + return vec->at(index++); + } +}; \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 00000000..e78bc3a3 --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,122 @@ +#include +#include + +#include +#include "pocketpy.h" + +//#define PK_DEBUG +//#define PK_DEBUG_TIME + +class Timer{ +private: + std::chrono::time_point start; + std::string title; +public: + Timer(const std::string& title){ +#ifdef PK_DEBUG_TIME + start = std::chrono::high_resolution_clock::now(); + this->title = title; +#endif + } + + void stop(){ +#ifdef PK_DEBUG_TIME + auto end = std::chrono::high_resolution_clock::now(); + double elapsed = 
std::chrono::duration_cast(end - start).count() / 1000000.0; + std::cout << title << ": " << elapsed << " s" << std::endl; +#endif + } +}; + +VM* newVM(){ + VM* vm = createVM([](const char* str) { + std::cout << str; + std::cout.flush(); + }); + registerModule(vm, "math", "pi = 3.141593"); + return vm; +} + +void REPL(){ + std::cout << "pocketpy 0.1.0" << std::endl; + + bool need_more_lines = false; + std::string buffer; + VM* vm = newVM(); + + while(true){ + vm->printFn(need_more_lines ? "... " : ">>> "); + std::string line; + std::getline(std::cin, line); + + if(need_more_lines){ + buffer += line; + buffer += '\n'; + int n = buffer.size(); + if(n>=2 && buffer[n-1]=='\n' && buffer[n-2]=='\n'){ + need_more_lines = false; + line = buffer; + buffer.clear(); + }else{ + continue; + } + }else{ + if(line == "exit()") break; + if(line.empty()) continue; + } + try{ + _Code code = compile(vm, line.c_str(), "", true); + vm->exec(code); +#ifdef PK_DEBUG + }catch(NeedMoreLines& e){ +#else + }catch(std::exception& e){ +#endif + if(need_more_lines = dynamic_cast(&e)){ + buffer += line; + buffer += '\n'; + }else{ + vm->printFn(e.what()); + vm->printFn("\n"); + vm->cleanError(); + } + } + } +} + +int main(int argc, char** argv){ + if(argc == 1){ + REPL(); + return 0; + + // argc = 2; + // argv = new char*[2]{argv[0], (char*)"../tests/singletype/basic.py"}; + } + + if(argc == 2){ + std::string filename = argv[1]; + if(filename == "-h" || filename == "--help"){ + std::cout << "Usage: pocketpy [filename]" << std::endl; + return 0; + } +#ifndef PK_DEBUG + try{ +#endif + std::ifstream file(filename); + std::string src((std::istreambuf_iterator(file)), std::istreambuf_iterator()); + VM* vm = newVM(); + Timer timer("编译时间"); + _Code code = compile(vm, src.c_str(), filename, false); + timer.stop(); + //std::cout << code->toString() << std::endl; + Timer timer2("运行时间"); + vm->exec(code); + timer2.stop(); +#ifndef PK_DEBUG + }catch(std::exception& e){ + std::cout << e.what() << std::endl; + 
} +#endif + return 0; + } +} \ No newline at end of file diff --git a/src/obj.h b/src/obj.h new file mode 100644 index 00000000..cebaf7fd --- /dev/null +++ b/src/obj.h @@ -0,0 +1,86 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "str.h" + +class PyObject; +class CodeObject; +class VM; + +typedef std::shared_ptr PyVar; +typedef PyVar PyVarOrNull; +typedef std::vector PyVarList; +typedef std::unordered_map<_Str, PyVar> StlDict; + +typedef PyVar (*_CppFunc)(VM*, PyVarList); +typedef std::shared_ptr _Code; + +struct _Func { + _Str name; + _Code code; + std::vector<_Str> argNames; +}; + +struct BoundedMethod { + PyVar obj; + PyVar method; +}; + +struct _Range { + int start = 0; + int stop = -1; + int step = 1; +}; + +struct _Slice { + int start = 0; + int stop = 2147483647; + + void normalize(int len){ + if(start < 0) start += len; + if(stop < 0) stop += len; + if(start < 0) start = 0; + if(stop > len) stop = len; + } +}; + +class _Iterator { +private: + PyVar _ref; // keep a reference to the object so it will not be deleted while iterating +public: + virtual PyVar next() = 0; + virtual bool hasNext() = 0; + _Iterator(PyVar _ref) : _ref(_ref) {} +}; + +typedef std::variant,BoundedMethod,_Range,_Slice> _Value; + +#define UNREACHABLE() throw std::runtime_error("Unreachable code") + +struct PyObject { + StlDict attribs; + _Value _native; + + inline bool isType(const PyVar& type){ + return attribs[__class__] == type; + } + + // currently __name__ is only used for 'type' + _Str getName(){ + _Value val = attribs["__name__"]->_native; + return std::get<_Str>(val); + } + + _Str getTypeName(){ + return attribs[__class__]->getName(); + } + + PyObject(_Value val): _native(val) {} +}; \ No newline at end of file diff --git a/src/opcodes.h b/src/opcodes.h new file mode 100644 index 00000000..d5cf48e8 --- /dev/null +++ b/src/opcodes.h @@ -0,0 +1,56 @@ +#ifdef OPCODE + +OPCODE(LOAD_CONST) +OPCODE(LOAD_NAME) + +OPCODE(IMPORT_NAME) 
+OPCODE(STORE_FAST) +OPCODE(STORE_NAME) + +OPCODE(PRINT_EXPR) +OPCODE(POP_TOP) +OPCODE(CALL) +OPCODE(RETURN_VALUE) + +OPCODE(BINARY_OP) +OPCODE(COMPARE_OP) +OPCODE(IS_OP) +OPCODE(CONTAINS_OP) + +OPCODE(UNARY_NEGATIVE) +OPCODE(UNARY_NOT) + +OPCODE(DUP_TOP) + +OPCODE(BUILD_LIST) +OPCODE(BUILD_TUPLE) +OPCODE(BUILD_MAP) +OPCODE(BUILD_SLICE) + +OPCODE(BINARY_SUBSCR) +OPCODE(STORE_SUBSCR) +OPCODE(DELETE_SUBSCR) + +OPCODE(LOAD_ATTR) +OPCODE(STORE_ATTR) + +OPCODE(GET_ITER) +OPCODE(FOR_ITER) + +OPCODE(POP_JUMP_IF_FALSE) +OPCODE(JUMP_ABSOLUTE) +OPCODE(JUMP_IF_TRUE_OR_POP) +OPCODE(JUMP_IF_FALSE_OR_POP) + +// non-standard python opcodes +OPCODE(LOAD_NONE) +OPCODE(LOAD_TRUE) +OPCODE(LOAD_FALSE) + +OPCODE(ASSERT) +OPCODE(RAISE_ERROR) + +OPCODE(STORE_FUNCTION) +OPCODE(BUILD_CLASS) + +#endif \ No newline at end of file diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 00000000..ffa2effe --- /dev/null +++ b/src/parser.h @@ -0,0 +1,245 @@ +#pragma once + +#include +#include +#include + +#include "obj.h" + +typedef uint8_t _TokenType; + +constexpr const char* __TOKENS[] = { + "@error", "@eof", "@eol", "@sof", + ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "%", + "+", "-", "*", "/", "//", "**", "=", ">", "<", + "==", "!=", ">=", "<=", + "+=", "-=", "*=", "/=", "//=", + /** KW_BEGIN **/ + "class", "import", "as", "def", "lambda", "pass", "del", + "None", "in", "is", "and", "or", "not", "True", "False", + "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise", + /** KW_END **/ + "is not", "not in", + "@id", "@num", "@str", + "@indent", "@dedent" +}; + +const _TokenType __TOKENS_LEN = sizeof(__TOKENS) / sizeof(__TOKENS[0]); + +constexpr _TokenType __tokenIndex(const char* token) { + for(int k=0; k<__TOKENS_LEN; k++){ + const char* i = __TOKENS[k]; + const char* j = token; + while(*i && *j && *i == *j){ + i++; j++; + } + if(*i == *j) return k; + } + return 0; +} + + +#define TK(s) __tokenIndex(s) +#define TK_STR(t) __TOKENS[t] + 
+const _TokenType __KW_BEGIN = __tokenIndex("class"); +const _TokenType __KW_END = __tokenIndex("raise"); + +const std::unordered_map __KW_MAP = [](){ + std::unordered_map map; + for(int k=__KW_BEGIN; k<=__KW_END; k++) map[__TOKENS[k]] = k; + return map; +}(); + + +struct Token{ + _TokenType type; + + const char* start; //< Begining of the token in the source. + int length; //< Number of chars of the token. + int line; //< Line number of the token (1 based). + PyVar value; //< Literal value of the token. + + const _Str str() const { + return _Str(start, length); + } +}; + +enum Precedence { + PREC_NONE, + PREC_LOWEST, + PREC_LOGICAL_OR, // or + PREC_LOGICAL_AND, // and + PREC_EQUALITY, // == != + PREC_TEST, // in is + PREC_COMPARISION, // < > <= >= + PREC_TERM, // + - + PREC_FACTOR, // * / % + PREC_UNARY, // - not + PREC_EXPONENT, // ** + PREC_CALL, // () + PREC_SUBSCRIPT, // [] + PREC_ATTRIB, // .index + PREC_PRIMARY, +}; + +// The context of the parsing phase for the compiler. +struct Parser { + const char* source; //< Currently compiled source. + const char* token_start; //< Start of the currently parsed token. + const char* current_char; //< Current char position in the source. + const char* line_start; //< Start of the current line. 
+ + int current_line = 1; + + Token previous, current; + std::queue nexts; + + std::stack indents; + + Token nextToken(){ + if(nexts.empty()) return makeErrToken(); + Token t = nexts.front(); + if(t.type == TK("@eof") && indents.size()>1){ + nexts.pop(); + indents.pop(); + return Token{TK("@dedent"), token_start, 0, current_line}; + } + nexts.pop(); + return t; + } + + char peekChar() { + return *current_char; + } + + char peekNextChar() { + if (peekChar() == '\0') return '\0'; + return *(current_char + 1); + } + + int eatSpaces(){ + int count = 0; + while (true) { + switch (peekChar()) { + case ' ': count++; break; + case '\t': count+=4; break; + default: return count; + } + eatChar(); + } + } + + bool eatIndentation(){ + int spaces = eatSpaces(); + // https://docs.python.org/3/reference/lexical_analysis.html#indentation + if(spaces > indents.top()){ + indents.push(spaces); + nexts.push(Token{TK("@indent"), token_start, 0, current_line}); + } else if(spaces < indents.top()){ + while(spaces < indents.top()){ + indents.pop(); + nexts.push(Token{TK("@dedent"), token_start, 0, current_line}); + } + if(spaces != indents.top()){ + return false; + } + } + return true; + } + + char eatChar() { + char c = peekChar(); + if(c == '\n') throw std::runtime_error("eatChar() cannot consume a newline"); + current_char++; + return c; + } + + char eatCharIncludeNewLine() { + char c = peekChar(); + current_char++; + if (c == '\n'){ + current_line++; + line_start = current_char; + } + return c; + } + + void eatName() { + char c = peekChar(); + while (isalpha((unsigned char)c) || c=='_' || isdigit((unsigned char)c)) { /* ctype functions require a non-negative value; raw UTF-8 bytes are negative when char is signed */ + eatChar(); + c = peekChar(); + } + + const char* name_start = token_start; + int length = (int)(current_char - name_start); + std::string_view name(name_start, length); + if(__KW_MAP.count(name)){ + if(name == "not"){ + if(strncmp(current_char, " in", 3) == 0 && !(isalnum((unsigned char)current_char[3]) || current_char[3]=='_')){ /* fuse only when "in" is a whole word, so "not index" stays "not" + identifier; index 3 is readable because the three matched chars are non-NUL */ + current_char += 3; + setNextToken(TK("not in")); + return; + } + }else if(name == "is"){ + if(strncmp(current_char, " not", 4) == 0 && !(isalnum((unsigned char)current_char[4]) || current_char[4]=='_')){ /* same whole-word rule: "is nothing" must not lex as "is not" + "hing" */ + 
current_char += 4; + setNextToken(TK("is not")); + return; + } + } + setNextToken(__KW_MAP.at(name)); + } else { + setNextToken(TK("@id")); + } + } + + void skipLineComment() { + char c; + while ((c = peekChar()) != '\0') { + if (c == '\n') return; + eatChar(); + } + } + + // If the current char is [c] consume it and advance char by 1 and returns + // true otherwise returns false. + bool matchChar(char c) { + if (peekChar() != c) return false; + eatCharIncludeNewLine(); + return true; + } + + // Returns an error token from the current position for reporting error. + Token makeErrToken() { + return Token{TK("@error"), token_start, (int)(current_char - token_start), current_line}; + } + + // Initialize the next token as the type. + void setNextToken(_TokenType type, PyVar value=nullptr) { + nexts.push( Token{ + type, + token_start, + (int)(current_char - token_start), + current_line - ((type == TK("@eol")) ? 1 : 0), + value + }); + } + + void setNextTwoCharToken(char c, _TokenType one, _TokenType two) { + if (matchChar(c)) setNextToken(two); + else setNextToken(one); + } + + Parser(const char* source) { + this->source = source; + this->token_start = source; + this->current_char = source; + this->line_start = source; + + this->nexts.push(Token{TK("@sof"), token_start, 0, current_line}); + + this->indents.push(0); + } +}; \ No newline at end of file diff --git a/src/pocketpy.h b/src/pocketpy.h new file mode 100644 index 00000000..c7b41c96 --- /dev/null +++ b/src/pocketpy.h @@ -0,0 +1,454 @@ +#pragma once + +#include "vm.h" +#include "compiler.h" + +inline int _round(float f){ + if(f > 0) return (int)(f + 0.5); + return (int)(f - 0.5); +} + +#define BIND_NUM_ARITH_OPT(name, op) \ + _vm->bindMethodMulti({"int","float"}, #name, [](VM* vm, PyVarList args){ \ + if(!vm->isIntOrFloat(args[0], args[1])) \ + vm->_error("TypeError", "unsupported operand type(s) for " #op ); \ + if(args[0]->isType(vm->_tp_int) && args[1]->isType(vm->_tp_int)){ \ + return 
vm->PyInt(vm->PyInt_AS_C(args[0]) op vm->PyInt_AS_C(args[1])); \ + }else{ \ + return vm->PyFloat(vm->numToFloat(args[0]) op vm->numToFloat(args[1])); \ + } \ + }); + +#define BIND_NUM_LOGICAL_OPT(name, op, fallback) \ + _vm->bindMethodMulti({"int","float"}, #name, [](VM* vm, PyVarList args){ \ + if(!vm->isIntOrFloat(args[0], args[1])){ \ + if constexpr(fallback) return vm->PyBool(args[0] op args[1]); \ + vm->_error("TypeError", "unsupported operand type(s) for " #op ); \ + } \ + return vm->PyBool(vm->numToFloat(args[0]) op vm->numToFloat(args[1])); \ + }); + + +void __initializeBuiltinFunctions(VM* _vm) { + BIND_NUM_ARITH_OPT(__add__, +) + BIND_NUM_ARITH_OPT(__sub__, -) + BIND_NUM_ARITH_OPT(__mul__, *) + + BIND_NUM_LOGICAL_OPT(__lt__, <, false) + BIND_NUM_LOGICAL_OPT(__le__, <=, false) + BIND_NUM_LOGICAL_OPT(__gt__, >, false) + BIND_NUM_LOGICAL_OPT(__ge__, >=, false) + BIND_NUM_LOGICAL_OPT(__eq__, ==, true) + BIND_NUM_LOGICAL_OPT(__ne__, !=, true) + +#undef BIND_NUM_ARITH_OPT +#undef BIND_NUM_LOGICAL_OPT + + _vm->bindBuiltinFunc("print", [](VM* vm, PyVarList args) { + for (auto& arg : args) vm->printFn(vm->PyStr_AS_C(vm->asStr(arg)) + " "); + vm->printFn("\n"); + return vm->None; + }); + + _vm->bindBuiltinFunc("hash", [](VM* vm, PyVarList args) { + return vm->PyInt(vm->hash(args.at(0))); + }); + + _vm->bindBuiltinFunc("chr", [](VM* vm, PyVarList args) { + int i = vm->PyInt_AS_C(args.at(0)); + if (i < 0 || i >= 128) vm->_error("ValueError", "chr() arg not in range(128)"); /* range(128) is 0..127, so 128 itself must be rejected as the message says */ + return vm->PyStr(_Str(1, (char)i)); + }); + + _vm->bindBuiltinFunc("round", [](VM* vm, PyVarList args) { + return vm->PyInt(_round(vm->numToFloat(args.at(0)))); + }); + + _vm->bindBuiltinFunc("ord", [](VM* vm, PyVarList args) { + _Str s = vm->PyStr_AS_C(args.at(0)); + if (s.size() != 1) vm->_error("TypeError", "ord() expected an ASCII character"); + return vm->PyInt((int)s[0]); + }); + + _vm->bindBuiltinFunc("dir", [](VM* vm, PyVarList args) { + PyVarList ret; + for (auto& [k, _] : 
args.at(0)->attribs) ret.push_back(vm->PyStr(k)); + return vm->PyList(ret); + }); + + _vm->bindMethod("object", "__new__", [](VM* vm, PyVarList args) { + PyVar obj = vm->newObject(args.at(0), -1); + args.erase(args.begin()); + PyVarOrNull init_fn = vm->getAttr(obj, __init__, false); + if (init_fn != nullptr) vm->call(init_fn, args); + return obj; + }); + + _vm->bindMethod("object", "__str__", [](VM* vm, PyVarList args) { + PyVar _self = args[0]; + _Str s = "<" + _self->getTypeName() + " object at " + std::to_string((uintptr_t)_self.get()) + ">"; + return vm->PyStr(s); + }); + + _vm->bindMethod("range", "__new__", [](VM* vm, PyVarList args) { + _Range r; + if( args.size() == 0 ) vm->_error("TypeError", "range expected 1 arguments, got 0"); + else if (args.size() == 1+1) { + r.stop = vm->PyInt_AS_C(args[1]); + } + else if (args.size() == 2+1) { + r.start = vm->PyInt_AS_C(args[1]); + r.stop = vm->PyInt_AS_C(args[2]); + } + else if (args.size() == 3+1) { + r.start = vm->PyInt_AS_C(args[1]); + r.stop = vm->PyInt_AS_C(args[2]); + r.step = vm->PyInt_AS_C(args[3]); + } + else { + vm->_error("TypeError", "range expected 1 to 3 arguments, got " + std::to_string(args.size()-1)); + } + return vm->PyRange(r); + }); + + _vm->bindMethod("range", "__iter__", [](VM* vm, PyVarList args) { + vm->__checkType(args.at(0), vm->_tp_range); + auto iter = std::make_shared(args[0], [=](int val){return vm->PyInt(val);}); + return vm->PyIter(iter); + }); + + _vm->bindMethod("NoneType", "__str__", [](VM* vm, PyVarList args) { + return vm->PyStr("None"); + }); + + _vm->bindMethodMulti({"int", "float"}, "__truediv__", [](VM* vm, PyVarList args) { + if(!vm->isIntOrFloat(args[0], args[1])) + vm->_error("TypeError", "unsupported operand type(s) for " "/" ); + return vm->PyFloat(vm->numToFloat(args[0]) / vm->numToFloat(args[1])); + }); + + _vm->bindMethodMulti({"int", "float"}, "__pow__", [](VM* vm, PyVarList args) { + if(!vm->isIntOrFloat(args[0], args[1])) + vm->_error("TypeError", "unsupported 
operand type(s) for " "**" ); + if(args[0]->isType(vm->_tp_int) && args[1]->isType(vm->_tp_int)){ + return vm->PyInt(_round(pow(vm->PyInt_AS_C(args[0]), vm->PyInt_AS_C(args[1])))); + }else{ + return vm->PyFloat((float)pow(vm->numToFloat(args[0]), vm->numToFloat(args[1]))); + } + }); + + /************ PyInt ************/ + _vm->bindMethod("int", "__floordiv__", [](VM* vm, PyVarList args) { + if(!args[0]->isType(vm->_tp_int) || !args[1]->isType(vm->_tp_int)) + vm->_error("TypeError", "unsupported operand type(s) for " "//" ); + return vm->PyInt(vm->PyInt_AS_C(args[0]) / vm->PyInt_AS_C(args[1])); + }); + + _vm->bindMethod("int", "__mod__", [](VM* vm, PyVarList args) { + if(!args[0]->isType(vm->_tp_int) || !args[1]->isType(vm->_tp_int)) + vm->_error("TypeError", "unsupported operand type(s) for " "%" ); + return vm->PyInt(vm->PyInt_AS_C(args[0]) % vm->PyInt_AS_C(args[1])); + }); + + _vm->bindMethod("int", "__neg__", [](VM* vm, PyVarList args) { + if(!args[0]->isType(vm->_tp_int)) + vm->_error("TypeError", "unsupported operand type(s) for " "-" ); + return vm->PyInt(-1 * vm->PyInt_AS_C(args[0])); + }); + + _vm->bindMethod("int", "__str__", [](VM* vm, PyVarList args) { + return vm->PyStr(std::to_string(vm->PyInt_AS_C(args[0]))); + }); + + /************ PyFloat ************/ + _vm->bindMethod("float", "__neg__", [](VM* vm, PyVarList args) { + return vm->PyFloat(-1.0f * vm->PyFloat_AS_C(args[0])); + }); + + _vm->bindMethod("float", "__str__", [](VM* vm, PyVarList args) { + return vm->PyStr(std::to_string(vm->PyFloat_AS_C(args[0]))); + }); + + /************ PyString ************/ + _vm->bindMethod("str", "__new__", [](VM* vm, PyVarList args) { + vm->_assert(args[0] == vm->_tp_str, "str.__new__ must be called with str as first argument"); + vm->_assert(args.size() == 2, "str expected 1 argument"); + return vm->asStr(args[1]); + }); + + _vm->bindMethod("str", "__add__", [](VM* vm, PyVarList args) { + if(!args[0]->isType(vm->_tp_str) || !args[1]->isType(vm->_tp_str)) + 
vm->_error("TypeError", "unsupported operand type(s) for " "+" ); + const _Str& lhs = vm->PyStr_AS_C(args[0]); + const _Str& rhs = vm->PyStr_AS_C(args[1]); + return vm->PyStr(lhs + rhs); + }); + + _vm->bindMethod("str", "__len__", [](VM* vm, PyVarList args) { + const _Str& _self = vm->PyStr_AS_C(args[0]); + return vm->PyInt(_self.u8_length()); + }); + + _vm->bindMethod("str", "__contains__", [](VM* vm, PyVarList args) { + const _Str& _self = vm->PyStr_AS_C(args[0]); + const _Str& _other = vm->PyStr_AS_C(args[1]); + return vm->PyBool(_self.str().find(_other.str()) != _Str::npos); + }); + + _vm->bindMethod("str", "__str__", [](VM* vm, PyVarList args) { + return args[0]; // str is immutable + }); + + _vm->bindMethod("str", "__eq__", [](VM* vm, PyVarList args) { + const _Str& _self = vm->PyStr_AS_C(args[0]); + const _Str& _other = vm->PyStr_AS_C(args[1]); + return vm->PyBool(_self == _other); + }); + + _vm->bindMethod("str", "__ne__", [](VM* vm, PyVarList args) { + const _Str& _self = vm->PyStr_AS_C(args[0]); + const _Str& _other = vm->PyStr_AS_C(args[1]); + return vm->PyBool(_self != _other); + }); + + _vm->bindMethod("str", "__getitem__", [](VM* vm, PyVarList args) { + const _Str& _self (vm->PyStr_AS_C(args[0])); + + if(args[1]->isType(vm->_tp_slice)){ + _Slice s = vm->PySlice_AS_C(args[1]); + s.normalize(_self.u8_length()); + return vm->PyStr(_self.u8_substr(s.start, s.stop)); + } + + int _index = vm->PyInt_AS_C(args[1]); + _index = vm->normalizedIndex(_index, _self.u8_length()); + return vm->PyStr(_self.u8_getitem(_index)); + }); + + _vm->bindMethod("str", "__gt__", [](VM* vm, PyVarList args) { + const _Str& _self (vm->PyStr_AS_C(args[0])); + const _Str& _obj (vm->PyStr_AS_C(args[1])); + return vm->PyBool(_self > _obj); + }); + + _vm->bindMethod("str", "__lt__", [](VM* vm, PyVarList args) { + const _Str& _self (vm->PyStr_AS_C(args[0])); + const _Str& _obj (vm->PyStr_AS_C(args[1])); + return vm->PyBool(_self < _obj); + }); + + _vm->bindMethod("str", "upper", [](VM* 
vm, PyVarList args) { + const _Str& _self (vm->PyStr_AS_C(args[0])); + _StrStream ss; + for(auto c : _self.str()) ss << (char)toupper(c); + return vm->PyStr(ss); + }); + + _vm->bindMethod("str", "lower", [](VM* vm, PyVarList args) { + const _Str& _self (vm->PyStr_AS_C(args[0])); + _StrStream ss; + for(auto c : _self.str()) ss << (char)tolower(c); + return vm->PyStr(ss); + }); + + _vm->bindMethod("str", "replace", [](VM* vm, PyVarList args) { + const _Str& _self = vm->PyStr_AS_C(args[0]); + const _Str& _old = vm->PyStr_AS_C(args[1]); + const _Str& _new = vm->PyStr_AS_C(args[2]); + std::string _copy = _self.str(); + // replace all occurrences of _old with _new in _copy; an empty _old is treated as a no-op because find("") matches at every position and the loop would never terminate + size_t pos = 0; + while (!_old.empty() && (pos = _copy.find(_old.str(), pos)) != std::string::npos) { + _copy.replace(pos, _old.str().length(), _new.str()); + pos += _new.str().length(); + } + return vm->PyStr(_copy); + }); + + _vm->bindMethod("str", "startswith", [](VM* vm, PyVarList args) { + const _Str& _self = vm->PyStr_AS_C(args[0]); + const _Str& _prefix = vm->PyStr_AS_C(args[1]); + return vm->PyBool(_self.str().find(_prefix.str()) == 0); + }); + + _vm->bindMethod("str", "endswith", [](VM* vm, PyVarList args) { + const _Str& _self = vm->PyStr_AS_C(args[0]); + const _Str& _suffix = vm->PyStr_AS_C(args[1]); + return vm->PyBool(_self.str().length() >= _suffix.str().length() && _self.str().compare(_self.str().length() - _suffix.str().length(), std::string::npos, _suffix.str()) == 0); /* length guard first: the unsigned subtraction used to wrap to npos for a longer suffix, which equals rfind's "not found" value and reported a match */ + }); + + /************ PyList ************/ + _vm->bindMethod("list", "__iter__", [](VM* vm, PyVarList args) { + vm->__checkType(args.at(0), vm->_tp_list); + auto iter = std::make_shared(args[0]); + return vm->PyIter(iter); + }); + + _vm->bindMethod("list", "append", [](VM* vm, PyVarList args) { + PyVarList& _self = vm->PyList_AS_C(args[0]); + _self.push_back(args[1]); + return vm->None; + }); + + _vm->bindMethod("list", "insert", [](VM* vm, PyVarList args) { + PyVarList& _self = vm->PyList_AS_C(args[0]); + int _index = vm->PyInt_AS_C(args[1]); + _index = vm->normalizedIndex(_index, _self.size()); + 
_self.insert(_self.begin() + _index, args[2]); + return vm->None; + }); + + _vm->bindMethod("list", "clear", [](VM* vm, PyVarList args) { + vm->PyList_AS_C(args[0]).clear(); + return vm->None; + }); + + _vm->bindMethod("list", "copy", [](VM* vm, PyVarList args) { + return vm->PyList(vm->PyList_AS_C(args[0])); + }); + + _vm->bindMethod("list", "pop", [](VM* vm, PyVarList args) { + PyVarList& _self = vm->PyList_AS_C(args[0]); + if(_self.empty()) vm->_error("IndexError", "pop from empty list"); + PyVar ret = _self.back(); + _self.pop_back(); + return ret; + }); + + _vm->bindMethod("list", "__add__", [](VM* vm, PyVarList args) { + const PyVarList& _self = vm->PyList_AS_C(args[0]); + const PyVarList& _obj = vm->PyList_AS_C(args[1]); + PyVarList _new_list = _self; + _new_list.insert(_new_list.end(), _obj.begin(), _obj.end()); + return vm->PyList(_new_list); + }); + + _vm->bindMethod("list", "__len__", [](VM* vm, PyVarList args) { + const PyVarList& _self = vm->PyList_AS_C(args[0]); + return vm->PyInt(_self.size()); + }); + + _vm->bindMethod("list", "__getitem__", [](VM* vm, PyVarList args) { + const PyVarList& _self = vm->PyList_AS_C(args[0]); + + if(args[1]->isType(vm->_tp_slice)){ + _Slice s = vm->PySlice_AS_C(args[1]); + s.normalize(_self.size()); + PyVarList _new_list; + for(int i = s.start; i < s.stop; i++) + _new_list.push_back(_self[i]); + return vm->PyList(_new_list); + } + + int _index = vm->PyInt_AS_C(args[1]); + _index = vm->normalizedIndex(_index, _self.size()); + return _self[_index]; + }); + + _vm->bindMethod("list", "__setitem__", [](VM* vm, PyVarList args) { + PyVarList& _self = vm->PyList_AS_C(args[0]); + int _index = vm->PyInt_AS_C(args[1]); + _index = vm->normalizedIndex(_index, _self.size()); + _self[_index] = args[2]; + return vm->None; + }); + + _vm->bindMethod("list", "__delitem__", [](VM* vm, PyVarList args) { + PyVarList& _self = vm->PyList_AS_C(args[0]); + int _index = vm->PyInt_AS_C(args[1]); + _index = vm->normalizedIndex(_index, 
_self.size()); + _self.erase(_self.begin() + _index); + return vm->None; + }); + + /************ PyTuple ************/ + _vm->bindMethod("tuple", "__iter__", [](VM* vm, PyVarList args) { + vm->__checkType(args.at(0), vm->_tp_tuple); + auto iter = std::make_shared(args[0]); + return vm->PyIter(iter); + }); + + _vm->bindMethod("tuple", "__len__", [](VM* vm, PyVarList args) { + const PyVarList& _self = vm->PyTuple_AS_C(args[0]); + return vm->PyInt(_self.size()); + }); + + _vm->bindMethod("tuple", "__getitem__", [](VM* vm, PyVarList args) { + const PyVarList& _self = vm->PyTuple_AS_C(args[0]); + int _index = vm->PyInt_AS_C(args[1]); + _index = vm->normalizedIndex(_index, _self.size()); + return _self[_index]; + }); + + /************ PyBool ************/ + _vm->bindMethod("bool", "__str__", [](VM* vm, PyVarList args) { + bool val = vm->PyBool_AS_C(args[0]); + return vm->PyStr(val ? "True" : "False"); + }); + + _vm->bindMethod("bool", "__eq__", [](VM* vm, PyVarList args) { + return vm->PyBool(args[0] == args[1]); + }); +} + +void __runCodeBuiltins(VM* vm, const char* src){ + _Code code = compile(vm, src, "builtins.py"); + vm->exec(code, {}, vm->builtins); +} + +#include +void __addModuleRandom(VM* vm){ + srand(time(NULL)); + PyVar random = vm->newModule("random"); + vm->bindFunc(random, "randint", [](VM* vm, PyVarList args) { + int _min = vm->PyInt_AS_C(args[0]); + int _max = vm->PyInt_AS_C(args[1]); + return vm->PyInt(rand() % (_max - _min + 1) + _min); + }); + vm->_modules["random"] = random; +} + +#include "builtins.h" + +#ifdef _WIN32 +#define __EXPORT __declspec(dllexport) +#elif __APPLE__ +#define __EXPORT __attribute__((visibility("default"))) __attribute__((used)) +#else +#define __EXPORT +#endif + +extern "C" { + __EXPORT + VM* createVM(PrintFn printFn){ + VM* vm = new VM(); + __initializeBuiltinFunctions(vm); + __runCodeBuiltins(vm, __BUILTINS_CODE); + __addModuleRandom(vm); + vm->printFn = printFn; + return vm; + } + + __EXPORT + void destroyVM(VM* vm){ + 
delete vm; + } + + __EXPORT + void exec(VM* vm, const char* source){ + try{ + _Code code = compile(vm, source, "main.py"); + vm->exec(code); + }catch(std::exception& e){ + vm->printFn(e.what()); + vm->printFn("\n"); + vm->cleanError(); + } + } + + __EXPORT + void registerModule(VM* vm, const char* name, const char* source){ + _Code code = compile(vm, source, name + _Str(".py")); + vm->registerCompiledModule(name, code); + } +} \ No newline at end of file diff --git a/src/str.h b/src/str.h new file mode 100644 index 00000000..392adcc2 --- /dev/null +++ b/src/str.h @@ -0,0 +1,153 @@ +#pragma once + +#include +#include +#include + +typedef std::stringstream _StrStream; + + +class _Str { +private: + mutable bool utf8_initialized = false; + mutable std::vector _u8_index; // max_len is 65535 + + std::string _s; + + mutable bool hash_initialized = false; + mutable size_t _hash; + + void utf8_lazy_init() const{ + if(utf8_initialized) return; + for(int i = 0; i < size(); i++){ + // https://stackoverflow.com/questions/3911536/utf-8-unicode-whats-with-0xc0-and-0x80 + if((_s[i] & 0xC0) != 0x80) + _u8_index.push_back(i); + } + utf8_initialized = true; + } +public: + _Str(const char* s): _s(s) {} + _Str(const char* s, size_t len): _s(s, len) {} + _Str(int n, char fill = ' '): _s(n, fill) {} + _Str(const std::string& s): _s(s) {} + _Str(std::string&& s): _s(std::move(s)) {} + _Str(const _StrStream& ss): _s(ss.str()) {} + _Str(){} + + size_t hash() const{ + if(!hash_initialized){ + _hash = std::hash()(_s); + hash_initialized = true; + } + return _hash; + } + + int u8_length() const { + utf8_lazy_init(); + return _u8_index.size(); + } + + _Str u8_getitem(int i) const{ + return u8_substr(i, i+1); + } + + _Str u8_substr(int start, int end) const{ + utf8_lazy_init(); + if(start >= end) return _Str(); + int c_end = end >= _u8_index.size() ? 
size() : _u8_index[end]; + return _s.substr(_u8_index.at(start), c_end - _u8_index.at(start)); + } + + int size() const { + return _s.size(); + } + + bool empty() const { + return _s.empty(); + } + + bool operator==(const _Str& other) const { + return _s == other._s; + } + + bool operator!=(const _Str& other) const { + return _s != other._s; + } + + bool operator<(const _Str& other) const { + return _s < other._s; + } + + bool operator>(const _Str& other) const { + return _s > other._s; + } + + char operator[](int i) const { + return _s[i]; + } + + friend std::ostream& operator<<(std::ostream& os, const _Str& s){ + os << s._s; + return os; + } + + _Str operator+(const _Str& other) const { + return _Str(_s + other._s); + } + + _Str operator+(const char* other) const { + return _Str(_s + other); + } + + _Str operator+(const std::string& other) const { + return _Str(_s + other); + } + + friend _Str operator+(const char* other, const _Str& s){ + return _Str(other + s._s); + } + + friend _Str operator+(const std::string& other, const _Str& s){ + return _Str(other + s._s); + } + + const std::string& str() const { + return _s; + } + + static const std::size_t npos = std::string::npos; + + operator const char*() const { + return _s.c_str(); + } +}; + + +namespace std { + template<> + struct hash<_Str> { + std::size_t operator()(const _Str& s) const { + return s.hash(); + } + }; +} + +const _Str& __class__ = _Str("__class__"); +const _Str& __base__ = _Str("__base__"); +const _Str& __new__ = _Str("__new__"); +const _Str& __iter__ = _Str("__iter__"); +const _Str& __str__ = _Str("__str__"); +const _Str& __neg__ = _Str("__neg__"); +const _Str& __getitem__ = _Str("__getitem__"); +const _Str& __setitem__ = _Str("__setitem__"); +const _Str& __contains__ = _Str("__contains__"); +const _Str& __init__ = _Str("__init__"); + +const _Str CMP_SPECIAL_METHODS[] = { + "__lt__", "__le__", "__eq__", "__ne__", "__gt__", "__ge__" +}; + +const _Str BIN_SPECIAL_METHODS[] = { + "__add__", 
"__sub__", "__mul__", "__truediv__", "__floordiv__", "__mod__", "__pow__" +}; \ No newline at end of file diff --git a/src/vm.h b/src/vm.h new file mode 100644 index 00000000..1b30b55b --- /dev/null +++ b/src/vm.h @@ -0,0 +1,598 @@ +#pragma once + +#include "codeobject.h" +#include "iter.h" + +#define DEF_NATIVE(type, ctype, ptype) \ + inline PyVar Py##type(ctype value) { \ + return newObject(ptype, value); \ + } \ + \ + inline ctype& Py##type##_AS_C(const PyVar& obj) { \ + __checkType(obj, ptype); \ + return std::get(obj->_native); \ + } + +#define BINARY_XXX(i) \ + {PyVar rhs = frame->popValue(); \ + PyVar lhs = frame->popValue(); \ + frame->pushValue(fastCall(lhs, BIN_SPECIAL_METHODS[i], {lhs,rhs}));} + +#define COMPARE_XXX(i) \ + {PyVar rhs = frame->popValue(); \ + PyVar lhs = frame->popValue(); \ + frame->pushValue(fastCall(lhs, CMP_SPECIAL_METHODS[i], {lhs,rhs}));} + +// TODO: we should split this into stdout and stderr +typedef void(*PrintFn)(const char*); + +class VM{ +private: + std::stack< std::shared_ptr > callstack; +public: + StlDict _types; // builtin types + PyVar None, True, False; + + PrintFn printFn = [](auto s){}; + + PyVar builtins; // builtins module + PyVar _main; // __main__ module + StlDict _modules; // 3rd modules + + VM(){ + initializeBuiltinClasses(); + } + + void cleanError(){ + while(!callstack.empty()) callstack.pop(); + } + + void nameError(const _Str& name){ + _error("NameError", "name '" + name + "' is not defined"); + } + + void attributeError(PyVar obj, const _Str& name){ + _error("AttributeError", "type '" + obj->getTypeName() + "' has no attribute '" + name + "'"); + } + + inline void __checkType(const PyVar& obj, const PyVar& type){ + if(!obj->isType(type)){ + _error("TypeError", "expected '" + type->getName() + "', but got '" + obj->getTypeName() + "'"); + } + } + + PyVar asStr(const PyVar& obj){ + if(obj->isType(_tp_type)) return PyStr("getName() + "'>"); + return call(obj, __str__, {}); + } + + PyVar asBool(const PyVar& 
obj){ + if(obj == None) return False; + PyVar tp = obj->attribs[__class__]; + if(tp == _tp_bool) return obj; + if(tp == _tp_int) return PyBool(PyInt_AS_C(obj) != 0); + if(tp == _tp_float) return PyBool(PyFloat_AS_C(obj) != 0.0f); + PyVarOrNull len_fn = getAttr(obj, "__len__", false); + if(len_fn != nullptr){ + PyVar ret = call(len_fn, {}); + return PyBool(PyInt_AS_C(ret) > 0); + } + return True; + } + + PyVar fastCall(const PyVar& obj, const _Str& name, PyVarList args){ + PyVar cls = obj->attribs[__class__]; + while(cls != None) { + auto it = cls->attribs.find(name); + if(it != cls->attribs.end()){ + return call(it->second, args); + } + cls = cls->attribs[__base__]; + } + attributeError(obj, name); + return nullptr; + } + + PyVar call(PyVar callable, PyVarList args){ + if(callable->isType(_tp_type)){ + // add type itself as the first argument + args.insert(args.begin(), callable); + callable = getAttr(callable, __new__); + } + + if(callable->isType(_tp_bounded_method)){ + auto& bm = PyBoundedMethod_AS_C(callable); + args.insert(args.begin(), bm.obj); + callable = bm.method; + } + + if(callable->isType(_tp_native_function)){ + auto f = std::get<_CppFunc>(callable->_native); + return f(this, args); + } else if(callable->isType(_tp_function)){ + _Func fn = PyFunction_AS_C(callable); + if(args.size() != fn.argNames.size()){ + _error("TypeError", "expected " + std::to_string(fn.argNames.size()) + " arguments, but got " + std::to_string(args.size())); + } + StlDict locals; + for(int i=0; igetTypeName() + "' object is not callable"); + return None; + } + + inline PyVar call(const PyVar& obj, const _Str& func, PyVarList args){ + return call(getAttr(obj, func), args); + } + + PyVar runFrame(std::shared_ptr frame){ + callstack.push(frame); + while(!frame->isEnd()){ + const ByteCode& byte = frame->readCode(); + //printf("%s (%d)\n", OP_NAMES[byte.op], byte.arg); + + switch (byte.op) + { + case OP_LOAD_CONST: + frame->pushValue(frame->code->co_consts[byte.arg]); + break; + 
case OP_LOAD_NAME: + { + const _Str& name = frame->code->co_names[byte.arg]; + auto it = frame->f_locals.find(name); + if(it != frame->f_locals.end()){ + frame->pushValue(it->second); + break; + } + + it = frame->f_globals->find(name); + if(it != frame->f_globals->end()){ + frame->pushValue(it->second); + break; + } + + it = builtins->attribs.find(name); + if(it != builtins->attribs.end()){ + frame->pushValue(it->second); + break; + } + + nameError(name); + } break; + case OP_STORE_FAST: + { + const _Str& name = frame->code->co_names[byte.arg]; + frame->f_locals[name] = frame->popValue(); + } break; + case OP_STORE_NAME: + { + const _Str& name = frame->code->co_names[byte.arg]; + if(frame->f_locals.find(name) != frame->f_locals.end()){ + frame->f_locals[name] = frame->popValue(); + }else{ + frame->f_globals->operator[](name) = frame->popValue(); + } + } break; + case OP_STORE_FUNCTION: + { + PyVar obj = frame->popValue(); + const _Func& fn = PyFunction_AS_C(obj); + frame->f_globals->operator[](fn.name) = obj; + } break; + case OP_BUILD_CLASS: + { + _Str clsName = frame->code->co_names[byte.arg]; + PyVar clsBase = frame->popValue(); + if(clsBase == None) clsBase = _tp_object; + __checkType(clsBase, _tp_type); + PyVar cls = newUserClassType(clsName, clsBase); + while(true){ + PyVar fn = frame->popValue(); + if(fn == None) break; + const _Func& f = PyFunction_AS_C(fn); + setAttr(cls, f.name, fn); + } + frame->f_globals->operator[](clsName) = cls; + } break; + case OP_RETURN_VALUE: + { + PyVar ret = frame->popValue(); + callstack.pop(); + return ret; + } break; + case OP_PRINT_EXPR: + { + const PyVar& expr = frame->topValue(); + if(expr == None) break; + printFn(PyStr_AS_C(asStr(expr))); + printFn("\n"); + } break; + case OP_POP_TOP: frame->popValue(); break; + case OP_BINARY_OP: BINARY_XXX(byte.arg) break; + case OP_COMPARE_OP: COMPARE_XXX(byte.arg) break; + case OP_IS_OP: + { + bool ret_c = frame->popValue() == frame->popValue(); + if(byte.arg == 1) ret_c = !ret_c; + 
frame->pushValue(PyBool(ret_c)); + } break; + case OP_CONTAINS_OP: + { + PyVar right = frame->popValue(); + PyVar left = frame->popValue(); + bool ret_c = PyBool_AS_C(call(right, __contains__, {left})); + if(byte.arg == 1) ret_c = !ret_c; + frame->pushValue(PyBool(ret_c)); + } break; + case OP_UNARY_NEGATIVE: + { + PyVar obj = frame->popValue(); + frame->pushValue(call(obj, __neg__, {})); + } break; + case OP_UNARY_NOT: + { + PyVar obj = frame->popValue(); + PyVar obj_bool = asBool(obj); + frame->pushValue(PyBool(!PyBool_AS_C(obj_bool))); + } break; + case OP_LOAD_ATTR: + { + PyVar obj = frame->popValue(); + const _Str& name = frame->code->co_names[byte.arg]; + frame->pushValue(getAttr(obj, name)); + } break; + case OP_STORE_ATTR: + { + PyVar value = frame->popValue(); + PyVar obj = frame->popValue(); + const _Str& name = frame->code->co_names[byte.arg]; + setAttr(obj, name, value); + } break; + case OP_POP_JUMP_IF_FALSE: + if(!PyBool_AS_C(asBool(frame->popValue()))) frame->jumpTo(byte.arg); + break; + case OP_LOAD_NONE: frame->pushValue(None); break; + case OP_LOAD_TRUE: frame->pushValue(True); break; + case OP_LOAD_FALSE: frame->pushValue(False); break; + case OP_ASSERT: + { + PyVar expr = frame->popValue(); + if(!PyBool_AS_C(expr)) _error("AssertionError", "assertion failed"); + } break; + case OP_RAISE_ERROR: + { + _Str msg = PyStr_AS_C(asStr(frame->popValue())); + _Str type = PyStr_AS_C(frame->popValue()); + _error(type, msg); + } break; + case OP_BUILD_LIST: + { + PyVarList items = frame->popNReversed(byte.arg); + frame->pushValue(PyList(items)); + } break; + case OP_BUILD_MAP: + { + PyVarList items = frame->popNReversed(byte.arg); + PyVar obj = call(builtins->attribs["dict"], {PyList(items)}); + frame->pushValue(obj); + } break; + case OP_BUILD_TUPLE: + { + PyVarList items = frame->popNReversed(byte.arg); + frame->pushValue(PyTuple(items)); + } break; + case OP_BINARY_SUBSCR: + { + PyVar key = frame->popValue(); + PyVar obj = frame->popValue(); + 
frame->pushValue(call(obj, __getitem__, {key})); + } break; + case OP_STORE_SUBSCR: + { + PyVar value = frame->popValue(); + PyVar key = frame->popValue(); + PyVar obj = frame->popValue(); + call(obj, __setitem__, {key, value}); + } break; + case OP_DUP_TOP: frame->pushValue(frame->topValue()); break; + case OP_CALL: + { + PyVarList args = frame->popNReversed(byte.arg); + PyVar callable = frame->popValue(); + frame->pushValue(call(callable, args)); + } break; + case OP_JUMP_ABSOLUTE: frame->jumpTo(byte.arg); break; + case OP_GET_ITER: + { + PyVar obj = frame->popValue(); + PyVarOrNull iter_fn = getAttr(obj, __iter__, false); + if(iter_fn != nullptr){ + PyVar tmp = call(iter_fn, {obj}); + if(tmp->isType(_tp_native_iterator)){ + frame->pushValue(tmp); + break; + } + } + _error("TypeError", "'" + obj->getTypeName() + "' object is not iterable"); + } break; + case OP_FOR_ITER: + { + const PyVar& iter = frame->topValue(); + auto& it = PyIter_AS_C(iter); + if(it->hasNext()){ + frame->pushValue(it->next()); + } + else{ + frame->popValue(); + frame->jumpTo(byte.arg); + } + } break; + case OP_JUMP_IF_FALSE_OR_POP: + { + const PyVar& expr = frame->topValue(); + if(!PyBool_AS_C(asBool(expr))) frame->jumpTo(byte.arg); + else frame->popValue(); + } break; + case OP_JUMP_IF_TRUE_OR_POP: + { + const PyVar& expr = frame->topValue(); + if(PyBool_AS_C(asBool(expr))) frame->jumpTo(byte.arg); + else frame->popValue(); + } break; + case OP_BUILD_SLICE: + { + PyVar stop = frame->popValue(); + PyVar start = frame->popValue(); + _Slice s; + if(start != None) {__checkType(start, _tp_int); s.start = PyInt_AS_C(start);} + if(stop != None) {__checkType(stop, _tp_int); s.stop = PyInt_AS_C(stop);} + frame->pushValue(PySlice(s)); + } break; + case OP_IMPORT_NAME: + { + const _Str& name = frame->code->co_names[byte.arg]; + auto it = _modules.find(name); + if(it == _modules.end()){ + _error("ImportError", "module '" + name + "' not found"); + }else{ + frame->pushValue(it->second); + } + } break; + 
case OP_DELETE_SUBSCR: + { + PyVar index = frame->popValue(); + PyVar obj = frame->popValue(); + call(obj, "__delitem__", {index}); + } break; + default: + _error("SystemError", _Str("opcode ") + OP_NAMES[byte.op] + " is not implemented"); + break; + } + } + callstack.pop(); + return None; + } + + PyVar exec(const _Code& code, const StlDict& locals={}, PyVar _module=nullptr){ + if(_module == nullptr) _module = _main; + auto frame = std::make_shared( + code.get(), + locals, + &_module->attribs + ); + return runFrame(frame); + } + + void _assert(bool val, const _Str& msg){ + if (!val) _error("AssertionError", msg); + } + + void _error(const _Str& name, const _Str& msg){ + _StrStream ss; + auto frame = callstack.top(); + ss << "Traceback (most recent call last):" << std::endl; + ss << " File '" << frame->code->co_filename << "', line "; + ss << frame->currentLine() << '\n' << name << ": " << msg; + cleanError(); + throw std::runtime_error(ss.str()); + } + + PyVar newUserClassType(_Str name, PyVar base){ + PyVar obj = newClassType(name, base); + setAttr(obj, "__name__", PyStr(name)); + _types.erase(name); + return obj; + } + + PyVar newClassType(_Str name, PyVar base=nullptr) { + if(base == nullptr) base = _tp_object; + PyVar obj = std::make_shared(0); + setAttr(obj, __class__, _tp_type); + setAttr(obj, __base__, base); + _types[name] = obj; + return obj; + } + + PyVar newObject(PyVar type, _Value _native) { + __checkType(type, _tp_type); + PyVar obj = std::make_shared(_native); + setAttr(obj, __class__, type); + return obj; + } + + PyVar newModule(_Str name) { + PyVar obj = newObject(_tp_module, 0); + setAttr(obj, "__name__", PyStr(name)); + return obj; + } + + PyVarOrNull getAttr(const PyVar& obj, const _Str& name, bool throw_err=true) { + auto it = obj->attribs.find(name); + if(it != obj->attribs.end()) return it->second; + + PyVar cls = obj->attribs[__class__]; + while(cls != None) { + it = cls->attribs.find(name); + if(it != cls->attribs.end()){ + PyVar 
valueFromCls = it->second; + if(valueFromCls->isType(_tp_function) || valueFromCls->isType(_tp_native_function)){ + if(name == __new__) return valueFromCls; + return PyBoundedMethod({obj, valueFromCls}); + }else{ + return valueFromCls; + } + } + cls = cls->attribs[__base__]; + } + if(throw_err) attributeError(obj, name); + return nullptr; + } + + inline void setAttr(PyVar& obj, const _Str& name, PyVar value) { + obj->attribs[name] = value; + } + + void bindMethod(_Str typeName, _Str funcName, _CppFunc fn) { + PyVar type = _types[typeName]; + PyVar func = PyNativeFunction(fn); + setAttr(type, funcName, func); + } + + void bindMethodMulti(std::vector<_Str> typeNames, _Str funcName, _CppFunc fn) { + for(auto& typeName : typeNames){ + bindMethod(typeName, funcName, fn); + } + } + + void bindBuiltinFunc(_Str funcName, _CppFunc fn) { + bindFunc(builtins, funcName, fn); + } + + void bindFunc(PyVar module, _Str funcName, _CppFunc fn) { + __checkType(module, _tp_module); + PyVar func = PyNativeFunction(fn); + setAttr(module, funcName, func); + } + + bool isInstance(PyVar obj, PyVar type){ + PyVar t = obj->attribs[__class__]; + while (t != None){ + if (t == type) return true; + t = t->attribs[__base__]; + } + return false; + } + + inline bool isIntOrFloat(const PyVar& obj){ + return obj->isType(_tp_int) || obj->isType(_tp_float); + } + + inline bool isIntOrFloat(const PyVar& obj1, const PyVar& obj2){ + return isIntOrFloat(obj1) && isIntOrFloat(obj2); + } + + float numToFloat(const PyVar& obj){ + if (obj->isType(_tp_int)){ + return (float)PyInt_AS_C(obj); + }else if(obj->isType(_tp_float)){ + return PyFloat_AS_C(obj); + } + UNREACHABLE(); + } + + int normalizedIndex(int index, int size){ + if(index < 0) index += size; + if(index < 0 || index >= size){ + _error("IndexError", "index out of range, " + std::to_string(index) + " not in [0, " + std::to_string(size) + ")"); + } + return index; + } + + // for quick access + PyVar _tp_object, _tp_type, _tp_int, _tp_float, _tp_bool, 
_tp_str; + PyVar _tp_list, _tp_tuple; + PyVar _tp_function, _tp_native_function, _tp_native_iterator, _tp_bounded_method; + PyVar _tp_slice, _tp_range, _tp_module; + + DEF_NATIVE(Int, int, _tp_int) + DEF_NATIVE(Float, float, _tp_float) + DEF_NATIVE(Str, _Str, _tp_str) + DEF_NATIVE(List, PyVarList, _tp_list) + DEF_NATIVE(Tuple, PyVarList, _tp_tuple) + DEF_NATIVE(Function, _Func, _tp_function) + DEF_NATIVE(NativeFunction, _CppFunc, _tp_native_function) + DEF_NATIVE(Iter, std::shared_ptr<_Iterator>, _tp_native_iterator) + DEF_NATIVE(BoundedMethod, BoundedMethod, _tp_bounded_method) + DEF_NATIVE(Range, _Range, _tp_range) + DEF_NATIVE(Slice, _Slice, _tp_slice) + + inline bool PyBool_AS_C(PyVar obj){return obj == True;} + inline PyVar PyBool(bool value){return value ? True : False;} + + void initializeBuiltinClasses(){ + _tp_object = std::make_shared(0); + _tp_type = std::make_shared(0); + + _types["object"] = _tp_object; + _types["type"] = _tp_type; + + _tp_bool = newClassType("bool"); + _tp_int = newClassType("int"); + _tp_float = newClassType("float"); + _tp_str = newClassType("str"); + _tp_list = newClassType("list"); + _tp_tuple = newClassType("tuple"); + _tp_slice = newClassType("slice"); + _tp_range = newClassType("range"); + _tp_module = newClassType("module"); + + newClassType("NoneType"); + + _tp_function = newClassType("function"); + _tp_native_function = newClassType("_native_function"); + _tp_native_iterator = newClassType("_native_iterator"); + _tp_bounded_method = newClassType("_bounded_method"); + + this->None = newObject(_types["NoneType"], 0); + this->True = newObject(_tp_bool, true); + this->False = newObject(_tp_bool, false); + this->builtins = newModule("__builtins__"); + this->_main = newModule("__main__"); + + setAttr(_tp_type, __base__, _tp_object); + setAttr(_tp_type, __class__, _tp_type); + setAttr(_tp_object, __base__, None); + setAttr(_tp_object, __class__, _tp_type); + + for (auto& [name, type] : _types) { + setAttr(type, "__name__", 
PyStr(name)); + } + + std::vector<_Str> publicTypes = {"type", "object", "bool", "int", "float", "str", "list", "tuple", "range"}; + for (auto& name : publicTypes) { + setAttr(builtins, name, _types[name]); + } + } + + int hash(const PyVar& obj){ + if (obj->isType(_tp_int)) return PyInt_AS_C(obj); + if (obj->isType(_tp_bool)) return PyBool_AS_C(obj) ? 1 : 0; + if (obj->isType(_tp_float)){ + float val = PyFloat_AS_C(obj); + return (int)std::hash()(val); + } + if (obj->isType(_tp_str)) return PyStr_AS_C(obj).hash(); + if (obj->isType(_tp_type)) return (int64_t)obj.get(); + _error("TypeError", "unhashable type: " + obj->getTypeName()); + return 0; + } + + void registerCompiledModule(_Str name, _Code code){ + PyVar _m = newModule(name); + exec(code, {}, _m); + _modules[name] = _m; + } +}; \ No newline at end of file diff --git a/test_cpp.sh b/test_cpp.sh new file mode 100644 index 00000000..7c316a84 --- /dev/null +++ b/test_cpp.sh @@ -0,0 +1,11 @@ +g++ -o pocketpy src/main.cpp --std=c++17 -pg -O1 + +./pocketpy tests/1.py + +gprof pocketpy gmon.out > gprof.txt + +#gprof pocketpy | gprof2dot | dot -Tsvg -o output.svg +rm gmon.out + + + diff --git a/tests/1.py b/tests/1.py new file mode 100644 index 00000000..4a5d1f56 --- /dev/null +++ b/tests/1.py @@ -0,0 +1,16 @@ +def is_prime(x): + if x<2: + return False + for i in range(2,x): + if x%i == 0: + return False + return True + +def test(n): + k = 0 + for i in range(n): + if is_prime(i): + k += 1 + return k + +print(test(10000)) \ No newline at end of file diff --git a/tests/2.py b/tests/2.py new file mode 100644 index 00000000..200c8f9e --- /dev/null +++ b/tests/2.py @@ -0,0 +1,15 @@ +def test(n): + k = 0 + for x in range(n): + if x<2: + continue + flag = True + for i in range(2,x): + if x%i == 0: + flag = False + break + if flag: + k += 1 + return k + +print(test(10000)) \ No newline at end of file diff --git a/tests/3.py b/tests/3.py new file mode 100644 index 00000000..963023fa --- /dev/null +++ b/tests/3.py @@ -0,0 +1,5 
@@ +k = 0 +for i in range(2, 10000000): + if i % 2 == 0: + k += 1 +print(k) \ No newline at end of file diff --git a/tests/class.pk b/tests/class.pk new file mode 100644 index 00000000..e9bbf6f5 --- /dev/null +++ b/tests/class.pk @@ -0,0 +1,6 @@ +class Complex: + def __init__(self, realpart, imagpart): + self.r = realpart + self.i = imagpart +x = Complex(3.0, -4.5) +assert x.r == 3.0 \ No newline at end of file diff --git a/tests/mixedtype/basic.py b/tests/mixedtype/basic.py new file mode 100644 index 00000000..6bd58525 --- /dev/null +++ b/tests/mixedtype/basic.py @@ -0,0 +1,39 @@ +def compare(a,b): + d = a-b + if d > -0.0001 and d < 0.0001: + return 1 + return 0 + + +assert compare(32 + 32.0,64) == 1 +assert compare(8855 / 3.2,2767.1875) == 1 +#assert 6412//6.5 == 986.0 #TypeError: unsupported operand type(s) for // +assert compare(1054.5*7985,8420182.5) == 1 +#assert 4 % 2.0 == 0.0 #TypeError: unsupported operand type(s) for % +l = [3.2,5,10,8.9] +assert 2.3 + l[0] == 5.5 +assert 3 + l[1] == 8 +assert compare(3/l[2],0.3) == 1 +assert 3 // l[1] == 0 +assert l[2] % 3 == 1 +assert compare(3*l[3],26.7) == 1 +assert 'a' * l[1] == 'aaaaa' + +assert compare(2.9**2,8.41) == 1 +assert compare(2.5**(-1),0.4) == 1 + +assert 2.5 > 2 +assert 1.6 < 100 +assert 1.0 == 1 +x = 2.6 +y = 5 +l = [5.4,8,'40',3.14] +assert x <= y +assert y >= x +assert x != y +assert y < l[0] + +str = ['s','bb'] +s = 'jack' + str[0] +assert s == 'jacks' +assert str[1] * 3 == 'bbbbbb' \ No newline at end of file diff --git a/tests/singletype/basic.py b/tests/singletype/basic.py new file mode 100644 index 00000000..cb709c12 --- /dev/null +++ b/tests/singletype/basic.py @@ -0,0 +1,75 @@ + +def compare(a,b): + d = a-b + if d > -0.0001 and d < 0.0001: + return 1 + return 0 + +s = 'foo'; s += 'bar' +assert s == 'foobar' +assert 1 + 2 * 3 == 7 +assert (1 + 2)* 3 == 9 +assert compare(1.2*3.5 , 4.2) == 1 +assert compare(9.8*(2.5 - 3),-4.9) == 1 +assert compare(2.4*8.6,20.64) == 1 + +assert compare(1.5 + 3,4.5) 
== 1 +assert compare(1.5 + 3.9,5.4) == 1 +assert 2 - 1 == 1 +assert compare(5.3 - 2.5,2.8) == 1 +assert 42 % 40 == 2 +assert -15 % 6 == -3 # in python -15 % 6 == 3 +assert 2/1 == 2 +assert 3//2 == 1 +assert 1 - 9 == -8 + +a = 1 +assert -a == -1 +assert 'testing'== 'test' + 'ing' + +x = 42 +assert x%3 == 0 +x = 27 +assert x%8 == 3 + + +assert 2**3 == 8 +assert -2**2 == -4 +assert (-2)**2 == 4 +assert compare(0.2**2,0.04) == 1 +x = 4 +assert x**4 == 256 +assert compare(x**0.5,2) == 1 +assert compare(4**(-1.0),0.25) == 1 + +assert 'abc' * 3 == 'abcabcabc' +assert '' * 1000 == '' +assert 'foo' * 0 == '' + + +assert 1 < 2 +assert 3 > 1 +x = 1 +y = 8 +assert x <= y +assert y >= x +assert x != y + +assert 42 in [12, 42, 3.14] +assert 'key' in {'key':'value'} +assert 'a' in 'abc' +assert 'd' not in 'abc' + +x = 1 +y = 0 +assert not x == False +assert not y == True + +a = 1 +b = 1 +c = 0.1 +assert (a==b) and (a is not b) # small int cache +assert a is not c + + + diff --git a/tests/singletype/builtin_ty.py b/tests/singletype/builtin_ty.py new file mode 100644 index 00000000..af139821 --- /dev/null +++ b/tests/singletype/builtin_ty.py @@ -0,0 +1,158 @@ +############################################## +##String +############################################## + +a = '' +b = 'test' +c ='test' +assert len(a) == 0 +assert len(b) == 4 +assert b == c + +assert ''.lower() == '' and ''.upper() == '' +assert 'already+lower '.lower() == 'already+lower ' +assert 'ALREADY+UPPER '.upper() == 'ALREADY+UPPER ' +assert 'tEST+InG'.lower() == 'test+ing' +assert 'tEST+InG'.upper() == 'TEST+ING' + +s = "football" +q = "abcd" +r = "zoo" +str = "this is string example....wow!!!" +assert s[0] == 'f' +assert s[1:4] == 'oot' +assert s[:-1] == 'footbal' +assert s[:10] == 'football' +assert s[-3] == 'a' +assert str[-5:] == 'ow!!!' 
+assert str[3:-3] == 's is string example....wow' +assert s > q;assert s < r +assert s.replace("foo","ball") == "balltball" +assert s.startswith('f') == True;assert s.endswith('o') == False +assert str.startswith('this') == True; + + +assert str.split('w') == ['this is string example....', 'o', '!!!'] +assert "a,b,c".split(',') == ['a', 'b', 'c'] +assert 'a,'.split(',') == ['a', ''] +assert 'foo!!bar!!baz'.split('!!') == ['foo', 'bar', 'baz'] + +str = "*****this is **string** example....wow!!!*****" +s = "123abcrunoob321" +# assert str.strip( '*' ) == "this is **string** example....wow!!!" +# assert s.strip( '12' ) == "3abcrunoob3" + +s1 = "-" +s2 = "" +seq = ["r","u","n","o","o","b"] +assert s1.join( seq ) == "r-u-n-o-o-b" +assert s2.join( seq ) == "runoob" + + +##num = 6 +##assert str(num) == '6' TypeError: 'str' object is not callable + + +############################################## +##Lists +############################################## + +l = [1,2,3,4] +assert l[2] == 3 +assert l[-1] == 4 +assert l[:32] == [1,2,3,4] +assert l[32:] == [] +assert l[1:4] == [2,3,4] +assert l[-1:-3] == [] +assert l[-3:-1] == [2,3] + + +l1 = [1];l2 = l1;l1 += [2];l3 = [1,1,2] +assert l2[1] == 2 +assert l1 == l2 +assert l1*3 == [1,2,1,2,1,2] +assert l3.count(1) == 2 + +member = ['Tom', 'Sunny', 'Honer', 'Lily'] +teacher = [1,2,3] +assert len(member + teacher) == 7 +assert member[0] == 'Tom' +assert member[-2] == 'Honer' +assert member[0:3] == ['Tom', 'Sunny', 'Honer'] + +member.remove('Sunny') +assert member == ['Tom', 'Honer', 'Lily'] +member.pop() +assert member == ['Tom', 'Honer'] +del member[0] +assert member == ['Honer'] +member.append('Jack') +assert member == ['Honer','Jack'] +member.extend(teacher) +assert member == ['Honer','Jack',1,2,3] +member.insert(1,'Tom') +assert member == ['Honer','Tom','Jack',1,2,3] +member.clear() +assert member == [] +member = teacher.copy() +assert member == [1,2,3] + +l = [] +l.insert(0, 'l') +l.insert(1, 'l') +l.insert(0, 'h') +l.insert(3, 
'o') +l.insert(1, 'e') +assert l == ['h', 'e', 'l', 'l', 'o'] +assert l.pop(-2) == 'l' + +############################################## +##tuple +############################################## + +# tup = ('Google', 'Runoob', 'Taobao', 'Wiki', 'Weibo','Weixin') +# assert tup[1] == 'Runoob';assert tup[-2] == 'Weibo' +# assert tup[1:] == ('Runoob', 'Taobao', 'Wiki', 'Weibo', 'Weixin') +# assert tup[2:4] == ('Taobao', 'Wiki') +# assert len(tup) == 6 + + + +############################################## +##dict +############################################## +emptyDict = dict() +assert len(emptyDict) == 0 +tinydict = {'Name': 'Tom', 'Age': 7, 'Class': 'First'} +assert tinydict['Name'] == 'Tom';assert tinydict['Age'] == 7 +tinydict['Age'] = 8;tinydict['School'] = "aaa" +assert tinydict['Age'] == 8;assert tinydict['School'] == "aaa" +del tinydict['Name'] +assert len(tinydict) == 3 +tinydict.clear() +assert len(tinydict) == 0 + +dict1 = {'user':'circle','num':[1,2,3]} +dict2 = dict1.copy() +assert dict2 == {'user':'circle','num':[1,2,3]} +dict1['user'] = 'root' +assert dict1 == {'user': 'root', 'num': [1, 2, 3]};assert dict2 == {'user':'circle','num':[1,2,3]} + +tinydict = {'Name': 'circle', 'Age': 7} +tinydict2 = {'Sex': 'female' } +tinydict.update(tinydict2) +assert tinydict == {'Name': 'circle', 'Age': 7, 'Sex': 'female'} + +dishes = {'eggs': 2, 'sausage': 1, 'bacon': 1, 'spam': 500} +keys = dishes.keys() +values = dishes.values() +assert list(keys) == ['eggs', 'sausage', 'bacon', 'spam'];assert list(values) == [2, 1, 1, 500] + +d={1:"a",2:"b",3:"c"} +result=[] +for kv in d.items(): + k = kv[0]; v=kv[1] + result.append(k) + result.append(v) +assert result == [1, 'a', 2, 'b', 3, 'c'] + diff --git a/tests/singletype/controlflow.py b/tests/singletype/controlflow.py new file mode 100644 index 00000000..40ebf24a --- /dev/null +++ b/tests/singletype/controlflow.py @@ -0,0 +1,67 @@ +# if tests +flag = False +name = 'luren' +if name == 'python': + flag = True +else: + flag 
+assert flag == False + + +num = 9 +flag = 0 +if num >= 0 and num <= 10: + flag = 1 +else: + flag +assert flag == 1 + +num = 10 +flag = 0 +if num < 0 or num > 10: + flag = 1 +else: + flag +assert flag == 0 + +num = 5 +result = 0 +if num == 3: + result = num +elif num == 2: + result = num +elif num == 1: + result = num +elif num < 0: + result = num +else: + result = num +assert result == 5 + +# for tests + +k = 0 +for i in range(2, 1000): + if i % 2 == 0: + k += 1 +assert k ==499 + +k = 0 +for x in range(100): + if x<2: + continue + flag = True + for i in range(2,x): + if x%i == 0: + flag = False + break + if flag: + k += 1 +assert k == 25 + +#while tests +count = 0 +while (count < 1000): + count = count + 1 +assert count == 1000 + diff --git a/tests/singletype/functions.py b/tests/singletype/functions.py new file mode 100644 index 00000000..37fdcd8a --- /dev/null +++ b/tests/singletype/functions.py @@ -0,0 +1,19 @@ +## Function Tests. + + +def f1(): + return 'f1' +assert f1() == 'f1' +def f2(a, b, c, d): + return c +assert f2('a', 'b', 'c', 'd') == 'c' +def f3(a,b): + return a - b +assert f3(1,2) == -1 + +def fact(n): + if n == 1: + return 1 + return n * fact(n - 1) +assert fact(5)==120 +