The code displayed on this page is not copyrighted by me but by the owners of the respective repositories, as also mentioned in the headers of the various files.
7F 45 4C 46 02 01 01 03 00 00 00 00 00 00 00 00 02 00 3E 00 01 00 00 00 78 00 60 00 00 00 00 00 40 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 40 00 38 00 01 00 00 00 00 00 00 00 01 00 00 00 07 00 00 00 00 00 00 00 00 00 00 00 00 00 60 00 00 00 00 00 00 00 60 00 00 00 00 00 6A 02 00 00 00 00 00 00 6A 02 00 00 00 00 00 00 01 00 00 00 00 00 00 00 58 5F 5F 85 FF 75 06 50 BF 48 02 60 00 31 F6 6A 02 58 0F 05 85 C0 7E 66 49 89 C5 58 48 89 E5 6A 0C 58 31 FF 0F 05 49 89 C6 31 C0 B4 08 E8 33 01 00 00 48 89 C7 48 89 FE 31 C0 4D 31 FF E8 95 00 00 00 85 C0 74 07 48 89 06 48 83 C6 08 4D 85 FF 74 EB 48 39 FE 74 D2 E8 4D 00 00 00 48 8B 07 85 C0 74 1B 57 6A 39 58 0F 05 5F 85 C0 7C 10 75 13 6A 3B 58 48 89 EA 48 89 FE 48 8B 3F 0F 05 6A 01 5F EB 21 48 89 C7 50 48 89 E6 31 D2 6A 3D 58 0F 05 58 85 C0 74 93 B8 58 02 60 00 E8 F9 00 00 00 EB DC 31 FF 6A 3C 58 0F 05 57 B8 53 02 60 00 E8 E5 00 00 00 48 8B 07 E8 DD 00 00 00 48 83 C7 08 6A 20 58 E8 F0 00 00 00 48 39 FE 75 E7 6A 0A 58 E8 E3 00 00 00 5F C3 57 56 31 C0 B4 10 E8 83 00 00 00 48 89 C7 48 89 C6 E8 90 00 00 00 3C FC 74 B1 3C 20 74 37 3C 09 74 33 3C 0A 75 02 EB 14 3C 22 75 07 E8 31 00 00 00 EB 22 3C 23 75 0B E8 40 00 00 00 6A 01 41 5F EB 13 3C 5C 75 07 E8 5B 00 00 00 EB 08 88 06 48 83 C6 01 EB BC 48 39 FE 75 02 31 FF 48 89 F8 5E 5F C3 E8 3F 00 00 00 3C FC 0F 84 38 FF FF FF 3C 22 74 08 88 06 48 83 C6 01 EB E7 C3 E8 25 00 00 00 3C FC 0F 84 1E FF FF FF 3C 0A 75 EF C3 57 56 52 4C 89 F7 48 01 C7 6A 0C 58 0F 05 4C 89 F0 49 89 FE 5A 5E 5F C3 57 56 52 6A FC 58 50 48 8D 34 24 4C 89 EF 31 C0 6A 01 5A 0F 05 58 3C FC 5A 5E 5F C3 57 56 48 89 C7 85 C0 74 13 31 C0 8A 07 85 C0 74 0B E8 09 00 00 00 48 83 C7 01 EB ED 5E 5F C3 57 56 52 50 48 8D 34 24 6A 01 5F 48 89 F8 48 89 FA 0F 05 58 5A 5E 5F C3 6B 61 65 6D 2E 61 6D 64 36 34 00 20 2B 3E 20 00 53 75 62 70 72 6F 63 65 73 73 20 65 72 72 6F 72 0A 00
7F 45 4C 46 02 01 01 03 00 00 00 00 00 00 00 00 02 00 3E 00 01 00 00 00 78 00 60 00 00 00 00 00 40 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 40 00 38 00 01 00 00 00 00 00 00 00 01 00 00 00 07 00 00 00 00 00 00 00 00 00 00 00 00 00 60 00 00 00 00 00 00 00 60 00 00 00 00 00 24 01 00 00 00 00 00 00 24 01 00 00 00 00 00 00 01 00 00 00 00 00 00 00 58 5F 5F 31 F6 6A 02 58 0F 05 49 89 C1 5F 66 BE 41 02 66 BA C0 01 6A 02 58 0F 05 49 89 C2 6A FF 5B 31 ED E8 69 00 00 00 E8 1C 00 00 00 85 C0 7C F2 85 DB 7D 06 89 C5 31 DB EB E8 C1 E5 04 01 E8 FF CB E8 39 00 00 00 EB DA 3C 23 74 1E 3C 3B 74 1A 3C 30 7C 1F 3C 3A 7C 1F 3C 41 7C 17 3C 47 7C 1C 3C 61 7C 0F 3C 67 7C 12 EB 09 E8 21 00 00 00 3C 0A 75 F7 6A FF 58 C3 2C 30 C3 2C 20 2C 37 C3 6A 01 5A 50 48 89 E6 4C 89 D7 6A 01 58 0F 05 5F C3 6A 01 5A 55 48 89 E6 4C 89 CF 31 C0 0F 05 85 C0 74 02 58 C3 31 FF 6A 3C 58 0F 05
#! /usr/bin/env bash
# Mes --- Maxwell Equations of Software
# Copyright © 2017,2019 Jan Nieuwenhuizen <janneke@gnu.org>
# Copyright © 2017,2019 Jeremiah Orians
#
# This file is part of Mes.
#
# Mes is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or (at
# your option) any later version.
#
# Mes is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mes. If not, see <http://www.gnu.org/licenses/>.

# Can also be run by kaem or any other shell of your personal choice
# To run in kaem simply: kaem --verbose --strict

# Driver script: every command must sit on its own line, because '#'
# starts a comment that runs to the end of the line.

##################################################
# Phase 0-11 Build hex0 from bootstrapped binary #
##################################################
# The seed kaem runs the seed script; the kaem-0 artifact then runs the
# mini-kaem script.
./bootstrap-seeds/POSIX/AMD64/kaem-optional-seed ./AMD64/mescc-tools-seed-kaem.kaem
./AMD64/artifact/kaem-0 ./AMD64/mescc-tools-mini-kaem.kaem

#######################################
# Run remaining phases with full kaem #
#######################################
./AMD64/bin/kaem --verbose --strict --file ./AMD64/kaem.run
#! /usr/bin/env bash
# Mes --- Maxwell Equations of Software
# Copyright © 2017 Jan Nieuwenhuizen <janneke@gnu.org>
# Copyright © 2017 Jeremiah Orians
#
# This file is part of Mes.
#
# Mes is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or (at
# your option) any later version.
#
# Mes is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mes. If not, see <http://www.gnu.org/licenses/>.

# Can also be run by kaem or any other shell of your personal choice
# To run in kaem simply: kaem --verbose --strict
# Warning all binaries prior to the use of blood-elf will not be readable by
# Objdump, you may need to use ndisasm or gdb to view the assembly in the binary.

###############################################
# Phase-0 Build hex0 from bootstrapped binary #
###############################################
./bootstrap-seeds/POSIX/AMD64/hex0-seed ./AMD64/hex0_AMD64.hex0 ./AMD64/artifact/hex0
# hex0 should have the exact same checksum as hex0-seed as they are both supposed
# to be built from hex0_AMD64.hex0 and by definition must be identical

#########################################
# Phase-0b Build minimal kaem from hex0 #
#########################################
./AMD64/artifact/hex0 ./AMD64/kaem-minimal.hex0 ./AMD64/artifact/kaem-0
# for checksum validation reasons
# SPDX-FileCopyrightText: 2017 Jeremiah Orians <jeremiah@pdp10.guru>
# SPDX-FileCopyrightText: 2023 Andrius Štikonas <andrius@stikonas.eu>
#
# SPDX-License-Identifier: GPL-3.0-or-later

# hex0 for AMD64 Linux.
# Opens the file named by the first argument for reading and the file named
# by the second argument for writing, then turns every pair of hex digits
# (0-9, A-F, a-f) in the input into one output byte.  '#' and ';' start a
# comment that runs to the next line feed; every other character is ignored.
# Exits with status 0 when read() returns 0 (end of input).
#
# Register usage
# * R9  input file descriptor
# * R10 output file descriptor
# * RBX toggle: -1 = waiting for first digit of a pair, 0 = first digit held
# * RBP value of the first digit of the current pair
#
# NOTE: both '#' and ';' comment out the rest of the line, so every group of
# bytes must stay on its own line.

## ELF Header
#:ELF_base
7F 45 4C 46                 ## e_ident[EI_MAG0-3] ELF's magic number

02                          ## e_ident[EI_CLASS] Indicating 64 bit
01                          ## e_ident[EI_DATA] Indicating little endianness
01                          ## e_ident[EI_VERSION] Indicating original elf

03                          ## e_ident[EI_OSABI] Set at 3 because FreeBSD is strict
00                          ## e_ident[EI_ABIVERSION] Set at 0 because none cares

00 00 00 00 00 00 00        ## e_ident[EI_PAD]
02 00                       ## e_type Indicating Executable
3E 00                       ## e_machine Indicating AMD64
01 00 00 00                 ## e_version Indicating original elf

78 00 60 00 00 00 00 00     ## e_entry Address of the entry point (Number of bytes this header is + Base Address)
40 00 00 00 00 00 00 00     ## e_phoff Address of program header table
00 00 00 00 00 00 00 00     ## e_shoff Address of section header table

00 00 00 00                 ## e_flags
40 00                       ## e_ehsize Indicating our 64 Byte header

38 00                       ## e_phentsize size of a program header table
01 00                       ## e_phnum number of entries in program table

00 00                       ## e_shentsize size of a section header table
00 00                       ## e_shnum number of entries in section table

00 00                       ## e_shstrndx index of the section names

## Program Header
#:ELF_program_headers
01 00 00 00                 ## p_type
07 00 00 00                 ## ph_flags: PF-X|PF-W|PF-R = 7
00 00 00 00 00 00 00 00     ## p_offset

00 00 60 00 00 00 00 00     ## p_vaddr
00 00 60 00 00 00 00 00     ## p_physaddr

24 01 00 00 00 00 00 00     ## p_filesz
24 01 00 00 00 00 00 00     ## p_memsz

01 00 00 00 00 00 00 00     ## Required alignment

#:ELF_text

# Where the ELF Header is going to hit
# Simply jump to _start
# Our main function
#:_start (0x600078)
    58              ; pop_rax               # Get the number of arguments
    5F              ; pop_rdi               # Get the program name
    5F              ; pop_rdi               # Get the actual input name
    31F6            ; xor_esi,esi           # prepare read_only, rsi = 0
    6A 02           ; push !2               # prepare syscall number
    58              ; pop_rax               # the syscall number for open()
    0F05            ; syscall               # Now open that damn file
    4989C1          ; mov_r9,rax            # Preserve the input file descriptor

    5F              ; pop_rdi               # Get the actual output name
    66BE 4102       ; mov_si, @577          # Prepare file as O_WRONLY|O_CREAT|O_TRUNC
    66BA C001       ; mov_dx, @448          # Prepare file as RWX for owner only (700 in octal)
    6A 02           ; push !2               # prepare syscall number
    58              ; pop_rax               # the syscall number for open()
    0F05            ; syscall               # Now open that damn file
    4989C2          ; mov_r10,rax           # Preserve the output file descriptor

    # Our flag for byte processing
    6A FF           ; push !-1
    5B              ; pop_rbx               # rbx = -1

    # temp storage for the sum
    31ED            ; xor_ebp,ebp           # rbp = 0

#:loop (0x60009B)
    # Read a byte
    E8 69000000     ; call %read_byte

    # process byte
    E8 1C000000     ; call %hex

    # deal with -1 values
    85C0            ; test_eax,eax
    7C F2           ; jl !loop

    # deal with toggle
    85DB            ; test_ebx,ebx          # jump if rbx >= 0
    7D 06           ; jge !print

    # process first byte of pair
    89C5            ; mov_ebp,eax
    31DB            ; xor_ebx,ebx           # rbx = 0
    EB E8           ; jmp !loop

# process second byte of pair
#:print (0x6000B3)
    # update the sum and store in output
    C1E5 04         ; shl_ebp, !4
    01E8            ; add_eax,ebp

    # flip the toggle
    FFCB            ; dec_ebx               # rbx = -1

    E8 39000000     ; call %write_byte

    EB DA           ; jmp !loop

# Converts the character in al to its hex value, or returns -1 in rax
# for characters that are not hex digits (and after skipping a comment)
#:hex (0x6000C1)
    # Purge Comment Lines (#)
    3C 23           ; cmp_al, !35
    74 1E           ; je !purge_comment

    # Purge Comment Lines (;)
    3C 3B           ; cmp_al, !59
    74 1A           ; je !purge_comment

    # deal all ascii less than '0'
    3C 30           ; cmp_al, !48
    7C 1F           ; jl !ascii_other

    # deal with 0-9
    3C 3A           ; cmp_al, !58
    7C 1F           ; jl !ascii_num

    # deal with all ascii less than 'A'
    3C 41           ; cmp_al, !65
    7C 17           ; jl !ascii_other

    # deal with 'A'-'F'
    3C 47           ; cmp_al, !71
    7C 1C           ; jl !ascii_high

    # deal with all ascii less than 'a'
    3C 61           ; cmp_al, !97
    7C 0F           ; jl !ascii_other

    # deal with 'a'-'f'
    3C 67           ; cmp_al, !103
    7C 12           ; jl !ascii_low

    # The rest that remains needs to be ignored
    EB 09           ; jmp !ascii_other

#:purge_comment (0x6000E3)
    # Read a byte
    E8 21000000     ; call %read_byte

    # Loop if not LF
    3C 0A           ; cmp_al, !10
    75 F7           ; jne !purge_comment

    # Otherwise return -1

#:ascii_other (0x6000EC)
    6A FF           ; push !-1
    58              ; pop_rax               # return = -1
    C3              ; ret

#:ascii_num (0x6000F0)
    2C 30           ; sub_al, !48
    C3              ; ret

#:ascii_low (0x6000F3)
    2C 20           ; sub_al, !32           # convert to uppercase

#:ascii_high (0x6000F5)
    2C 37           ; sub_al, !55
    C3              ; ret

# Writes byte stored in al
#:write_byte (0x6000F8)
    # Print our Hex
    6A 01           ; push !1               # prepare to set rdx to 1
    5A              ; pop_rdx               # set the size of chars we want
    50              ; push_rax              # Move output to stack
    4889E6          ; mov_rsi,rsp           # What we are writing
    4C89D7          ; mov_rdi,r10           # Where are we writing to
    6A 01           ; push !1               # prepare syscall number for write
    58              ; pop_rax               # get the syscall number for write
    0F05            ; syscall               # call the Kernel
    5F              ; pop_rdi               # deallocate stack
    C3              ; ret

#:read_byte (0x600109)
    # Attempt to read 1 byte from the input file (descriptor held in r9)
    6A 01           ; push !1               # prepare to set rdx to 1
    5A              ; pop_rdx               # set the size of chars we want
    55              ; push_rbp              # allocate stack
    4889E6          ; mov_rsi,rsp           # Where to put it
    4C89CF          ; mov_rdi,r9            # Where are we reading from
    31C0            ; xor_eax,eax           # the syscall number for read
    0F05            ; syscall               # call the Kernel

    85C0            ; test_eax,eax          # check what we got
    74 02           ; je !Done              # Got EOF call it done

    # load byte
    58              ; pop_rax               # load char
    C3              ; ret

#:Done (0x60011D)
    # program completed Successfully
    31FF            ; xor_edi,edi           # All is well, rdi = 0
    6A 3C           ; push !60              # syscall number for exit is 60
    58              ; pop_rax               # put the exit syscall number in rax
    0F05            ; syscall               # Call it a good day

#:ELF_end
# SPDX-FileCopyrightText: 2020 Jeremiah Orians <jeremiah@pdp10.guru>
# SPDX-FileCopyrightText: 2023 Andrius Štikonas <andrius@stikonas.eu>
#
# SPDX-License-Identifier: GPL-3.0-or-later

# Minimal kaem for AMD64 Linux.
# Opens the script named by the first argument (or "kaem.amd64" when no
# argument is given) and, for every line that yields at least one token,
# prints " +> " followed by the tokens, forks, and execve()s tokens[0] with
# the tokens as argv and the inherited envp.  The parent waits for the child
# and exits with status 1 after printing "Subprocess error" when the returned
# status is non-zero.  Exits with status 0 at end of script.
# Tokens are separated by space or tab; '"' starts a raw string, '#' starts a
# comment running to the line feed, and '\' drops the character that follows.
#
# NOTE: both '#' and ';' comment out the rest of the line, so every group of
# bytes must stay on its own line.

# Register usage
# * R12 status
# * R13 script
# * R14 MALLOC
# * R15 command_done

## ELF Header
# :ELF_base ; (0x600000)
7F 45 4C 46                 ## e_ident[EI_MAG0-3] ELF's magic number

02                          ## e_ident[EI_CLASS] Indicating 64 bit
01                          ## e_ident[EI_DATA] Indicating little endianness
01                          ## e_ident[EI_VERSION] Indicating original elf

03                          ## e_ident[EI_OSABI] Set at 3 because FreeBSD is strict
00                          ## e_ident[EI_ABIVERSION] Set at 0 because none cares

00 00 00 00 00 00 00        ## e_ident[EI_PAD]
02 00                       ## e_type Indicating Executable
3E 00                       ## e_machine Indicating AMD64
01 00 00 00                 ## e_version Indicating original elf

78006000 00000000           ## e_entry Address of the entry point (Number of bytes this header is + Base Address)
40000000 00000000           ## e_phoff Address of program header table
00 00 00 00 00 00 00 00     ## e_shoff Address of section header table

00 00 00 00                 ## e_flags
40 00                       ## e_ehsize Indicating our 64 Byte header

38 00                       ## e_phentsize size of a program header table
01 00                       ## e_phnum number of entries in program table

00 00                       ## e_shentsize size of a section header table
00 00                       ## e_shnum number of entries in section table

00 00                       ## e_shstrndx index of the section names

## Program Header
# :ELF_program_headers ; (0x600040)
01 00 00 00                 ## p_type
07 00 00 00                 ## ph_flags: PF-X|PF-W|PF-R = 7
00 00 00 00 00 00 00 00     ## p_offset

00006000 00000000           ## p_vaddr
00006000 00000000           ## p_physaddr

6A020000 00000000           ## p_filesz
6A020000 00000000           ## p_memsz

01 00 00 00 00 00 00 00     ## Required alignment

# :ELF_text

# :_start ; (0x0600078)
    58              ; pop_rax                               # Get the number of arguments
    5F              ; pop_rdi                               # Get the program name
    5F              ; pop_rdi                               # Get the actual input name
    85FF            ; test_edi,edi                          # Check for missing input
    75 06           ; jne8 !_start_out                      # Have real input
    50              ; push_rax                              # Need to adjust stack
    BF 48026000     ; mov_edi, &default_file                # Use "kaem.amd64"

# :_start_out ; (0x600085)
    31F6            ; xor_esi,esi                           # prepare read_only
    6A 02           ; push !2
    58              ; pop_rax                               # the syscall number for open()
    0F05            ; syscall                               # Now open that damn file

    85C0            ; test_eax,eax                          # IF NULL We couldn't open the file
    7E 66           ; jle8 !Exit_Failure                    # Abort hard
    4989C5          ; mov_r13,rax                           # Set input pointer

    58              ; pop_rax                               # Get start of envp
    4889E5          ; mov_rbp,rsp                           # Protect envp

    6A 0C           ; push !12
    58              ; pop_rax                               # the Syscall # for SYS_BRK
    31FF            ; xor_edi,edi                           # Get current brk
    0F05            ; syscall                               # Let the kernel do the work
    4989C6          ; mov_r14,rax                           # Set our malloc pointer

# Where the main work gets done
# Using RDI for tokens and RSI for tokens[i]
# :main_loop ; (0x6000A1)
    31C0            ; xor_eax,eax                           # Zero RAX
    B4 08           ; mov_ah, !0x8                          # Using 256 char* of space (0x800)
    E8 33010000     ; call %malloc                          # get it
    4889C7          ; mov_rdi,rax                           # set tokens

    4889FE          ; mov_rsi,rdi                           # I = 0
    31C0            ; xor_eax,eax                           # Using 0
    4D31FF          ; xor_r15,r15                           # command_done = 0

# Using RAX for result and RDI for tokens[i]
# :collect_command ; (0x6000B5)
    E8 95000000     ; call %collect_token                   # Get another token
    85C0            ; test_eax,eax                          # if NULL == result
    74 07           ; je8 !collect_command_comment          # It is a comment, don't store

    488906          ; mov_[rsi],rax                         # tokens[i] = result
    4883C6 08       ; add_rsi, !8                           # i = i + 1 (adjusted for char* size)

# :collect_command_comment ; (0x6000C5)
    4D85FF          ; test_r15,r15                          # IF 0 == command_done
    74 EB           ; je8 !collect_command                  # keep looping

    # Deal with line comments
    4839FE          ; cmp_rsi,rdi                           # if 0 < i
    74 D2           ; je8 !main_loop                        # It was a comment

    E8 4D000000     ; call %print_command                   # print the command
    488B07          ; mov_rax,[rdi]                         # program = tokens[0]
    85C0            ; test_eax,eax                          # IF NULL == program
    74 1B           ; je8 !Exit_Failure                     # Some shit went down, abort

    57              ; push_rdi                              # Protect Tokens
    6A 39           ; push !57
    58              ; pop_rax                               # FORKing
    0F05            ; syscall                               # int f = FORK()
    5F              ; pop_rdi                               # Restore Tokens

    85C0            ; test_eax,eax                          # Check fork
    7C 10           ; jl8 !Exit_Failure                     # IF f == -1 abort hard
    75 13           ; jne8 !collect_command_parent          # IF f == 0 it is child

    # Deal with child case
    6A 3B           ; push !59
    58              ; pop_rax                               # EXECVE
    4889EA          ; mov_rdx,rbp                           # third arg = envp
    4889FE          ; mov_rsi,rdi                           # second arg = tokens
    488B3F          ; mov_rdi,[rdi]                         # program = tokens[0]
    0F05            ; syscall                               # execve(program, tokens, envp);

# Exit_Failure function
# Receives nothing
# And aborts hard
# DOES NOT RETURN
# :Exit_Failure ; (0x6000F6)
    6A 01           ; push !1
    5F              ; pop_rdi                               # All is wrong
    EB 21           ; jmp8 !Exit                            # Exit

# :collect_command_parent ; (0x6000FB)
    4889C7          ; mov_rdi,rax                           # first arg = f
    50              ; push_rax                              # allocate stack for status
    4889E6          ; mov_rsi,rsp                           # second arg = &status
    31D2            ; xor_edx,edx                           # third arg = NULL
    6A 3D           ; push !61
    58              ; pop_rax                               # WAITPID
    0F05            ; syscall                               # wait4pid(f, &status, 0, 0); [r10 = 0]

    58              ; pop_rax                               # Using status
    85C0            ; test_eax,eax                          # IF 0 == status
    74 93           ; je8 !main_loop                        # Loop forever

    # Deal with very unhappy case
    B8 58026000     ; mov_eax, &hard                        # Using "Subprocess error\n"
    E8 F9000000     ; call %File_Print                      # Print it
    EB DC           ; jmp8 !Exit_Failure                    # return error

# :Done ; (0x60011A)
    # program completed Successfully
    31FF            ; xor_edi,edi                           # All is well
# :Exit ; (0x60011C)
    6A 3C           ; push !60                              # SYS_exit
    58              ; pop_rax                               # put the exit syscall number in rax
    0F05            ; syscall                               # Call it a day


# print_command function
# Receives tokens[j] in RDI and tokens[i] in RSI
# Modifies RAX
# :print_command ; (0x600121)
    57              ; push_rdi                              # Protect RDI
    B8 53026000     ; mov_eax, &prefix                      # using " +> "
    E8 E5000000     ; call %File_Print                      # print it
# :print_command_loop ; (0x60012C)
    488B07          ; mov_rax,[rdi]                         # using tokens[j]
    E8 DD000000     ; call %File_Print                      # print it
    4883C7 08       ; add_rdi, !8                           # j = j + 1
    6A 20           ; push !32
    58              ; pop_rax                               # using ' '
    E8 F0000000     ; call %fputc                           # print it
    4839FE          ; cmp_rsi,rdi                           # IF j < i
    75 E7           ; jne8 !print_command_loop              # otherwise keep looping

    6A 0A           ; push !10
    58              ; pop_rax                               # using '\n'
    E8 E3000000     ; call %fputc                           # print it
    5F              ; pop_rdi                               # Restore RDI
    C3              ; ret


# collect_token function
# Receives nothing
# Overwrites RAX
# Uses RAX as C, RDI as token and RSI as token[i]
# :collect_token ; (0x60014F)
    57              ; push_rdi                              # Protect RDI
    56              ; push_rsi                              # Protect RSI
    31C0            ; xor_eax,eax                           # Zero RAX
    B4 10           ; mov_ah, !0x10                         # max_string = 4096 * sizeof(char)
    E8 83000000     ; call %malloc                          # allocate space
    4889C7          ; mov_rdi,rax                           # token = malloc(max_string);
    4889C6          ; mov_rsi,rax                           # i = 0; set token[i]

# :collect_token_loop ; (0x600160)
    E8 90000000     ; call %fgetc                           # c = fgetc(input);
    3C FC           ; cmp_al, !-4                           # if C == EOF
    74 B1           ; je8 !Done                             # We are done

    3C 20           ; cmp_al, !32                           # IF C == ' '
    74 37           ; je8 !collect_token_done               # Space terminates token

    3C 09           ; cmp_al, !9                            # IF C == '\t'
    74 33           ; je8 !collect_token_done               # tab terminates token

    3C 0A           ; cmp_al, !10                           # IF C == '\n'
    75 02           ; jne8 !collect_token_string            # otherwise check next

    # It is a newline
    EB 14           ; jmp8 !collect_token_set_command_done  # Set command_done = TRUE

# :collect_token_string ; (0x600177)
    3C 22           ; cmp_al, !34                           # IF C == '\"'
    75 07           ; jne8 !collect_token_comment           # otherwise check next

    # It is a RAW STRING
    E8 31000000     ; call %collect_string                  # Get the rest of the string
    EB 22           ; jmp8 !collect_token_done              # Be done

# :collect_token_comment ; (0x600182)
    3C 23           ; cmp_al, !35                           # IF C == '#'
    75 0B           ; jne8 !collect_token_escape            # otherwise check next

    # It is a line comment
    E8 40000000     ; call %collect_comment                 # Read it all
# :collect_token_set_command_done ; (0x60018B)
    6A 01           ; push !1
    415F            ; pop_r15                               # Set command_done = TRUE
    EB 13           ; jmp8 !collect_token_done              # Be done

# :collect_token_escape ; (0x600191)
    3C 5C           ; cmp_al, !92                           # IF C == '\\'
    75 07           ; jne8 !collect_token_other             # otherwise just store it

    # It is an escape char
    E8 5B000000     ; call %fgetc                           # Read the char to drop
    EB 08           ; jmp8 !collect_token_done              # Be done

# :collect_token_other ; (0x60019C)
    8806            ; mov_[rsi],al                          # token[i] = C
    4883C6 01       ; add_rsi, !1                           # i = i + 1
    EB BC           ; jmp8 !collect_token_loop              # Keep going

# :collect_token_done ; (0x6001A4)
    4839FE          ; cmp_rsi,rdi                           # IF i == 0
    75 02           ; jne8 !collect_token_good              # otherwise return the token
    31FF            ; xor_edi,edi                           # token = NULL

# :collect_token_good ; (0x6001AB)
    4889F8          ; mov_rax,rdi                           # Return token
    5E              ; pop_rsi                               # Restore RSI
    5F              ; pop_rdi                               # Restore RDI
    C3              ; ret


# collect_string function
# Receives target[index] in RSI
# Modifies RAX
# Uses RAX as C
# :collect_string ; (0x6001B1)
    E8 3F000000     ; call %fgetc                           # C = fgetc(input)
    3C FC           ; cmp_al, !-4                           # if C == EOF
    0F84 38FFFFFF   ; je %Exit_Failure                      # Something went horribly wrong

    3C 22           ; cmp_al, !34                           # IF C == '\"'
    74 08           ; je8 !collect_string_done              # be done

    # deal with inside of string
    8806            ; mov_[rsi],al                          # target[index] = C
    4883C6 01       ; add_rsi, !1                           # index = index + 1
    EB E7           ; jmp8 !collect_string                  # Keep going

# :collect_string_done ; (0x6001CA)
    C3              ; ret


# collect_comment function
# Receives nothing
# Modifies RAX
# uses RAX as Int C
# Just throws away everything it reads
# :collect_comment ; (0x6001CB)
    E8 25000000     ; call %fgetc                           # C = fgetc(input)
    3C FC           ; cmp_al, !-4                           # IF C == EOF
    0F84 1EFFFFFF   ; je %Exit_Failure                      # abort hard

    3C 0A           ; cmp_al, !10                           # IF C == '\n'
    75 EF           ; jne8 !collect_comment                 # otherwise keep looping
    C3              ; ret


# Malloc isn't actually required if the program being built fits in the initial memory
# However, it doesn't take much to add it.
# Requires [MALLOC] to be initialized and RAX to have the number of desired bytes
# :malloc ; (0x6001DD)
    57              ; push_rdi                              # Protect RDI
    56              ; push_rsi                              # Protect RSI
    52              ; push_rdx                              # Protect RDX
    4C89F7          ; mov_rdi,r14                           # Using the current pointer
    4801C7          ; add_rdi,rax                           # Request the number of desired bytes
    6A 0C           ; push !12
    58              ; pop_rax                               # the Syscall # for SYS_BRK
    0F05            ; syscall                               # call the Kernel
    4C89F0          ; mov_rax,r14                           # Return pointer
    4989FE          ; mov_r14,rdi                           # Update pointer
    5A              ; pop_rdx                               # Restore RDX
    5E              ; pop_rsi                               # Restore RSI
    5F              ; pop_rdi                               # Restore RDI
    C3              ; ret


# fgetc function
# Loads FILE* from [script]
# Returns -4 (EOF) or char in RAX
# :fgetc ; (0x6001F5)
    57              ; push_rdi                              # Protect RDI
    56              ; push_rsi                              # Protect RSI
    52              ; push_rdx                              # Protect RDX
    6A FC           ; push !-4
    58              ; pop_rax                               # Put EOF in RAX
    50              ; push_rax                              # Assume bad (If nothing read, value will remain EOF)
    488D3424        ; lea_rsi,[rsp]                         # Get stack address
    4C89EF          ; mov_rdi,r13                           # Where are we reading from
    31C0            ; xor_eax,eax                           # the syscall number for read
    6A 01           ; push !1
    5A              ; pop_rdx                               # set the size of chars we want
    0F05            ; syscall                               # call the Kernel
    58              ; pop_rax                               # Get either char or EOF
    3C FC           ; cmp_al, !-4                           # Check for EOF
# :fgetc_done ; (0x60020D)
    5A              ; pop_rdx                               # Restore RDX
    5E              ; pop_rsi                               # Restore RSI
    5F              ; pop_rdi                               # Restore RDI
    C3              ; ret


# File_Print function
# Receives CHAR* in RAX
# calls fputc for every non-null char
# :File_Print ; (0x600211)
    57              ; push_rdi                              # Protect RDI
    56              ; push_rsi                              # Protect RSI
    4889C7          ; mov_rdi,rax                           # Protect S
    85C0            ; test_eax,eax                          # Protect against nulls
    74 13           ; je8 !File_Print_Done                  # Simply don't try to print them
# :File_Print_Loop ; (0x60021A)
    31C0            ; xor_eax,eax                           # Zero RAX
    8A07            ; mov_al,[rdi]                          # Read byte
    85C0            ; test_eax,eax                          # Check for NULL
    74 0B           ; je8 !File_Print_Done                  # Stop at NULL

    E8 09000000     ; call %fputc                           # write it
    4883C7 01       ; add_rdi, !1                           # S = S + 1
    EB ED           ; jmp8 !File_Print_Loop                 # Keep going

# :File_Print_Done ; (0x60022D)
    5E              ; pop_rsi                               # Restore RSI
    5F              ; pop_rdi                               # Restore RDI
    C3              ; ret


# fputc function
# receives CHAR in RAX and load FILE* from stdout
# writes char and returns
# :fputc ; (0x600230)
    57              ; push_rdi                              # Protect RDI
    56              ; push_rsi                              # Protect RSI
    52              ; push_rdx                              # Protect RDX
    50              ; push_rax                              # We are writing rax
    488D3424        ; lea_rsi,[rsp]                         # Get stack address
    6A 01           ; push !1
    5F              ; pop_rdi                               # Write to target file
    4889F8          ; mov_rax,rdi                           # the syscall number for write
    4889FA          ; mov_rdx,rdi                           # set the size of chars we want
    0F05            ; syscall                               # call the Kernel
    58              ; pop_rax                               # Restore stack
    5A              ; pop_rdx                               # Restore RDX
    5E              ; pop_rsi                               # Restore RSI
    5F              ; pop_rdi                               # Restore RDI
    C3              ; ret


# :default_file ; (0x600248)
    6B 61 65 6D 2E 61 6D 64 36 34 00                        ; "kaem.amd64"
# :prefix ; (0x600253)
    20 2B 3E 20 00                                          ; " +> "
# :hard ; (0x600258)
    53 75 62 70 72 6F 63 65 73 73 20 65 72 72 6F 72 0A 00   ; "Subprocess error\n"

# :ELF_end ; (0x60026A)
#! /usr/bin/env bash
# Mes --- Maxwell Equations of Software
# Copyright © 2017 Jan Nieuwenhuizen <janneke@gnu.org>
# Copyright © 2017 Jeremiah Orians
#
# This file is part of Mes.
#
# Mes is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or (at
# your option) any later version.
#
# Mes is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mes. If not, see <http://www.gnu.org/licenses/>.

# Can also be run by kaem or any other shell of your personal choice
# To run in kaem simply: kaem --verbose --strict
# Warning all binaries prior to the use of blood-elf will not be readable by
# Objdump, you may need to use ndisasm or gdb to view the assembly in the binary.

# NOTE: a continued command must end its line with the backslash as the very
# last character, and '#' comments out the rest of its line.

#################################
# Phase-1 Build hex1 from hex0 #
#################################
./AMD64/artifact/hex0 ./AMD64/hex1_AMD64.hex0 ./AMD64/artifact/hex1
# hex1 adds support for single character labels and is available in various forms
# in mescc-tools/amd64_bootstrap to allow you various ways to verify correctness

#################################
# Phase-2 Build hex2 from hex1 #
#################################
./AMD64/artifact/hex1 ./AMD64/hex2_AMD64.hex1 ./AMD64/artifact/hex2-0
# hex2 adds support for long labels and absolute addresses thus allowing it
# to function as an effective linker for later stages of the bootstrap
# This is a minimal version which will be used to bootstrap a much more advanced
# version in a later stage.

#################################
# Phase-2b Build catm from hex2 #
#################################
./AMD64/artifact/hex2-0 ./AMD64/catm_AMD64.hex2 ./AMD64/artifact/catm
# catm removes the need for cat or shell support for redirection by providing
# equivalent functionality via catm output_file input1 input2 ... inputN

###############################
# Phase-3 Build M0 from hex2 #
###############################
./AMD64/artifact/catm ./AMD64/artifact/M0.hex2 ./AMD64/ELF-amd64.hex2 ./AMD64/M0_AMD64.hex2
./AMD64/artifact/hex2-0 ./AMD64/artifact/M0.hex2 ./AMD64/artifact/M0
# M0 is the architecture specific version of M1 and is by design single
# architecture only and will be replaced by the C code version of M1

###################################
# Phase-4 Build cc_amd64 from M0 #
###################################
./AMD64/artifact/M0 ./AMD64/cc_amd64.M1 ./AMD64/artifact/cc_amd64.hex2
./AMD64/artifact/catm ./AMD64/artifact/cc_amd64-0.hex2 ./AMD64/ELF-amd64.hex2 ./AMD64/artifact/cc_amd64.hex2
./AMD64/artifact/hex2-0 ./AMD64/artifact/cc_amd64-0.hex2 ./AMD64/artifact/cc_amd64

#########################################
# Phase-5 Build M2-Planet from cc_amd64 #
#########################################
./AMD64/artifact/catm ./AMD64/artifact/M2-0.c \
    ./M2libc/amd64/linux/bootstrap.c \
    ./M2-Planet/cc.h \
    ./M2libc/bootstrappable.c \
    ./M2-Planet/cc_globals.c \
    ./M2-Planet/cc_reader.c \
    ./M2-Planet/cc_strings.c \
    ./M2-Planet/cc_types.c \
    ./M2-Planet/cc_core.c \
    ./M2-Planet/cc_macro.c \
    ./M2-Planet/cc.c
./AMD64/artifact/cc_amd64 ./AMD64/artifact/M2-0.c ./AMD64/artifact/M2-0.M1
./AMD64/artifact/catm ./AMD64/artifact/M2-0-0.M1 ./AMD64/amd64_defs.M1 ./AMD64/libc-core.M1 ./AMD64/artifact/M2-0.M1
./AMD64/artifact/M0 ./AMD64/artifact/M2-0-0.M1 ./AMD64/artifact/M2-0.hex2
./AMD64/artifact/catm ./AMD64/artifact/M2-0-0.hex2 ./AMD64/ELF-amd64.hex2 ./AMD64/artifact/M2-0.hex2
./AMD64/artifact/hex2-0 ./AMD64/artifact/M2-0-0.hex2 ./AMD64/artifact/M2

#############################################
# Phase-6 Build blood-elf-0 from C sources #
#############################################
./AMD64/artifact/M2 --architecture amd64 \
    -f ./M2libc/amd64/linux/bootstrap.c \
    -f ./M2libc/bootstrappable.c \
    -f ./mescc-tools/stringify.c \
    -f ./mescc-tools/blood-elf.c \
    --bootstrap-mode\
    -o ./AMD64/artifact/blood-elf-0.M1

./AMD64/artifact/catm ./AMD64/artifact/blood-elf-0-0.M1 ./M2libc/amd64/amd64_defs.M1 ./M2libc/amd64/libc-core.M1 ./AMD64/artifact/blood-elf-0.M1
./AMD64/artifact/M0 ./AMD64/artifact/blood-elf-0-0.M1 ./AMD64/artifact/blood-elf-0.hex2
./AMD64/artifact/catm ./AMD64/artifact/blood-elf-0-0.hex2 ./M2libc/amd64/ELF-amd64.hex2 ./AMD64/artifact/blood-elf-0.hex2
./AMD64/artifact/hex2-0 ./AMD64/artifact/blood-elf-0-0.hex2 ./AMD64/artifact/blood-elf-0
# This is the last stage where the binaries will not have debug info
# and the last piece built that isn't part of the output binaries

#####################################
# Phase-7 Build M1-0 from C sources #
#####################################
./AMD64/artifact/M2 --architecture amd64 \
    -f ./M2libc/amd64/linux/bootstrap.c \
    -f ./M2libc/bootstrappable.c \
    -f ./mescc-tools/stringify.c \
    -f ./mescc-tools/M1-macro.c \
    --bootstrap-mode\
    --debug \
    -o ./AMD64/artifact/M1-macro-0.M1

./AMD64/artifact/blood-elf-0 --64 --little-endian -f ./AMD64/artifact/M1-macro-0.M1 -o ./AMD64/artifact/M1-macro-0-footer.M1
./AMD64/artifact/catm ./AMD64/artifact/M1-macro-0-0.M1 ./M2libc/amd64/amd64_defs.M1 ./M2libc/amd64/libc-core.M1 ./AMD64/artifact/M1-macro-0.M1 ./AMD64/artifact/M1-macro-0-footer.M1
./AMD64/artifact/M0 ./AMD64/artifact/M1-macro-0-0.M1 ./AMD64/artifact/M1-macro-0.hex2
./AMD64/artifact/catm ./AMD64/artifact/M1-macro-0-0.hex2 ./M2libc/amd64/ELF-amd64-debug.hex2 ./AMD64/artifact/M1-macro-0.hex2
./AMD64/artifact/hex2-0 ./AMD64/artifact/M1-macro-0-0.hex2 ./AMD64/artifact/M1-0
# This is the last stage where catm will need to be used and the last stage where
# M0 is used, as we will be using its much more powerful and cross-platform
# version with a bunch of extra goodies.

#######################################
# Phase-8 Build hex2-1 from C sources #
#######################################
./AMD64/artifact/M2 --architecture amd64 \
    -f ./M2libc/sys/types.h \
    -f ./M2libc/stddef.h \
    -f ./M2libc/amd64/linux/fcntl.c \
    -f ./M2libc/fcntl.c \
    -f ./M2libc/sys/utsname.h \
    -f ./M2libc/amd64/linux/unistd.c \
    -f ./M2libc/amd64/linux/sys/stat.c \
    -f ./M2libc/stdlib.c \
    -f ./M2libc/stdio.h \
    -f ./M2libc/stdio.c \
    -f ./M2libc/bootstrappable.c \
    -f ./mescc-tools/hex2.h \
    -f ./mescc-tools/hex2_linker.c \
    -f ./mescc-tools/hex2_word.c \
    -f ./mescc-tools/hex2.c \
    --debug \
    -o ./AMD64/artifact/hex2_linker-1.M1

./AMD64/artifact/blood-elf-0 --64 --little-endian -f ./AMD64/artifact/hex2_linker-1.M1 -o ./AMD64/artifact/hex2_linker-1-footer.M1

./AMD64/artifact/M1-0 --architecture amd64 \
    --little-endian \
    -f ./M2libc/amd64/amd64_defs.M1 \
    -f ./M2libc/amd64/libc-full.M1 \
    -f ./AMD64/artifact/hex2_linker-1.M1 \
    -f ./AMD64/artifact/hex2_linker-1-footer.M1 \
    -o ./AMD64/artifact/hex2_linker-1.hex2

./AMD64/artifact/catm ./AMD64/artifact/hex2_linker-1-0.hex2 ./M2libc/amd64/ELF-amd64-debug.hex2 ./AMD64/artifact/hex2_linker-1.hex2
./AMD64/artifact/hex2-0 ./AMD64/artifact/hex2_linker-1-0.hex2 ./AMD64/artifact/hex2-1
# This is the last stage where we will be using the handwritten hex2 and instead
# be using the far more powerful, cross-platform version with a bunch more goodies

###################################
# Phase-9 Build M1 from C sources #
###################################
./AMD64/artifact/M2 --architecture amd64 \
    -f ./M2libc/sys/types.h \
    -f ./M2libc/stddef.h \
    -f ./M2libc/amd64/linux/fcntl.c \
    -f ./M2libc/fcntl.c \
    -f ./M2libc/sys/utsname.h \
    -f ./M2libc/amd64/linux/unistd.c \
    -f ./M2libc/string.c \
    -f ./M2libc/stdlib.c \
    -f ./M2libc/stdio.h \
    -f ./M2libc/stdio.c \
    -f ./M2libc/bootstrappable.c \
    -f ./mescc-tools/stringify.c \
    -f ./mescc-tools/M1-macro.c \
    --debug \
    -o ./AMD64/artifact/M1-macro-1.M1

./AMD64/artifact/blood-elf-0 --64 --little-endian -f ./AMD64/artifact/M1-macro-1.M1 -o ./AMD64/artifact/M1-macro-1-footer.M1

./AMD64/artifact/M1-0 --architecture amd64 \
    --little-endian \
    -f ./M2libc/amd64/amd64_defs.M1 \
    -f ./M2libc/amd64/libc-full.M1 \
    -f ./AMD64/artifact/M1-macro-1.M1 \
    -f ./AMD64/artifact/M1-macro-1-footer.M1 \
    -o ./AMD64/artifact/M1-macro-1.hex2

./AMD64/artifact/hex2-1 --architecture amd64 \
    --little-endian \
    --base-address 0x00600000 \
    -f ./M2libc/amd64/ELF-amd64-debug.hex2 \
    -f ./AMD64/artifact/M1-macro-1.hex2 \
    -o ./AMD64/bin/M1

######################################
# Phase-10 Build hex2 from C sources #
######################################
./AMD64/artifact/M2 --architecture amd64 \
    -f ./M2libc/sys/types.h \
    -f ./M2libc/stddef.h \
    -f ./M2libc/amd64/linux/fcntl.c \
    -f ./M2libc/fcntl.c \
    -f ./M2libc/sys/utsname.h \
    -f ./M2libc/amd64/linux/unistd.c \
    -f ./M2libc/amd64/linux/sys/stat.c \
    -f ./M2libc/stdlib.c \
    -f ./M2libc/stdio.h \
    -f ./M2libc/stdio.c \
    -f ./M2libc/bootstrappable.c \
    -f ./mescc-tools/hex2.h \
    -f ./mescc-tools/hex2_linker.c \
    -f ./mescc-tools/hex2_word.c \
    -f ./mescc-tools/hex2.c \
    --debug \
    -o ./AMD64/artifact/hex2_linker-2.M1

./AMD64/artifact/blood-elf-0 --64 --little-endian -f ./AMD64/artifact/hex2_linker-2.M1 -o ./AMD64/artifact/hex2_linker-2-footer.M1

./AMD64/bin/M1 --architecture amd64 \
    --little-endian \
    -f ./M2libc/amd64/amd64_defs.M1 \
    -f ./M2libc/amd64/libc-full.M1 \
    -f ./AMD64/artifact/hex2_linker-2.M1 \
    -f ./AMD64/artifact/hex2_linker-2-footer.M1 \
    -o ./AMD64/artifact/hex2_linker-2.hex2

./AMD64/artifact/hex2-1 --architecture amd64 \
    --little-endian \
    --base-address 0x00600000 \
    -f ./M2libc/amd64/ELF-amd64-debug.hex2 \
    -f ./AMD64/artifact/hex2_linker-2.hex2 \
    -o ./AMD64/bin/hex2

#####################################
# Phase-11 Build kaem from C sources#
#####################################
./AMD64/artifact/M2 --architecture amd64 \
    -f ./M2libc/sys/types.h \
    -f ./M2libc/stddef.h \
    -f ./M2libc/string.c \
    -f ./M2libc/amd64/linux/fcntl.c \
    -f ./M2libc/fcntl.c \
    -f ./M2libc/sys/utsname.h \
    -f ./M2libc/amd64/linux/unistd.c \
    -f ./M2libc/stdlib.c \
    -f ./M2libc/stdio.h \
    -f ./M2libc/stdio.c \
    -f ./M2libc/bootstrappable.c \
    -f ./mescc-tools/Kaem/kaem.h \
    -f ./mescc-tools/Kaem/variable.c \
    -f ./mescc-tools/Kaem/kaem_globals.c \
    -f ./mescc-tools/Kaem/kaem.c \
    --debug \
    -o ./AMD64/artifact/kaem.M1

./AMD64/artifact/blood-elf-0 --64 --little-endian -f ./AMD64/artifact/kaem.M1 -o ./AMD64/artifact/kaem-footer.M1

./AMD64/bin/M1 --architecture amd64 \
    --little-endian \
    -f ./M2libc/amd64/amd64_defs.M1 \
    -f ./M2libc/amd64/libc-full.M1 \
    -f ./AMD64/artifact/kaem.M1 \
    -f ./AMD64/artifact/kaem-footer.M1 \
    -o ./AMD64/artifact/kaem.hex2

./AMD64/bin/hex2 --architecture amd64 \
    --little-endian \
    --base-address 0x00600000 \
    -f ./M2libc/amd64/ELF-amd64-debug.hex2 \
    -f ./AMD64/artifact/kaem.hex2 \
    -o ./AMD64/bin/kaem
# SPDX-FileCopyrightText: 2017 Jeremiah Orians <jeremiah@pdp10.guru> # SPDX-FileCopyrightText: 2023 Andrius Štikonas <andrius@stikonas.eu> # # SPDX-License-Identifier: GPL-3.0-or-later ## ELF Header #:ELF_base 7F 45 4C 46 ## e_ident[EI_MAG0-3] ELF's magic number 02 ## e_ident[EI_CLASS] Indicating 64 bit 01 ## e_ident[EI_DATA] Indicating little endianness 01 ## e_ident[EI_VERSION] Indicating original elf 03 ## e_ident[EI_OSABI] Set at 3 because FreeBSD is strict 00 ## e_ident[EI_ABIVERSION] Set at 0 because none cares 00 00 00 00 00 00 00 ## e_ident[EI_PAD] 02 00 ## e_type Indicating Executable 3E 00 ## e_machine Indicating AMD64 01 00 00 00 ## e_version Indicating original elf 78 00 60 00 00 00 00 00 ## e_entry Address of the entry point (Number of bytes this header is + Base Address) 40 00 00 00 00 00 00 00 ## e_phoff Address of program header table 00 00 00 00 00 00 00 00 ## e_shoff Address of section header table 00 00 00 00 ## e_flags 40 00 ## e_ehsize Indicating our 64 Byte header 38 00 ## e_phentsize size of a program header table 01 00 ## e_phnum number of entries in program table 00 00 ## e_shentsize size of a section header table 00 00 ## e_shnum number of entries in section table 00 00 ## e_shstrndx index of the section names ## Program Header #:ELF_program_headers 01 00 00 00 ## p_type 07 00 00 00 ## ph_flags: PF-X|PF-W|PF-R = 7 00 00 00 00 00 00 00 00 ## p_offset 00 00 60 00 00 00 00 00 ## p_vaddr 00 00 60 00 00 00 00 00 ## p_physaddr 6E 02 00 00 00 00 00 00 ## p_filesz 6E 02 00 00 00 00 00 00 ## p_memsz 01 00 00 00 00 00 00 00 ## Required alignment #:ELF_text # Where the ELF Header is going to hit # Simply jump to _start # Our main function # :_start (0x600078) 58 ; pop_rax # Get the number of arguments 5F ; pop_rdi # Get the program name 5F ; pop_rdi # Get the actual input name 48C7C6 00000000 ; mov_rsi, %0 # prepare read_only 48C7C0 02000000 ; mov_rax, %2 # the syscall number for open() 0F05 ; syscall # Now open that damn file 4989C1 ; mov_r9,rax #
Preserve the file pointer we were given 5F ; pop_rdi # Get the actual output name 48C7C6 41020000 ; mov_rsi, %577 # Prepare file as O_WRONLY|O_CREAT|O_TRUNC 48C7C2 C0010000 ; mov_rdx, %448 # Prepare file as RWX for owner only (700 in octal) 48C7C0 02000000 ; mov_rax, %2 # the syscall number for open() 0F05 ; syscall # Now open that damn file 4989C2 ; mov_r10,rax # Preserve the file pointer we were given 49C7C7 FFFFFFFF ; mov_r15, %-1 # Our flag for byte processing 49C7C6 00000000 ; mov_r14, %0 # temp storage for the sum 49C7C5 00000000 ; mov_r13, %0 # Our starting IP E8 39000000 ; call %First_pass # Process it # rewind input file 4C89CF ; mov_rdi,r9 # Using our input file 48C7C6 00000000 ; mov_rsi, %0 # Offset Zero 48C7C2 00000000 ; mov_rdx, %0 # Whence Zero 48C7C0 08000000 ; mov_rax, %8 # lseek 0F05 ; syscall 49C7C7 FFFFFFFF ; mov_r15, %-1 # Our flag for byte processing 49C7C6 00000000 ; mov_r14, %0 # temp storage for the sum 49C7C5 00000000 ; mov_r13, %0 # Our starting IP E8 69000000 ; call %Second_pass # Process it E9 E4000000 ; jmp %Done # :First_pass (0x6000FC) E8 EF000000 ; call %Read_byte # Deal with EOF 3C FC ; cmp_al, !-4 74 34 ; je8 !First_pass_done # Check for : 3C 3A ; cmp_al, !0x3a 75 05 ; jne8 !First_pass_0 # Deal with label E8 32010000 ; call %StoreLabel # :First_pass_0 (0x60010E) # Check for % 3C 25 ; cmp_al, !0x25 74 1C ; je8 !First_pass_pointer # Deal with everything else E8 23000000 ; call %hex # Process our char # Deal with EOF 3C FC ; cmp_al, !-4 74 1E ; je8 !First_pass_done # deal with -1 values 3C 00 ; cmp_al, !0 7C DD ; jl8 !First_pass # deal with toggle 4983FF 00 ; cmp_r15, !0 74 04 ; je8 !First_pass_1 4983C5 01 ; add_r13, !1 # Increment IP # :First_pass_1 (0x600129) 49F7D7 ; not_r15 EB CE ; jmp8 !First_pass # :First_pass_pointer (0x60012E) # Deal with Pointer to label E8 BD000000 ; call %Read_byte # Drop the char 4983C5 04 ; add_r13, !4 # Increment IP EB C3 ; jmp8 !First_pass # Loop again # :First_pass_done (0x600139) C3 ; ret # :hex 
(0x60013A) # deal with EOF 3C FC ; cmp_al, !-4 74 60 ; je8 !EOF # deal with line comments starting with # 3C 23 ; cmp_al, !0x23 74 69 ; je8 !ascii_comment # deal with line comments starting with ; 3C 3B ; cmp_al, !0x3b 74 65 ; je8 !ascii_comment # deal all ascii less than 0 3C 30 ; cmp_al, !0x30 7C 5E ; jl8 !ascii_other # deal with 0-9 3C 3A ; cmp_al, !0x3a 7C 51 ; jl8 !ascii_num # deal with all ascii less than A 3C 41 ; cmp_al, !0x41 7C 56 ; jl8 !ascii_other # deal with A-F 3C 47 ; cmp_al, !0x47 7C 4F ; jl8 !ascii_high # deal with all ascii less than a 3C 61 ; cmp_al, !0x61 7C 4E ; jl8 !ascii_other # deal with a-f 3C 67 ; cmp_al, !0x67 7C 44 ; jl8 !ascii_low # The rest that remains needs to be ignored EB 48 ; jmp8 !ascii_other # :Second_pass (0x600160) E8 8B000000 ; call %Read_byte # Deal with EOF 3C FC ; cmp_al, !-4 74 35 ; je8 !Second_pass_done # Simply drop the label 3C 3A ; cmp_al, !0x3a 75 07 ; jne8 !Second_pass_0 E8 7E000000 ; call %Read_byte EB EC ; jmp8 !Second_pass # :Second_pass_0 (0x600174) # Deal with % pointer 3C 25 ; cmp_al, !0x25 75 07 ; jne8 !Second_pass_1 E8 CE000000 ; call %StorePointer EB E1 ; jmp8 !Second_pass # :Second_pass_1 (0x60017F) # Deal with everything else E8 B6FFFFFF ; call %hex # Process our char # Deal with EOF 3C FC ; cmp_al, !-4 74 16 ; je8 !Second_pass_done # deal with -1 values 3C 00 ; cmp_al, !0 7C D4 ; jl8 !Second_pass # deal with toggle 4983FF 00 ; cmp_r15, !0 74 29 ; je8 !print # process first byte of pair 4989C6 ; mov_r14,rax 49C7C7 00000000 ; mov_r15, %0 EB C2 ; jmp8 !Second_pass # :Second_pass_done (0x60019E) # :EOF C3 ; ret # :ascii_num (0x60019F) 2C 30 ; sub_al, !0x30 C3 ; ret # :ascii_low (0x6001A2) 2C 57 ; sub_al, !0x57 C3 ; ret # :ascii_high (0x6001A5) 2C 37 ; sub_al, !0x37 C3 ; ret # :ascii_other (0x6001A8) B0 FF ; mov_al, !-1 C3 ; ret # :ascii_comment (0x6001AB) E8 40000000 ; call %Read_byte 3C 0D ; cmp_al, !0xd 74 04 ; je8 !ascii_comment_cr 3C 0A ; cmp_al, !0xa 75 F3 ; jne8 !ascii_comment # :ascii_comment_cr 
(0x6001B8) B0 FF ; mov_al, !-1 C3 ; ret # process second byte of pair # :print (0x6001BB) # update the sum and store in output 49C1E6 04 ; shl_r14, !4 4C01F0 ; add_rax,r14 8805 A6000000 ; mov_[rip+DWORD],al %table # flip the toggle 49F7D7 ; not_r15 # Print our first Hex 48C7C2 01000000 ; mov_rdx, %1 # set the size of chars we want E8 41000000 ; call %print_chars 4983C5 01 ; add_r13, !1 # Increment IP E9 80FFFFFF ; jmp %Second_pass # :Done (0x6001E0) # program completed Successfully 48C7C7 00000000 ; mov_rdi, %0 # All is well 48C7C0 3C000000 ; mov_rax, %0x3c # put the exit syscall number in rax 0F05 ; syscall # Call it a good day # :Read_byte (0x6001F0) # Attempt to read 1 byte from the input file (fd kept in r9) 48C7C2 01000000 ; mov_rdx, %1 # set the size of chars we want 488D35 70000000 ; lea_rsi,[rip+DWORD] %table # Where to put it 4C89CF ; mov_rdi,r9 # Where are we reading from 31C0 ; xor_eax,eax # the syscall number for read 0F05 ; syscall # call the Kernel 4885C0 ; test_rax,rax # check what we got 74 0B ; je8 !Read_byte_1 # Got EOF call it done # load byte 8A05 5E000000 ; mov_al,[rip+DWORD] %table # load char 480FB6C0 ; movzx_rax,al # We have to zero extend it to use it C3 ; ret # Deal with EOF # :Read_byte_1 (0x600215) B0 FC ; mov_al, !-4 # Put EOF marker (-4) in al; callers compare al only C3 ; ret # :print_chars (0x600218) 488D35 4F000000 ; lea_rsi,[rip+DWORD] %table # What we are writing 4C89D7 ; mov_rdi,r10 # Write to target file 48C7C0 01000000 ; mov_rax, %1 # the syscall number for write 0F05 ; syscall # call the Kernel C3 ; ret # :Get_table_target (0x60022C) E8 BFFFFFFF ; call %Read_byte # Get single char label 48C1E0 03 ; shl_rax, !3 # Each label in table takes 8 bytes to store 488D0D 32000000 ; lea_rcx,[rip+DWORD] %table # Get table 4801C8 ; add_rax,rcx # Calculate offset C3 ; ret # :StoreLabel (0x600240) E8 E7FFFFFF ; call %Get_table_target 4C8928 ; mov_[rax],r13 # Write out pointer to table 31C0 ; xor_eax,eax # wipe higher bits of rax, so that cmp al works C3 ; ret # :StorePointer (0x60024B) 4983C5 04 ;
add_r13, !4 # Increment IP E8 D8FFFFFF ; call %Get_table_target # Get address of pointer 488B00 ; mov_rax,[rax] # Get pointer 4C29E8 ; sub_rax,r13 # target - ip 488905 0D000000 ; mov_[rip+DWORD],rax %table # put value in output 48C7C2 04000000 ; mov_rdx, %4 # set the size of chars we want E8 ABFFFFFF ; call %print_chars C3 ; ret # :table (0x60026E) # :ELF_end
# SPDX-FileCopyrightText: 2016 Jeremiah Orians <jeremiah@pdp10.guru> # SPDX-FileCopyrightText: 2017 Jan Nieuwenhuizen <janneke@gnu.org> # # SPDX-License-Identifier: GPL-3.0-or-later ## ELF Header # :ELF_base 7F 45 4C 46 ## e_ident[EI_MAG0-3] ELF's magic number 02 ## e_ident[EI_CLASS] Indicating 64 bit 01 ## e_ident[EI_DATA] Indicating little endianness 01 ## e_ident[EI_VERSION] Indicating original elf 03 ## e_ident[EI_OSABI] Set at 3 because FreeBSD is strict 00 ## e_ident[EI_ABIVERSION] Set at 0 because none cares 00 00 00 00 00 00 00 ## e_ident[EI_PAD] 02 00 ## e_type Indicating Executable 3E 00 ## e_machine Indicating AMD64 01 00 00 00 ## e_version Indicating original elf 78 00 60 00 00 00 00 00 ## e_entry Address of the entry point (Number of bytes this header is + Base Address) 40 00 00 00 00 00 00 00 ## e_phoff Address of program header table 00 00 00 00 00 00 00 00 ## e_shoff Address of section header table 00 00 00 00 ## e_flags 40 00 ## e_ehsize Indicating our 64 Byte header 38 00 ## e_phentsize size of a program header table 01 00 ## e_phnum number of entries in program table 00 00 ## e_shentsize size of a section header table 00 00 ## e_shnum number of entries in section table 00 00 ## e_shstrndx index of the section names ## Program Header # :ELF_program_headers 01 00 00 00 ## p_type 07 00 00 00 ## ph_flags: PF-X|PF-W|PF-R = 7 00 00 00 00 00 00 00 00 ## p_offset 00 00 60 00 00 00 00 00 ## p_vaddr 00 00 60 00 00 00 00 00 ## p_physaddr EF 05 00 00 00 00 00 00 ## p_filesz EF 05 00 00 00 00 00 00 ## p_memsz 01 00 00 00 00 00 00 00 ## Required alignment # :ELF_text # Where the ELF Header is going to hit # Simply jump to _start # Our main function # Register usage: # RAX, RDX, RSI, RDI => Temps # R15 => Flag # R14 => High bits # R13 => IP # R12 => MALLOC # R11 => HEAD # Struct format: (size 24) # NEXT => 0 # TARGET => 8 # NAME => 16 # :_start 48C7C7 00000000 ; mov_rdi, %0 # Get current pointer E8 %w ; call %malloc # Get current HEAP 4889C7 ; mov_rdi,rax # 
Using current 4989C4 ; mov_r12,rax # Setup MALLOC 4881C7 00008000 ; add_rdi, %8388608 # Create space for temp [8MB] E8 %w ; call %malloc # Give ourselves 8388608 bytes to work with 4C8925 %T ; mov_[rip+DWORD],r12 %scratch # Allocate space for scratch area 4981C4 00080000 ; add_r12, %0x800 # 2 KiB of scratch 58 ; pop_rax # Get the number of arguments 5F ; pop_rdi # Get the program name 5F ; pop_rdi # Get the actual input name 48C7C6 00000000 ; mov_rsi, %0 # prepare read_only 48C7C0 02000000 ; mov_rax, %2 # the syscall number for open() 0F05 ; syscall # Now open that damn file 4989C1 ; mov_r9,rax # Preserve the file pointer we were given 5F ; pop_rdi # Get the actual output name 48C7C6 41020000 ; mov_rsi, %577 # Prepare file as O_WRONLY|O_CREAT|O_TRUNC 48C7C2 C0010000 ; mov_rdx, %448 # Prepare file as RWX for owner only (700 in octal) 48C7C0 02000000 ; mov_rax, %2 # the syscall number for open() 0F05 ; syscall # Now open that damn file 4883F8 00 ; cmp_rax, !0 # Check for missing output 0F8F %R ; jg %_start_out # Have real output 48C7C0 01000000 ; mov_rax, %1 # Use stdout :R # :_start_out 4989C2 ; mov_r10,rax # Preserve the file pointer we were given E8 %H ; call %ClearScratch # Zero scratch 49C7C7 FFFFFFFF ; mov_r15, %-1 # Our flag for byte processing 49C7C6 00000000 ; mov_r14, %0 # temp storage for the sum 49C7C5 00006000 ; mov_r13, %0x00600000 # Our starting IP 49C7C3 00000000 ; mov_r11, %0 # HEAD = NULL E8 %a ; call %First_pass # Process it # rewind input file 4C89CF ; mov_rdi,r9 # Using our input file 48C7C6 00000000 ; mov_rsi, %0 # Offset Zero 48C7C2 00000000 ; mov_rdx, %0 # Whence Zero 48C7C0 08000000 ; mov_rax, %8 # lseek 4153 ; push_r11 # Protect HEAD 0F05 ; syscall 415B ; pop_r11 # Restore HEAD 49C7C7 FFFFFFFF ; mov_r15, %-1 # Our flag for byte processing 49C7C6 00000000 ; mov_r14, %0 # temp storage for the sum 49C7C5 00006000 ; mov_r13, %0x00600000 # Our starting IP E8 %k ; call %Second_pass # Process it E9 %v ; jmp %Done :a # :First_pass E8 %x ; call
%Read_byte # Deal with EOF 4883F8 FC ; cmp_rax, !-4 0F84 %i ; je %First_pass_done # Check for : 4883F8 3A ; cmp_rax, !0x3A 0F85 %b ; jne %First_pass_0 # Deal with label E9 %C ; jmp %StoreLabel :b # :First_pass_0 # Check for ! 4883F8 21 ; cmp_rax, !0x21 0F84 %h ; je %First_pass_pointer # Check for @ 4883F8 40 ; cmp_rax, !0x40 0F84 %h ; je %First_pass_pointer # Check for $ 4883F8 24 ; cmp_rax, !0x24 0F84 %h ; je %First_pass_pointer # Check for % 4883F8 25 ; cmp_rax, !0x25 0F84 %h ; je %First_pass_pointer # Check for & 4883F8 26 ; cmp_rax, !0x26 0F84 %h ; je %First_pass_pointer # Deal with everything else E8 %j ; call %hex # Process our char # Deal with EOF 4883F8 FC ; cmp_rax, !-4 0F84 %i ; je %First_pass_done # deal with -1 values 4883F8 00 ; cmp_rax, !0 0F8C %a ; jl %First_pass # deal with toggle 4983FF 00 ; cmp_r15, !0 0F84 %c ; je %First_pass_1 4983C5 01 ; add_r13, !1 # Increment IP :c # :First_pass_1 49F7D7 ; not_r15 E9 %a ; jmp %First_pass :d # :Update_Pointer # Check for ! 4883F8 21 ; cmp_rax, !0x21 0F84 %g ; je %Update_Pointer_1 # Check for @ 4883F8 40 ; cmp_rax, !0x40 0F84 %f ; je %Update_Pointer_2 # Check for $ 4883F8 24 ; cmp_rax, !0x24 0F84 %f ; je %Update_Pointer_2 # Check for % 4883F8 25 ; cmp_rax, !0x25 0F84 %e ; je %Update_Pointer_4 # Check for & 4883F8 26 ; cmp_rax, !0x26 0F84 %e ; je %Update_Pointer_4 # deal with bad input E8 %Q # call %fail :e # :Update_Pointer_4 4983C5 02 ; add_r13, !2 # Increment IP :f # :Update_Pointer_2 4983C5 01 ; add_r13, !1 # Increment IP :g # :Update_Pointer_1 4983C5 01 ; add_r13, !1 # Increment IP C3 ; ret :h # :First_pass_pointer # Deal with Pointer to label E8 %d ; call %Update_Pointer # Increment IP 488B1D %T ; mov_rbx,[rip+DWORD] %scratch # Using scratch E8 %A ; call %consume_token # Read token E8 %H ; call %ClearScratch # Throw away token 4883F8 3E ; cmp_rax, !0x3E # check for '>' 0F85 %a ; jne %First_pass # Loop again # Deal with %label>label case 488B1D %T ; mov_rbx,[rip+DWORD] %scratch # Write to scratch E8 %A ; 
call %consume_token # get token E8 %H ; call %ClearScratch # Clean up after ourselves E9 %a ; jmp %First_pass # Loop again :i # :First_pass_done C3 ; ret :j # :hex # deal with EOF 4883F8 FC ; cmp_rax, !-4 0F84 %n ; je %EOF # deal with line comments starting with # 4883F8 23 ; cmp_rax, !0x23 0F84 %s ; je %ascii_comment # deal with line comments starting with ; 4883F8 3B ; cmp_rax, !0x3B 0F84 %s ; je %ascii_comment # deal all ascii less than 0 4883F8 30 ; cmp_rax, !0x30 0F8C %r ; jl %ascii_other # deal with 0-9 4883F8 3A ; cmp_rax, !0x3A 0F8C %o ; jl %ascii_num # deal with all ascii less than A 4883F8 41 ; cmp_rax, !0x41 0F8C %r ; jl %ascii_other # deal with A-F 4883F8 47 ; cmp_rax, !0x47 0F8C %q ; jl %ascii_high # deal with all ascii less than a 4883F8 61 ; cmp_rax, !0x61 0F8C %r ; jl %ascii_other # deal with a-f 4883F8 67 ; cmp_rax, !0x67 0F8C %p ; jl %ascii_low # The rest that remains needs to be ignored E9 %r ; jmp %ascii_other :k # :Second_pass E8 %x ; call %Read_byte # Deal with EOF 4883F8 FC ; cmp_rax, !-4 0F84 %m ; je %Second_pass_done # Simply drop the label 4883F8 3A ; cmp_rax, !0x3A 0F85 %l ; jne %Second_pass_0 488B1D %T ; mov_rbx,[rip+DWORD] %scratch # Using scratch E8 %A ; call %consume_token # Read token E8 %H ; call %ClearScratch # Throw away token E9 %k ; jmp %Second_pass :l # :Second_pass_0 # Deal with % pointer 4883F8 25 ; cmp_rax, !0x25 0F84 %L ; je %StorePointer_rel4 # Deal with @ pointer 4883F8 40 ; cmp_rax, !0x40 0F84 %M ; je %StorePointer_rel2 # Deal with ! 
pointer 4883F8 21 ; cmp_rax, !0x21 0F84 %N ; je %StorePointer_rel1 # Deal with & pointer 4883F8 26 ; cmp_rax, !0x26 0F84 %O ; je %StorePointer_abs4 # Deal with $ pointer 4883F8 24 ; cmp_rax, !0x24 0F84 %P ; je %StorePointer_abs2 # :Second_pass_1 # Deal with everything else E8 %j ; call %hex # Process our char # Deal with EOF 4883F8 FC ; cmp_rax, !-4 0F84 %m ; je %Second_pass_done # deal with -1 values 4883F8 00 ; cmp_rax, !0 0F8C %k ; jl %Second_pass # deal with toggle 4983FF 00 ; cmp_r15, !0 0F84 %u ; je %print # process first byte of pair 4989C6 ; mov_r14,rax 49C7C7 00000000 ; mov_r15, %0 E9 %k ; jmp %Second_pass :m # :Second_pass_done :n # :EOF C3 ; ret :o # :ascii_num 83E8 30 ; sub_rax, !0x30 C3 ; ret :p # :ascii_low 83E8 57 ; sub_rax, !0x57 C3 ; ret :q # :ascii_high 83E8 37 ; sub_rax, !0x37 C3 ; ret :r # :ascii_other 48C7C0 FFFFFFFF ; mov_rax, %-1 C3 ; ret :s # :ascii_comment E8 %x ; call %Read_byte 4883F8 0D ; cmp_rax, !0x0D 0F84 %t ; je %ascii_comment_cr 4883F8 0A ; cmp_rax, !0x0A 0F85 %s ; jne %ascii_comment :t # :ascii_comment_cr 48C7C0 FFFFFFFF ; mov_rax, %-1 C3 ; ret # process second byte of pair :u # :print # update the sum and store in output 49C1E6 04 ; shl_r14, !4 4C01F0 ; add_rax,r14 # flip the toggle 49F7D7 ; not_r15 # Print our first Hex 48C7C2 01000000 ; mov_rdx, %1 # set the size of chars we want E8 %z ; call %print_chars 4983C5 01 ; add_r13, !1 # Increment IP E9 %k ; jmp %Second_pass :v # :Done # program completed Successfully 48C7C7 00000000 ; mov_rdi, %0 # All is well 48C7C0 3C000000 ; mov_rax, %0x3C # put the exit syscall number in eax 0F05 ; syscall # Call it a good day # Malloc isn't actually required if the program being built fits in the initial memory # However, it doesn't take much to add it. 
# Requires a value in RDI :w # :malloc 48C7C0 0C000000 ; mov_rax, %12 # the Syscall # for SYS_BRK 4153 ; push_r11 # Protect r11 0F05 ; syscall # call the Kernel 415B ; pop_r11 # Restore r11 C3 ; ret :x # :Read_byte # Attempt to read 1 byte from the input file (fd kept in r9) 48C7C2 01000000 ; mov_rdx, %1 # set the size of chars we want 488D35 %S ; lea_rsi,[rip+DWORD] %write # Where to put it 4C89CF ; mov_rdi,r9 # Where are we reading from 48C7C0 00000000 ; mov_rax, %0 # the syscall number for read 4153 ; push_r11 # Protect r11 0F05 ; syscall # call the Kernel 415B ; pop_r11 # Restore r11 4885C0 ; test_rax,rax # check what we got 0F84 %y ; je %Read_byte_1 # Got EOF call it done # load byte 8A05 %S ; mov_al,[rip+DWORD] %write # load char 480FB6C0 ; movzx_rax,al # We have to zero extend it to use it C3 ; ret # Deal with EOF :y # :Read_byte_1 48C7C0 FCFFFFFF ; mov_rax, %-4 # Put EOF in rax C3 ; ret :z # :print_chars 50 ; push_rax # Push the chars themselves (value in rax) onto the stack 4889E6 ; mov_rsi,rsp # What we are writing 4C89D7 ; mov_rdi,r10 # Write to target file 48C7C0 01000000 ; mov_rax, %1 # the syscall number for write 4153 ; push_r11 # Protect HEAD 0F05 ; syscall # call the Kernel 415B ; pop_r11 # Restore HEAD 58 ; pop_rax # deallocate stack C3 ; ret # Receives pointer in RBX # Writes out char and updates RBX :A # :consume_token E8 %x ; call %Read_byte # Consume_token # Check for \t 4883F8 09 ; cmp_rax, !0x09 0F84 %B ; je %consume_token_done # Check for \n 4883F8 0A ; cmp_rax, !0x0A 0F84 %B ; je %consume_token_done # Check for ' ' 4883F8 20 ; cmp_rax, !0x20 0F84 %B ; je %consume_token_done # Check for '>' 4883F8 3E ; cmp_rax, !0x3E 0F84 %B ; je %consume_token_done # Looks like we are still reading token 8803 ; mov_[rbx],al # Store char 4883C3 01 ; add_rbx, !1 # Point to next spot E9 %A ; jmp %consume_token # loop until done :B # :consume_token_done 48C7C1 00000000 ; mov_rcx, %0 # Pad with nulls 48890B ; mov_[rbx],rcx 4883C3 08 ; add_rbx, !8 C3 ; ret :C # :StoreLabel 4C89E0 ; mov_rax,r12 # ENTRY 4981C4
18000000 ; add_r12, %24 # CALLOC 4C8968 08 ; mov_[rax+BYTE],r13 !8 # ENTRY->TARGET = IP 4C8918 ; mov_[rax],r11 # ENTRY->NEXT = JUMP_TABLE 4989C3 ; mov_r11,rax # JUMP_TABLE = ENTRY 4D8963 10 ; mov_[r11+BYTE],r12 !16 # ENTRY->NAME = TOKEN 4C89E3 ; mov_rbx,r12 # Write Starting after struct E8 %A ; call %consume_token # Collect whole string 4989DC ; mov_r12,rbx # Update HEAP E9 %a ; jmp %First_pass :D # :GetTarget 488B3D %T ; mov_rdi,[rip+DWORD] %scratch # Reset scratch 4C89D9 ; mov_rcx,r11 # Grab JUMP_TABLE 488B71 10 ; mov_rsi,[rcx+BYTE] !16 # I->NAME :E # :GetTarget_loop 8A06 ; mov_al,[rsi] # I->NAME[0] 8A1F ; mov_bl,[rdi] # scratch[0] 480FB6DB ; movzx_rbx,bl # Zero extend 480FB6C0 ; movzx_rax,al # Zero extend 38D8 ; cmp_al,bl # IF TOKEN == I->NAME 0F85 %F ; jne %GetTarget_miss # Oops 4883C6 01 ; add_rsi, !1 4881C7 01000000 ; add_rdi, %1 3C 00 ; cmp_al, !0 0F85 %E ; jne %GetTarget_loop # Loop until E9 %G ; jmp %GetTarget_done # Match # Miss :F # :GetTarget_miss 488B09 ; mov_rcx,[rcx] # I = I->NEXT 4883F9 00 ; cmp_rcx, !0 # IF NULL == I 0F84 %Q ; je %fail # Abort hard 488B71 10 ; mov_rsi,[rcx+BYTE] !16 # I->NAME 488B3D %T ; mov_rdi,[rip+DWORD] %scratch # Reset scratch E9 %E ; jmp %GetTarget_loop :G # :GetTarget_done 488B41 08 ; mov_rax,[rcx+BYTE] !8 # Get address C3 ; ret :H # :ClearScratch 50 ; push_rax # Protect against changes 53 ; push_rbx # And overwrites 51 ; push_rcx # While we work 488B1D %T ; mov_rbx,[rip+DWORD] %scratch # Where our scratch is 48C7C0 00000000 ; mov_rax, %0 # Using null :I # :ClearScratch_loop 488B0B ; mov_rcx,[rbx] # Get current value 8803 ; mov_[rbx],al # Because we want null 4883C3 01 ; add_rbx, !1 # Increment 4883F9 00 ; cmp_rcx, !0 # Check if we hit null 0F85 %I ; jne %ClearScratch_loop # Keep looping 59 ; pop_rcx # Don't Forget to 5B ; pop_rbx # Restore Damage 58 ; pop_rax # Entirely C3 ; ret :J # :StorePointer E8 %d ; call %Update_Pointer # Increment IP 488B1D %T ; mov_rbx,[rip+DWORD] %scratch # Write to scratch E8 %A ; call 
%consume_token # get token 50 ; push_rax # Protect base_sep_p 488B05 %T ; mov_rax,[rip+DWORD] %scratch # Pointer to scratch E8 %D ; call %GetTarget # Get address of pointer E8 %H ; call %ClearScratch # Clean up after ourselves 4C89EA ; mov_rdx,r13 # base = IP 5B ; pop_rbx # Restore base_sep_p 4883FB 3E ; cmp_rbx, !0x3E # If base_sep_p == '>' 0F85 %K ; jne %StorePointer_done # If not # Deal with %label>label case 50 ; push_rax # We need to preserve main target 488B1D %T ; mov_rbx,[rip+DWORD] %scratch # Write to scratch E8 %A ; call %consume_token # get token 488B05 %T ; mov_rax,[rip+DWORD] %scratch # Pointer to scratch E8 %D ; call %GetTarget # Get address of pointer E8 %H ; call %ClearScratch # Clean up after ourselves 4889C2 ; mov_rdx,rax # Use our new base 58 ; pop_rax # Restore main target :K # :StorePointer_done C3 ; ret :L # :StorePointer_rel4 E8 %J ; call %StorePointer # Do Common 4829D0 ; sub_rax,rdx # target - ip 48C7C2 04000000 ; mov_rdx, %4 # set the size of chars we want E8 %z ; call %print_chars E8 %H ; call %ClearScratch # Clean up after ourselves E9 %k ; jmp %Second_pass :M # :StorePointer_rel2 E8 %J ; call %StorePointer # Do Common 4829D0 ; sub_rax,rdx # target - ip 48C7C2 02000000 ; mov_rdx, %2 # set the size of chars we want E8 %z ; call %print_chars E8 %H ; call %ClearScratch # Clean up after ourselves E9 %k ; jmp %Second_pass :N # :StorePointer_rel1 E8 %J ; call %StorePointer # Do Common 4829D0 ; sub_rax,rdx # target - ip 48C7C2 01000000 ; mov_rdx, %1 # set the size of chars we want E8 %z ; call %print_chars E8 %H ; call %ClearScratch # Clean up after ourselves E9 %k ; jmp %Second_pass :O # :StorePointer_abs4 E8 %J ; call %StorePointer # Do Common 48C7C2 04000000 ; mov_rdx, %4 # set the size of chars we want E8 %z ; call %print_chars E8 %H ; call %ClearScratch # Clean up after ourselves E9 %k ; jmp %Second_pass :P # :StorePointer_abs2 E8 %J ; call %StorePointer # Do Common 48C7C2 02000000 ; mov_rdx, %2 # set the size of chars we want E8 %z ; call 
%print_chars E8 %H ; call %ClearScratch # Clean up after ourselves E9 %k ; jmp %Second_pass :Q # :fail # Some shit went wrong 48C7C7 01000000 ; mov_rdi, %1 # All is wrong 48C7C0 3C000000 ; mov_rax, %0x3C # put the exit syscall number in eax 0F05 ; syscall # Call it a good day :S # :write 00000000 ; NULL 00000000 ; NULL :T # :scratch 00000000 ; NULL 00000000 ; NULL # :ELF_end
# SPDX-FileCopyrightText: 2019 Jeremiah Orians <jeremiah@pdp10.guru> # # SPDX-License-Identifier: GPL-3.0-or-later ## ELF Header :ELF_base 7F 45 4C 46 ## e_ident[EI_MAG0-3] ELF's magic number 02 ## e_ident[EI_CLASS] Indicating 64 bit 01 ## e_ident[EI_DATA] Indicating little endianness 01 ## e_ident[EI_VERSION] Indicating original elf 03 ## e_ident[EI_OSABI] Set at 3 because FreeBSD is strict 00 ## e_ident[EI_ABIVERSION] Set at 0 because none cares 00 00 00 00 00 00 00 ## e_ident[EI_PAD] 02 00 ## e_type Indicating Executable 3E 00 ## e_machine Indicating AMD64 01 00 00 00 ## e_version Indicating original elf &_start 00 00 00 00 ## e_entry Address of the entry point (Number of bytes this header is + Base Address) %ELF_program_headers>ELF_base 00 00 00 00 ## e_phoff Address of program header table 00 00 00 00 00 00 00 00 ## e_shoff Address of section header table 00 00 00 00 ## e_flags 40 00 ## e_ehsize Indicating our 64 Byte header 38 00 ## e_phentsize size of a program header table 01 00 ## e_phnum number of entries in program table 00 00 ## e_shentsize size of a section header table 00 00 ## e_shnum number of entries in section table 00 00 ## e_shstrndx index of the section names ## Program Header :ELF_program_headers 01 00 00 00 ## p_type 07 00 00 00 ## ph_flags: PF-X|PF-W|PF-R = 7 00 00 00 00 00 00 00 00 ## p_offset &ELF_base 00 00 00 00 ## p_vaddr &ELF_base 00 00 00 00 ## p_physaddr %ELF_end>ELF_base 00 00 00 00 ## p_filesz %ELF_end>ELF_base 00 00 00 00 ## p_memsz 01 00 00 00 00 00 00 00 ## Required alignment :ELF_text :_start 58 ; pop_rax # Get the number of arguments 5F ; pop_rdi # Get the program name 5F ; pop_rdi # Get the actual output name 48C7C6 41020000 ; mov_rsi, %577 # Prepare file as O_WRONLY|O_CREAT|O_TRUNC 48C7C2 80010000 ; mov_rdx, %384 # Prepare file as RW for owner only (600 in octal) 48C7C0 02000000 ; mov_rax, %2 # the syscall number for open() 0F05 ; syscall # Now open that file 4989C7 ; mov_r15,rax # Preserve the file pointer we were given 
48C7C0 0C000000 ; mov_rax, %12 # the Syscall # for SYS_BRK 48C7C7 00000000 ; mov_rdi, %0 # Get current brk 0F05 ; syscall # Let the kernel do the work 4989C6 ; mov_r14,rax # Set our malloc pointer 48C7C0 0C000000 ; mov_rax, %12 # the Syscall # for SYS_BRK 4C89F7 ; mov_rdi,r14 # Using current pointer 4881C7 00001000 ; add_rdi, %0x100000 # Allocate 1MB 0F05 ; syscall # Let the kernel do the work :core 5F ; pop_rdi # Get the actual input name 4883FF 00 ; cmp_rdi, !0 # Check for null string 0F84 %done ; je %done # Hit null be done 48C7C6 00000000 ; mov_rsi, %0 # prepare read_only 48C7C2 00000000 ; mov_rdx, %0 # prevent any interactions 48C7C0 02000000 ; mov_rax, %2 # the syscall number for open() 0F05 ; syscall # Now open that damn file 4989C5 ; mov_r13,rax # Protect INPUT :keep 48C7C2 00001000 ; mov_rdx, %0x100000 # set the size of chars we want 4C89F6 ; mov_rsi,r14 # Where to put it 4C89EF ; mov_rdi,r13 # Where are we reading from 48C7C0 00000000 ; mov_rax, %0 # the syscall number for read 0F05 ; syscall # call the Kernel 50 ; push_rax # Protect the number of bytes read 4889C2 ; mov_rdx,rax # Number of bytes to write 4C89F6 ; mov_rsi,r14 # What we are writing 4C89FF ; mov_rdi,r15 # Write to target file 48C7C0 01000000 ; mov_rax, %1 # the syscall number for write 0F05 ; syscall # call the Kernel 58 ; pop_rax # Get bytes read 483D 00001000 ; cmp_rax, %0x100000 # Check if buffer was fully used 0F84 %keep ; je %keep # Keep looping if was full E9 %core ; jmp %core # Otherwise move to next file :done # program completed Successfully 48C7C7 00000000 ; mov_rdi, %0 # All is well 48C7C0 3C000000 ; mov_rax, %0x3C # put the exit syscall number in eax 0F05 ; syscall # Call it a good day :ELF_end
### Copyright (C) 2016 Jeremiah Orians ### Copyright (C) 2017 Jan Nieuwenhuizen <janneke@gnu.org> ### This file is part of M2-Planet. ### ### M2-Planet is free software: you can redistribute it and/or modify ### it under the terms of the GNU General Public License as published by ### the Free Software Foundation, either version 3 of the License, or ### (at your option) any later version. ### ### M2-Planet is distributed in the hope that it will be useful, ### but WITHOUT ANY WARRANTY; without even the implied warranty of ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ### GNU General Public License for more details. ### ### You should have received a copy of the GNU General Public License ### along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. ### elf64.hex2: 64 bit elf header in hex2 ### if you wish to use this header, you need to add :ELF_end to the end of your ### M1 or hex2 files. ## ELF Header :ELF_base 7F 45 4C 46 ## e_ident[EI_MAG0-3] ELF's magic number 02 ## e_ident[EI_CLASS] Indicating 64 bit 01 ## e_ident[EI_DATA] Indicating little endianness 01 ## e_ident[EI_VERSION] Indicating original elf 03 ## e_ident[EI_OSABI] Set at 3 because FreeBSD is strict 00 ## e_ident[EI_ABIVERSION] Set at 0 because none cares 00 00 00 00 00 00 00 ## e_ident[EI_PAD] 02 00 ## e_type Indicating Executable 3E 00 ## e_machine Indicating AMD64 01 00 00 00 ## e_version Indicating original elf &_start 00 00 00 00 ## e_entry Address of the entry point (Number of bytes this header is + Base Address) %ELF_program_headers>ELF_base 00 00 00 00 ## e_phoff Address of program header table 00 00 00 00 00 00 00 00 ## e_shoff Address of section header table 00 00 00 00 ## e_flags 40 00 ## e_ehsize Indicating our 64 Byte header 38 00 ## e_phentsize size of a program header table 01 00 ## e_phnum number of entries in program table 00 00 ## e_shentsize size of a section header table 00 00 ## e_shnum number of entries in section table 00 00 ## e_shstrndx index of the 
section names ## Program Header :ELF_program_headers 01 00 00 00 ## p_type 07 00 00 00 ## ph_flags: PF-X|PF-W|PF-R = 7 00 00 00 00 00 00 00 00 ## p_offset &ELF_base 00 00 00 00 ## p_vaddr &ELF_base 00 00 00 00 ## p_physaddr %ELF_end>ELF_base 00 00 00 00 ## p_filesz %ELF_end>ELF_base 00 00 00 00 ## p_memsz 01 00 00 00 00 00 00 00 ## Required alignment :ELF_text
# SPDX-FileCopyrightText: 2019 Jeremiah Orians <jeremiah@pdp10.guru> # SPDX-FileCopyrightText: 2023 Andrius Štikonas <andrius@stikonas.eu> # # SPDX-License-Identifier: GPL-3.0-or-later # Register usage: # RAX, RSI, RDI => Temps # R12 => MALLOC # R13 => HEAD # R14 => Output_file # R15 => Input_file # Struct format: (size 32) # NEXT => 0 # TYPE => 8 # TEXT => 16 # EXPRESSION => 24 # Types # None => 0 # MACRO => 1 # STRING => 2 # Where the ELF Header is going to hit # Simply jump to _start # Our main function :_start 58 ; pop_rax # Get the number of arguments 5F ; pop_rdi # Get the program name 5F ; pop_rdi # Get the actual input name 48C7C6 00000000 ; mov_rsi, %0 # prepare read_only 48C7C0 02000000 ; mov_rax, %2 # the syscall number for open() 0F05 ; syscall # Now open that damn file 4989C7 ; mov_r15,rax # Preserve the file pointer we were given 5F ; pop_rdi # Get the actual output name 48C7C6 41020000 ; mov_rsi, %577 # Prepare file as O_WRONLY|O_CREAT|O_TRUNC 48C7C2 80010000 ; mov_rdx, %384 # Prepare file as RW for owner only (600 in octal) 48C7C0 02000000 ; mov_rax, %2 # the syscall number for open() 0F05 ; syscall # Now open that damn file 483D 00000000 ; cmp_rax, %0 # Check for missing output 7F !_start_out ; jg8 !_start_out # Have real output 48C7C0 01000000 ; mov_rax, %1 # Use stdout :_start_out 4989C6 ; mov_r14,rax # Preserve the file pointer we were given 48C7C0 0C000000 ; mov_rax, %12 # the Syscall # for SYS_BRK 48C7C7 00000000 ; mov_rdi, %0 # Get current brk 0F05 ; syscall # Let the kernel do the work 4989C4 ; mov_r12,rax # Set our malloc pointer 4D31ED ; xor_r13,r13 # Set HEAD = NULL E8 %Tokenize_Line ; call %Tokenize_Line # Get all lines 4C89E8 ; mov_rax,r13 # prepare for Reverse_List E8 %Reverse_List ; call %Reverse_List # Correct order 4989C5 ; mov_r13,rax # Update HEAD E8 %Identify_Macros ; call %Identify_Macros # Find the DEFINEs E8 %Line_Macro ; call %Line_Macro # Apply the DEFINEs E8 %Process_String ; call %Process_String # Handle strings E8
%Eval_Immediates ; call %Eval_Immediates # Handle Numbers E8 %Preserve_Other ; call %Preserve_Other # Collect the remaining E8 %Print_Hex ; call %Print_Hex # Output our results :Done # program completed Successfully 48C7C7 00000000 ; mov_rdi, %0 # All is well 48C7C0 3C000000 ; mov_rax, %0x3c # put the exit syscall number in eax 0F05 ; syscall # Call it a good day # Tokenize_Line Function # Using input file R15 and Head R13 # Creates a linked list of structs # Uses RBX for in_set strings, RCX for Int C and RDX for Struct Token* p :Tokenize_Line 53 ; push_rbx # Protect RBX 51 ; push_rcx # Protect RCX 52 ; push_rdx # Protect RDX :restart E8 %fgetc ; call %fgetc # Read a char 483D FCFFFFFF ; cmp_rax, %-4 # Check for EOF 74 !done ; je8 !done # File is collected 480FB6C0 ; movzx_rax,al # We have to zero extend it to use it 4889C1 ; mov_rcx,rax # Protect C 488D1D %comments ; lea_rbx,[rip+DWORD] %comments # Get pointer to "#;" E8 %In_Set ; call %In_Set # Check for comments 483D 01000000 ; cmp_rax, %1 # If comments 0F84 %Purge_LineComment ; je %Purge_LineComment # try again 4889C8 ; mov_rax,rcx # put C in place for check 488D1D %terminators ; lea_rbx,[rip+DWORD] %terminators # Get pointer to "\n\t " E8 %In_Set ; call %In_Set # Check for terminators 483D 01000000 ; cmp_rax, %1 # If terminator 74 !restart ; je8 !restart # try again 48C7C0 20000000 ; mov_rax, %32 # Malloc the struct P E8 %malloc ; call %malloc # Get pointer to P 4889C2 ; mov_rdx,rax # Protect P 4C892A ; mov_[rdx],r13 # P->NEXT = HEAD 4989D5 ; mov_r13,rdx # HEAD = P 4889C8 ; mov_rax,rcx # put C in place for check 488D1D %string_char ; lea_rbx,[rip+DWORD] %string_char # Get pointer to "\"'" E8 %In_Set ; call %In_Set # Check for string chars 483D 01000000 ; cmp_rax, %1 # If string char 0F84 %Store_String ; je %Store_String # Get string E8 %Store_Atom ; call %Store_Atom # Get whole token EB !restart ; jmp8 !restart :done 5A ; pop_rdx # Restore RDX 59 ; pop_rcx # Restore RCX 5B ; pop_rbx # Restore RBX C3 ; ret # 
fgetc function # Receives FILE* in R15 # Returns -4 (EOF) or char in RAX :fgetc 48C7C0 FCFFFFFF ; mov_rax, %-4 # Put EOF in rax 50 ; push_rax # Assume bad (If nothing read, value will remain EOF) 488D3424 ; lea_rsi,[rsp] # Get stack address 4C89FF ; mov_rdi,r15 # Where are we reading from 48C7C0 00000000 ; mov_rax, %0 # the syscall number for read 52 ; push_rdx # Protect RDX 48C7C2 01000000 ; mov_rdx, %1 # set the size of chars we want 4153 ; push_r11 # Protect r11 0F05 ; syscall # call the Kernel 415B ; pop_r11 # Restore r11 5A ; pop_rdx # Restore RDX 58 ; pop_rax # Get either char or EOF C3 ; ret # Malloc isn't actually required if the program being built fits in the initial memory # However, it doesn't take much to add it. # Requires R12 to be initialized and RAX to have the number of desired bytes :malloc 4C89E7 ; mov_rdi,r12 # Using the current pointer 4801C7 ; add_rdi,rax # Request the number of desired bytes 48C7C0 0C000000 ; mov_rax, %12 # the Syscall # for SYS_BRK 51 ; push_rcx # Protect rcx 4153 ; push_r11 # Protect r11 0F05 ; syscall # call the Kernel 415B ; pop_r11 # Restore r11 59 ; pop_rcx # Restore rcx 4C89E0 ; mov_rax,r12 # Return pointer 4989FC ; mov_r12,rdi # Update pointer C3 ; ret # Purge_LineComment function # Reads chars until LF and jumps to restart # NOTE(review): EOF (-4) is not checked here, so a comment that ends the file without LF appears to keep this loop spinning - confirm :Purge_LineComment E8 %fgetc ; call %fgetc # Get a char 480FB6C0 ; movzx_rax,al # Zero extend 4883F8 0A ; cmp_rax, %10 # While not LF 75 !Purge_LineComment ; jne8 !Purge_LineComment # Keep reading E9 %restart ; jmp %restart # Store_String Function # Receives C in RCX, HEAD in RDX and Input file in R15 # Uses RBX for terminator, RCX for C and RDX for string :Store_String 53 ; push_rbx # Protect RBX 51 ; push_rcx # Protect RCX 52 ; push_rdx # Protect RDX 48C7C0 02000000 ; mov_rax, %2 # Using TYPE STRING 488942 08 ; mov_[rdx+BYTE],rax !8 # HEAD->TYPE = STRING 48C7C0 00010000 ; mov_rax, %256 # Malloc the string E8 %malloc ; call %malloc # Get pointer to P 488942 10 ; 
mov_[rdx+BYTE],rax !16 # HEAD->TEXT = STRING 4889CB ; mov_rbx,rcx # Protect terminator 4889C2 ; mov_rdx,rax # Protect string pointer :Store_String_Loop 880A ; mov_[rdx],cl # write byte E8 %fgetc ; call %fgetc # read next char 480FB6C0 ; movzx_rax,al # Zero extend it 4889C1 ; mov_rcx,rax # Update C 4883C2 01 ; add_rdx, %1 # STRING = STRING + 1 4839D9 ; cmp_rcx,rbx # See if we hit terminator 75 !Store_String_Loop ; jne8 !Store_String_Loop # Otherwise keep looping 5A ; pop_rdx # Restore RDX 59 ; pop_rcx # Restore RCX 5B ; pop_rbx # Restore RBX 4889D0 ; mov_rax,rdx # return HEAD E9 %restart ; jmp %restart # Store_Atom Function # Receives C in RCX, HEAD in RDX and Input file in R15 # Uses RBX for in_set strings, RCX for C and RDX for string :Store_Atom 53 ; push_rbx # Protect RBX 51 ; push_rcx # Protect RCX 52 ; push_rdx # Protect RDX 48C7C0 00010000 ; mov_rax, %256 # Malloc the string E8 %malloc ; call %malloc # Get pointer to P 488942 10 ; mov_[rdx+BYTE],rax !16 # HEAD->TEXT = STRING 488D1D %terminators ; lea_rbx,[rip+DWORD] %terminators # Get pointer to "\n\t " 4889C2 ; mov_rdx,rax # Protect string pointer :Store_Atom_loop 880A ; mov_[rdx],cl # write byte E8 %fgetc ; call %fgetc # read next char 480FB6C0 ; movzx_rax,al # Zero extend it 4889C1 ; mov_rcx,rax # Update C 4883C2 01 ; add_rdx, %1 # STRING = STRING + 1 E8 %In_Set ; call %In_Set # Check for terminators 4883F8 00 ; cmp_rax, %0 # Check for "\n\t " 74 !Store_Atom_loop ; je8 !Store_Atom_loop # Loop otherwise 5A ; pop_rdx # Restore RDX 59 ; pop_rcx # Restore RCX 5B ; pop_rbx # Restore RBX 4889D0 ; mov_rax,rdx # return HEAD C3 ; ret # In_Set function # Receives Char C in RAX and CHAR* in RBX # Returns 1 if true, zero if false in RAX :In_Set 53 ; push_rbx # Protect RBX 51 ; push_rcx # Protect RCX :In_Set_loop 8A0B ; mov_cl,[rbx] # Read char 480FB6C9 ; movzx_rcx,cl # Zero extend it 4839C8 ; cmp_rax,rcx # See if they match 74 !In_Set_True ; je8 !In_Set_True # return true 4881F9 00000000 ; cmp_rcx, %0 # 
Check for NULL 74 !In_Set_False ; je8 !In_Set_False # return false 4881C3 01000000 ; add_rbx, %1 # s = s + 1 EB !In_Set_loop ; jmp8 !In_Set_loop # Keep looping :In_Set_True 48C7C0 01000000 ; mov_rax, %1 # Set True 59 ; pop_rcx # Restore RCX 5B ; pop_rbx # Restore RBX C3 ; ret :In_Set_False 48C7C0 00000000 ; mov_rax, %0 # Set FALSE 59 ; pop_rcx # Restore RCX 5B ; pop_rbx # Restore RBX C3 ; ret # Char sets :terminators 0A 09 20 00 # "\n\t \0" :comments 23 3B 00 # "#;\0" :string_char 22 27 00 # "\"'\0" # Reverse_List function # Receives List in RAX # Returns the list reversed in RAX :Reverse_List 53 ; push_rbx # Protect RBX 51 ; push_rcx # Protect RCX 4889C3 ; mov_rbx,rax # Set HEAD 48C7C0 00000000 ; mov_rax, %0 # ROOT = NULL :Reverse_List_Loop 4883FB 00 ; cmp_rbx, %0 # WHILE HEAD != NULL 74 !Reverse_List_Done ; je8 !Reverse_List_Done # Stop otherwise 488B0B ; mov_rcx,[rbx] # NEXT = HEAD->NEXT 488903 ; mov_[rbx],rax # HEAD->NEXT = ROOT 4889D8 ; mov_rax,rbx # ROOT = HEAD 4889CB ; mov_rbx,rcx # HEAD = NEXT EB !Reverse_List_Loop ; jmp8 !Reverse_List_Loop # Keep Going :Reverse_List_Done 59 ; pop_rcx # Restore RCX 5B ; pop_rbx # Restore RBX C3 ; ret # Identify_Macros function # Receives List in RAX # Updates the list in place; does not modify registers # Uses RBX for DEFINE, RCX for I :Identify_Macros 50 ; push_rax # Protect RAX 53 ; push_rbx # Protect RBX 51 ; push_rcx # Protect RCX 52 ; push_rdx # Protect RDX 488D1D %DEFINE_str ; lea_rbx,[rip+DWORD] %DEFINE_str # Setup define string 4889C1 ; mov_rcx,rax # I = HEAD :Identify_Macros_Loop 488B41 10 ; mov_rax,[rcx+BYTE] !16 # I->TEXT E8 %match ; call %match # IF "DEFINE" == I->TEXT 4883F8 00 ; cmp_rax, %0 # Check if match 75 !Identify_Macros_Next ; jne8 !Identify_Macros_Next # Skip the work # Deal with MACRO 48C7C0 01000000 ; mov_rax, %1 # Using MACRO 488941 08 ; mov_[rcx+BYTE],rax !8 # I->TYPE = MACRO 488B01 ; mov_rax,[rcx] # I->NEXT 488B40 10 ; mov_rax,[rax+BYTE] !16 # I->NEXT->TEXT 488941 10 ; mov_[rcx+BYTE],rax !16 # 
I->TEXT = I->NEXT->TEXT 488B01 ; mov_rax,[rcx] # I->NEXT 488B00 ; mov_rax,[rax] # I->NEXT->NEXT 488B40 10 ; mov_rax,[rax+BYTE] !16 # I->NEXT->NEXT->TEXT 488941 18 ; mov_[rcx+BYTE],rax !24 # I->EXPRESSION = I->NEXT->NEXT->TEXT 488B01 ; mov_rax,[rcx] # I->NEXT 488B00 ; mov_rax,[rax] # I->NEXT->NEXT 488B00 ; mov_rax,[rax] # I->NEXT->NEXT->NEXT 488901 ; mov_[rcx],rax # I->NEXT = I->NEXT->NEXT->NEXT :Identify_Macros_Next 488B09 ; mov_rcx,[rcx] # I = I->NEXT 4883F9 00 ; cmp_rcx, %0 # Check for NULL 75 !Identify_Macros_Loop ; jne8 !Identify_Macros_Loop # Keep looping otherwise 5A ; pop_rdx # Restore RDX 59 ; pop_rcx # Restore RCX 5B ; pop_rbx # Restore RBX 58 ; pop_rax # Restore RAX C3 ; ret :DEFINE_str 44 45 46 49 4E 45 00 # "DEFINE" # match function # Receives CHAR* in RAX and CHAR* in RBX # Returns 0 (TRUE) or 1 (FALSE) in RAX :match 53 ; push_rbx # Protect RBX 51 ; push_rcx # Protect RCX 52 ; push_rdx # Protect RDX 4889C1 ; mov_rcx,rax # S1 in place 4889DA ; mov_rdx,rbx # S2 in place :match_Loop 8A01 ; mov_al,[rcx] # S1[0] 480FB6C0 ; movzx_rax,al # Make it useful 8A1A ; mov_bl,[rdx] # S2[0] 480FB6DB ; movzx_rbx,bl # Make it useful 4839D8 ; cmp_rax,rbx # See if they match 75 !match_False ; jne8 !match_False # If not 4883C1 01 ; add_rcx, %1 # S1 = S1 + 1 4883C2 01 ; add_rdx, %1 # S2 = S2 + 1 4883F8 00 ; cmp_rax, %0 # If reached end of string 74 !match_Done ; je8 !match_Done # Perfect match EB !match_Loop ; jmp8 !match_Loop # Otherwise keep looping :match_False 48C7C0 01000000 ; mov_rax, %1 # Return false :match_Done 5A ; pop_rdx # Restore RDX 59 ; pop_rcx # Restore RCX 5B ; pop_rbx # Restore RBX C3 ; ret # Line_Macro function # Receives List in RAX # Updates the list in place; does not modify registers # Uses RAX for I, RBX for I->TEXT, RCX for I->EXPRESSION :Line_Macro 50 ; push_rax # Protect RAX 53 ; push_rbx # Protect RBX 51 ; push_rcx # Protect RCX 52 ; push_rdx # Protect RDX :Line_Macro_Loop 488B58 08 ; mov_rbx,[rax+BYTE] !8 # I->TYPE 4883FB 01 ; cmp_rbx, %1 # IF 
MACRO == I->TYPE 75 !Line_Macro_Next ; jne8 !Line_Macro_Next # Otherwise move on # Is a macro apply 488B58 10 ; mov_rbx,[rax+BYTE] !16 # I->TEXT 488B48 18 ; mov_rcx,[rax+BYTE] !24 # I->EXPRESSION 488B00 ; mov_rax,[rax] # I->NEXT E8 %Set_Expression ; call %Set_Expression # Apply it EB !Line_Macro_Loop ; jmp8 !Line_Macro_Loop # Move on to next :Line_Macro_Next 488B00 ; mov_rax,[rax] # I->NEXT 4883F8 00 ; cmp_rax, %0 # Check for NULL 75 !Line_Macro_Loop ; jne8 !Line_Macro_Loop # Keep going 5A ; pop_rdx # Restore RDX 59 ; pop_rcx # Restore RCX 5B ; pop_rbx # Restore RBX 58 ; pop_rax # Restore RAX C3 ; ret # Set_Expression function # Receives List in RAX, CHAR* in RBX and CHAR* in RCX # Updates the list in place; does not modify registers # Uses RBX for C, RCX for EXP and RDX for I :Set_Expression 50 ; push_rax # Protect RAX 53 ; push_rbx # Protect RBX 51 ; push_rcx # Protect RCX 52 ; push_rdx # Protect RDX 4889C2 ; mov_rdx,rax # Set I :Set_Expression_Loop 488B42 08 ; mov_rax,[rdx+BYTE] !8 # I->TYPE 4883F8 01 ; cmp_rax, %1 # IF MACRO == I->TYPE 74 !Set_Expression_Next ; je8 !Set_Expression_Next # Ignore and move on 488B42 10 ; mov_rax,[rdx+BYTE] !16 # I->TEXT E8 %match ; call %match # Check for match 4883F8 00 ; cmp_rax, %0 # If match 75 !Set_Expression_Next ; jne8 !Set_Expression_Next # Otherwise next # We have a non-macro match 48894A 18 ; mov_[rdx+BYTE],rcx !24 # I->EXPRESSION = EXP :Set_Expression_Next 488B12 ; mov_rdx,[rdx] # I = I->NEXT 4883FA 00 ; cmp_rdx, %0 # IF NULL == I 75 !Set_Expression_Loop ; jne8 !Set_Expression_Loop # Otherwise keep looping 5A ; pop_rdx # Restore RDX 59 ; pop_rcx # Restore RCX 5B ; pop_rbx # Restore RBX 58 ; pop_rax # Restore RAX C3 ; ret # Process_String function # Receives List in RAX # Update the list in place; does not modify registers # Uses RBX for I->TEXT, RCX for I and RDX for S :Process_String 50 ; push_rax # Protect RAX 53 ; push_rbx # Protect RBX 51 ; push_rcx # Protect RCX 52 ; push_rdx # Protect RDX 4889C1 ; mov_rcx,rax # I 
= HEAD :Process_String_loop 488B41 08 ; mov_rax,[rcx+BYTE] !8 # I->TYPE 4883F8 02 ; cmp_rax, %2 # IF STRING == I->TYPE 75 !Process_String_Next ; jne8 !Process_String_Next # Skip to next 488B59 10 ; mov_rbx,[rcx+BYTE] !16 # I->TEXT 8A03 ; mov_al,[rbx] # I->TEXT[0] 480FB6C0 ; movzx_rax,al # make it useful 4883F8 27 ; cmp_rax, %39 # IF '\'' == I->TEXT[0] 75 !Process_String_Raw ; jne8 !Process_String_Raw # Deal with '"' # Deal with '\'' 4883C3 01 ; add_rbx, %1 # I->TEXT + 1 488959 18 ; mov_[rcx+BYTE],rbx !24 # I->EXPRESSION = I->TEXT + 1 EB !Process_String_Next ; jmp8 !Process_String_Next # Move on to next :Process_String_Raw 4889D8 ; mov_rax,rbx # Get length of I->TEXT E8 %string_length ; call %string_length # Do it 48C1E8 02 ; shr_rax, !2 # LENGTH = LENGTH >> 2 4883C0 01 ; add_rax, %1 # LENGTH = LENGTH + 1 48C1E0 03 ; shl_rax, !3 # LENGTH = LENGTH << 3 E8 %malloc ; call %malloc # Get string 4889DA ; mov_rdx,rbx # S = I->TEXT 4883C2 01 ; add_rdx, %1 # S = S + 1 488941 18 ; mov_[rcx+BYTE],rax !24 # I->EXPRESSION = hexify 4889C3 ; mov_rbx,rax # Put hexify buffer in rbx :Process_String_Raw_Loop 8A02 ; mov_al,[rdx] # Read 1 chars 480FB6C0 ; movzx_rax,al # Make it useful 4883C2 01 ; add_rdx, %1 # S = S + 1 3C 00 ; cmp_al, !0 # Check for NULL 9C ; pushf # Protect condition E8 %hex8 ; call %hex8 # write them all 9D ; popf # restore condition 75 !Process_String_Raw_Loop ; jne8 !Process_String_Raw_Loop # Keep looping :Process_String_Next 488B09 ; mov_rcx,[rcx] # I = I->NEXT 4883F9 00 ; cmp_rcx, %0 # IF NULL == I 75 !Process_String_loop ; jne8 !Process_String_loop # Otherwise keep looping 5A ; pop_rdx # Restore RDX 59 ; pop_rcx # Restore RCX 5B ; pop_rbx # Restore RBX 58 ; pop_rax # Restore RAX C3 ; ret # string_length function # Receives CHAR* in RAX # Returns INT in RAX # Uses RAX for CH, RBX for S and RCX for INDEX :string_length 53 ; push_rbx # Protect RBX 51 ; push_rcx # Protect RCX 4889C3 ; mov_rbx,rax # Set S B9 00000000 ; mov_rcx, %0 # INDEX = 0 :string_length_loop 
8A040B ; mov_al,[rbx+rcx] # S[0] 480FB6C0 ; movzx_rax,al # make it useful 4883F8 00 ; cmp_rax, %0 # IF NULL == S[0] 74 !string_length_done ; je8 !string_length_done # Stop 4883C1 01 ; add_rcx, %1 # INDEX = INDEX + 1 EB !string_length_loop ; jmp8 !string_length_loop # Keep going :string_length_done 4889C8 ; mov_rax,rcx # RETURN INDEX 59 ; pop_rcx # Restore RCX 5B ; pop_rbx # Restore RBX C3 ; ret # Eval_Immediates function # Receives List in RAX # Updates the list in place; does not modify registers # Uses RBX for I->TEXT[0], RCX for I->TEXT[1] and RDX for I :Eval_Immediates 50 ; push_rax # Protect RAX 53 ; push_rbx # Protect RBX 51 ; push_rcx # Protect RCX 52 ; push_rdx # Protect RDX 4889C2 ; mov_rdx,rax # I = HEAD :Eval_Immediates_Loop # Check for MACRO 488B42 08 ; mov_rax,[rdx+BYTE] !8 # I->TYPE 4883F8 01 ; cmp_rax, %1 # IF MACRO == I->TYPE 74 !Eval_Immediates_Next ; je8 !Eval_Immediates_Next # Skip to next # Check for NULL EXPRESSION 488B42 18 ; mov_rax,[rdx+BYTE] !24 # I->EXPRESSION 4883F8 00 ; cmp_rax, %0 # IF NULL == I->EXPRESSION 75 !Eval_Immediates_Next ; jne8 !Eval_Immediates_Next # Skip to next # Check if number 488B42 10 ; mov_rax,[rdx+BYTE] !16 # I->TEXT 8A18 ; mov_bl,[rax] # I->TEXT[0] 480FB6DB ; movzx_rbx,bl # Extend to use 4883C0 01 ; add_rax, %1 # I->TEXT + 1 8A08 ; mov_cl,[rax] # I->TEXT[1] 480FB6C9 ; movzx_rcx,cl # Extend to use E8 %numerate_string ; call %numerate_string # Convert string to INT 4883F8 00 ; cmp_rax, %0 # IF 0 == numerate_string(I->TEXT + 1) 75 !Eval_Immediates_value ; jne8 !Eval_Immediates_value # Has a value # Last chance for Immediate 4883F9 30 ; cmp_rcx, %48 # If '0' == I->TEXT[1] 75 !Eval_Immediates_Next ; jne8 !Eval_Immediates_Next # Skip to next :Eval_Immediates_value E8 %express_number ; call %express_number # Convert value to hex string 488942 18 ; mov_[rdx+BYTE],rax !24 # I->EXPRESSION = express_number(value, I-TEXT[0]) :Eval_Immediates_Next 488B12 ; mov_rdx,[rdx] # I = I->NEXT 4883FA 00 ; cmp_rdx, %0 # IF NULL == I 75 
!Eval_Immediates_Loop ; jne8 !Eval_Immediates_Loop # Otherwise keep looping 5A ; pop_rdx # Restore RDX 59 ; pop_rcx # Restore RCX 5B ; pop_rbx # Restore RBX 58 ; pop_rax # Restore RAX C3 ; ret # numerate_string function # Receives CHAR* in RAX # Returns value of CHAR* in RAX # Only supports negative decimals and Uppercase Hex (eg 5, -3 and 0xCC) # Uses RAX for VALUE, RBX for S, RCX for CH and RSI for NEGATIVE? :numerate_string 53 ; push_rbx # Protect RBX 51 ; push_rcx # Protect RCX 52 ; push_rdx # Protect RDX 56 ; push_rsi # Protect RSI 4889C3 ; mov_rbx,rax # put S in correct place 48C7C0 00000000 ; mov_rax, %0 # Initialize to Zero :numerate_string_loop 8A4B 01 ; mov_cl,[rbx+BYTE] !1 # S[1] 480FB6C9 ; movzx_rcx,cl # make it useful 4883F9 78 ; cmp_rcx, %120 # IF 'x' == S[1] 74 !numerate_hex ; je8 !numerate_hex # Deal with hex input # Assume decimal input 48C7C6 00000000 ; mov_rsi, %0 # Assume no negation 8A0B ; mov_cl,[rbx] # S[0] 480FB6C9 ; movzx_rcx,cl # make it useful 4883F9 2D ; cmp_rcx, %45 # IF '-' == S[0] 75 !numerate_decimal ; jne8 !numerate_decimal # Skip negation 48C7C6 01000000 ; mov_rsi, %1 # Set FLAG 4883C3 01 ; add_rbx, %1 # S = S + 1 :numerate_decimal 8A0B ; mov_cl,[rbx] # S[0] 480FB6C9 ; movzx_rcx,cl # make it useful 4883F9 00 ; cmp_rcx, %0 # IF NULL == S[0] 74 !numerate_decimal_done ; je8 !numerate_decimal_done # We are done 486BC0 0A ; imul_rax, !10 # VALUE = VALUE * 10 4883E9 30 ; sub_rcx, !48 # CH = CH - '0' 4883F9 09 ; cmp_rcx, %9 # Check for illegal 7F !numerate_string_fail ; jg8 !numerate_string_fail # If CH > '9' 4883F9 00 ; cmp_rcx, %0 # Check for illegal 7C !numerate_string_fail ; jl8 !numerate_string_fail # IF CH < 0 4801C8 ; add_rax,rcx # VALUE = VALUE + CH 4883C3 01 ; add_rbx, %1 # S = S + 1 EB !numerate_decimal ; jmp8 !numerate_decimal # Keep looping :numerate_decimal_done 4883FE 01 ; cmp_rsi, %1 # Check if need to negate 75 !numerate_string_done ; jne8 !numerate_string_done # Nope 486BC0 FF ; imul_rax, !-1 # VALUE = VALUE * -1 EB 
!numerate_string_done ; jmp8 !numerate_string_done # Done :numerate_hex 4883C3 02 ; add_rbx, %2 # S = S + 2 :numerate_hex_loop 8A0B ; mov_cl,[rbx] # S[0] 480FB6C9 ; movzx_rcx,cl # make it useful 4883F9 00 ; cmp_rcx, %0 # IF NULL == S[0] 0F84 %numerate_string_done ; je %numerate_string_done # We are done 48C1E0 04 ; shl_rax, !4 # VALUE = VALUE << 4 4883E9 30 ; sub_rcx, !48 # CH = CH - '0' 4883F9 0A ; cmp_rcx, %10 # IF 10 >= CH 7C !numerate_hex_digit ; jl8 !numerate_hex_digit # NO 4883E9 07 ; sub_rcx, !7 # Push A-F into range :numerate_hex_digit 4883F9 0F ; cmp_rcx, %15 # Check for illegal 7F !numerate_string_fail ; jg8 !numerate_string_fail # If CH > 'F' 4883F9 00 ; cmp_rcx, %0 # Check for illegal 7C !numerate_string_fail ; jl8 !numerate_string_fail # IF CH < 0 4801C8 ; add_rax,rcx # VALUE = VALUE + CH 4883C3 01 ; add_rbx, %1 # S = S + 1 EB !numerate_hex_loop ; jmp8 !numerate_hex_loop # Keep looping :numerate_string_fail 48C7C0 00000000 ; mov_rax, %0 # return ZERO :numerate_string_done 5E ; pop_rsi # Restore RSI 5A ; pop_rdx # Restore RDX 59 ; pop_rcx # Restore RCX 5B ; pop_rbx # Restore RBX C3 ; ret # express_number function # Receives INT in RAX and CHAR in RBX # Allocates a string and expresses the value in hex # Returns string in RAX # Uses RAX for VALUE, RBX for S and RCX for CH :express_number 53 ; push_rbx # Protect RBX 51 ; push_rcx # Protect RCX 52 ; push_rdx # Protect RDX 4889D9 ; mov_rcx,rbx # Put CH in right place 4889C3 ; mov_rbx,rax # Protect VALUE 4883F9 25 ; cmp_rcx, %37 # IF '%' == CH 75 !express_number2 ; jne8 !express_number2 # Otherwise try @ 48C7C0 09000000 ; mov_rax, %9 # We need 9 bytes E8 %malloc ; call %malloc # Get S pointer 4893 ; xchg_rax,rbx # Put S and VALUE in place 53 ; push_rbx # Protect S E8 %hex32l ; call %hex32l # Store 32bits EB !express_number_done ; jmp8 !express_number_done # done :express_number2 4883F9 40 ; cmp_rcx, %64 # IF '@' == CH 75 !express_number1 ; jne8 !express_number1 # Otherwise try ! 
48C7C0 05000000 ; mov_rax, %5 # We need 5 bytes E8 %malloc ; call %malloc # Get S pointer 4893 ; xchg_rax,rbx # Put S and VALUE in place 53 ; push_rbx # Protect S E8 %hex16l ; call %hex16l # Store 16bits EB !express_number_done ; jmp8 !express_number_done # done :express_number1 48C7C0 03000000 ; mov_rax, %3 # We need 3 bytes E8 %malloc ; call %malloc # Get S pointer 4893 ; xchg_rax,rbx # Put S and VALUE in place 53 ; push_rbx # Protect S E8 %hex8 ; call %hex8 # Store 8bit :express_number_done 58 ; pop_rax # Restore S 5A ; pop_rdx # Restore RDX 59 ; pop_rcx # Restore RCX 5B ; pop_rbx # Restore RBX C3 ; ret # HEX to ascii routine # Receives INT in RAX and CHAR* in RBX # Stores ascii of INT in CHAR* # Returns only modifying RAX :hex64l 50 ; push_rax # Protect top 32 E8 %hex32l ; call %hex32l # Store it 58 ; pop_rax # do top 32 48C1E8 20 ; shr_rax, !32 # do bottom 32 first :hex32l 50 ; push_rax # Protect top 16 E8 %hex16l ; call %hex16l # Store it 58 ; pop_rax # do top 16 48C1E8 10 ; shr_rax, !16 # do bottom 16 first :hex16l 50 ; push_rax # Protect top byte E8 %hex8 ; call %hex8 # Store it 58 ; pop_rax # do high byte 48C1E8 08 ; shr_rax, !8 # do bottom byte first :hex8 50 ; push_rax # Protect bottom nibble 48C1E8 04 ; shr_rax, !4 # do high nibble first E8 %hex4 ; call %hex4 # Store it 58 ; pop_rax # do low nibble :hex4 4883E0 0F ; and_rax, !0xF # isolate nibble 04 30 ; add_al, !0x30 # convert to ascii (add '0') 3C 39 ; cmp_al, !0x39 # valid digit? 
(compare to '9') 76 !hex1 ; jbe8 !hex1 # yes 04 07 ; add_al, !7 # use alpha range :hex1 8803 ; mov_[rbx],al # store result 4883C3 01 ; add_rbx, !1 # next position C3 ; ret # Preserve_Other function # Receives List in RAX # Updates the list in place; does not modify registers # Uses RAX for I, RBX for I->TEXT :Preserve_Other 50 ; push_rax # Protect RAX 53 ; push_rbx # Protect RBX 51 ; push_rcx # Protect RCX 52 ; push_rdx # Protect RDX :Preserve_Other_Loop 488B58 18 ; mov_rbx,[rax+BYTE] !24 # I->EXPRESSION 4883FB 00 ; cmp_rbx, %0 # IF NULL == I->EXPRESSION 75 !Preserve_Other_Next ; jne8 !Preserve_Other_Next # Otherwise next # Needs preserving 488B58 10 ; mov_rbx,[rax+BYTE] !16 # I->TEXT 488958 18 ; mov_[rax+BYTE],rbx !24 # I->EXPRESSION = I->TEXT :Preserve_Other_Next 488B00 ; mov_rax,[rax] # I = I->NEXT 4883F8 00 ; cmp_rax, %0 # IF NULL == I 75 !Preserve_Other_Loop ; jne8 !Preserve_Other_Loop # Otherwise keep looping 5A ; pop_rdx # Restore RDX 59 ; pop_rcx # Restore RCX 5B ; pop_rbx # Restore RBX 58 ; pop_rax # Restore RAX C3 ; ret # Print_Hex function # Reads the list head from R13 # walks the list and prints the I->EXPRESSION for all nodes followed by newline # Uses RBX for I :Print_Hex 53 ; push_rbx # Protect RBX 4C89EB ; mov_rbx,r13 # I = Head :Print_Hex_Loop 488B43 08 ; mov_rax,[rbx+BYTE] !8 # I->TYPE 4883F8 01 ; cmp_rax, %1 # IF MACRO == I->TYPE 74 !Print_Hex_Next ; je8 !Print_Hex_Next # Skip 488B43 18 ; mov_rax,[rbx+BYTE] !24 # Using EXPRESSION E8 %File_Print ; call %File_Print # Print it 48C7C0 0A000000 ; mov_rax, %10 # NEWLINE E8 %fputc ; call %fputc # Append it :Print_Hex_Next 488B1B ; mov_rbx,[rbx] # Iterate to next Token 4883FB 00 ; cmp_rbx, %0 # Check for NULL 75 !Print_Hex_Loop ; jne8 !Print_Hex_Loop # Otherwise keep looping 5B ; pop_rbx # Restore RBX C3 ; ret # File_Print function # Receives CHAR* in RAX # calls fputc for every non-null char :File_Print 53 ; push_rbx # Protect RBX 4889C3 ; mov_rbx,rax # Protect S 4883F8 00 ; cmp_rax, %0 # Protect against nulls 
74 !File_Print_Done ; je8 !File_Print_Done # Simply don't try to print them :File_Print_Loop 8A03 ; mov_al,[rbx] # Read byte 480FB6C0 ; movzx_rax,al # zero extend 4883F8 00 ; cmp_rax, %0 # Check for NULL 74 !File_Print_Done ; je8 !File_Print_Done # Stop at NULL E8 %fputc ; call %fputc # write it 4883C3 01 ; add_rbx, %1 # S = S + 1 EB !File_Print_Loop ; jmp8 !File_Print_Loop # Keep going :File_Print_Done 5B ; pop_rbx # Restore RBX C3 ; ret # fputc function # receives CHAR in RAX and FILE* in R14 # writes char and returns :fputc 50 ; push_rax # We are writing rax 488D3424 ; lea_rsi,[rsp] # Get stack address 4C89F7 ; mov_rdi,r14 # Write to target file 48C7C0 01000000 ; mov_rax, %1 # the syscall number for write 52 ; push_rdx # Protect RDX 48C7C2 01000000 ; mov_rdx, %1 # set the size of chars we want 4153 ; push_r11 # Protect r11 0F05 ; syscall # call the Kernel 415B ; pop_r11 # Restore r11 5A ; pop_rdx # Restore RDX 58 ; pop_rax # Restore stack C3 ; ret :ELF_end
# Copyright (C) 2017 Jeremiah Orians # Copyright (C) 2022 Andrius Štikonas # This file is part of stage0. # # stage0 is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # stage0 is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with stage0. If not, see <http://www.gnu.org/licenses/>. DEFINE add_al, 04 DEFINE add_rax, 4805 DEFINE add_rbx, 4881C3 DEFINE add_rcx, 4881C1 DEFINE add_rdx, 4881C2 DEFINE add_rsi, 4881C6 DEFINE add_rax,rbx 4801D8 DEFINE add_rax,rcx 4801C8 DEFINE add_rbx,rax 4801C3 DEFINE add_rcx,rdi 4801F9 DEFINE add_rdi,rax 4801C7 DEFINE and_rax, 4825 DEFINE and_rax,rbx 4821D8 DEFINE call E8 DEFINE call_rax FFD0 DEFINE cmp_al, 3C DEFINE cmp_rax, 483D DEFINE cmp_rbx, 4881FB DEFINE cmp_rcx, 4881F9 DEFINE cmp_rdx, 4881FA DEFINE cmp_rbp, 4881FD DEFINE cmp_rsi, 4881FE DEFINE cmp_r12, 4981FC DEFINE cmp_rax,rbx 4839D8 DEFINE cmp_rax,rcx 4839C8 DEFINE cmp_rbx,rcx 4839CB DEFINE cmp_rbx,rdx 4839D3 DEFINE cmp_rsi,rdi 4839FE DEFINE jbe 0F86 DEFINE je 0F84 DEFINE jg 0F8F DEFINE jl 0F8C DEFINE jle 0F8E DEFINE jmp E9 DEFINE jne 0F85 DEFINE idiv_rbx 48F7FB DEFINE imul_rax, 4869C0 DEFINE imul_rbp, 4869ED DEFINE imul_rax,rbx 480FAFC3 DEFINE lea_rsi,[rsp] 488D3424 DEFINE lea_rax,[rip+DWORD] 488D05 DEFINE lea_rbx,[rip+DWORD] 488D1D DEFINE lea_rcx,[rip+DWORD] 488D0D DEFINE lea_rdx,[rip+DWORD] 488D15 DEFINE mov_rax, 48C7C0 DEFINE mov_rbx, 48C7C3 DEFINE mov_rcx, 48C7C1 DEFINE mov_rdi, 48C7C7 DEFINE mov_rdx, 48C7C2 DEFINE mov_rsi, 48C7C6 DEFINE mov_r14, 49C7C6 DEFINE mov_r15, 49C7C7 DEFINE mov_rax,rbp 4889E8 DEFINE mov_rax,rbx 4889D8 DEFINE 
mov_rax,rcx 4889C8 DEFINE mov_rax,rdx 4889D0 DEFINE mov_rax,r12 4C89E0 DEFINE mov_rax,r13 4C89E8 DEFINE mov_rax,rsi 4889F0 DEFINE mov_rbp,rax 4889C5 DEFINE mov_rbp,rdx 4889D5 DEFINE mov_rbx,rax 4889C3 DEFINE mov_rbx,rcx 4889CB DEFINE mov_rbx,rdx 4889D3 DEFINE mov_rcx,rax 4889C1 DEFINE mov_rcx,rbx 4889D9 DEFINE mov_rdi,rsi 4889F7 DEFINE mov_rdi,r13 4C89EF DEFINE mov_rdi,r14 4C89F7 DEFINE mov_rdi,r15 4C89FF DEFINE mov_rdx,rax 4889C2 DEFINE mov_rdx,rbx 4889DA DEFINE mov_rsi,rax 4889C6 DEFINE mov_rsi,rdi 4889FE DEFINE mov_r12,rax 4989C4 DEFINE mov_r13,rax 4989C5 DEFINE mov_r13,rdi 4989FD DEFINE mov_r14,rax 4989C6 DEFINE mov_r15,rax 4989C7 DEFINE mov_al,[rax] 8A00 DEFINE mov_al,[rbx] 8A03 DEFINE mov_al,[rcx] 8A01 DEFINE mov_al,[rdx] 8A02 DEFINE mov_bl,[rbx] 8A1B DEFINE mov_bl,[rcx] 8A19 DEFINE mov_bl,[rdx] 8A1A DEFINE mov_cl,[rbx] 8A0B DEFINE mov_rax,[rax] 488B00 DEFINE mov_rax,[rbx] 488B03 DEFINE mov_rax,[r12] 498B0424 DEFINE mov_rax,[r12+BYTE] 498B4424 DEFINE mov_rbx,[rax] 488B18 DEFINE mov_rbx,[rbx] 488B1B DEFINE mov_rbx,[rbx+BYTE] 488B5B DEFINE mov_rcx,[rbx] 488B0B DEFINE mov_rcx,[rcx] 488B09 DEFINE mov_r12,[r12] 4D8B2424 DEFINE mov_[rbx],al 8803 DEFINE mov_[rcx],al 8801 DEFINE mov_[rcx],bl 8819 DEFINE mov_[rsi],al 8806 DEFINE mov_[rax],rbx 488918 DEFINE mov_[rax],rcx 488908 DEFINE mov_[rbx],rax 488903 DEFINE mov_[rdx],rax 488902 DEFINE mov_cl,[rbx+BYTE] 8A4B DEFINE mov_rax,[rax+BYTE] 488B40 DEFINE mov_rax,[rbx+BYTE] 488B43 DEFINE mov_rax,[rcx+BYTE] 488B41 DEFINE mov_rax,[rdx+BYTE] 488B42 DEFINE mov_rbx,[rax+BYTE] 488B58 DEFINE mov_rbx,[rcx+BYTE] 488B59 DEFINE mov_rcx,[rax+BYTE] 488B48 DEFINE mov_rcx,[rcx+BYTE] 488B49 DEFINE mov_rcx,[rdx+BYTE] 488B4A DEFINE mov_rdi,[rdx+BYTE] 488B7A DEFINE mov_rdx,[rdx+BYTE] 488B52 DEFINE mov_rax,[rax+DWORD] 488B40 DEFINE mov_rbx,[rbx+DWORD] 488B5B DEFINE mov_rax,[rip+DWORD] 488B05 DEFINE mov_rbx,[rip+DWORD] 488B1D DEFINE mov_rcx,[rip+DWORD] 488B0D DEFINE mov_[rax+BYTE],rbx 488958 DEFINE mov_[rax+BYTE],rcx 488948 DEFINE 
mov_[rax+BYTE],rdx 488950 DEFINE mov_[rbp+BYTE],rax 488945 DEFINE mov_[rbp+BYTE],rdx 488955 DEFINE mov_[rbp+BYTE],rsi 488975 DEFINE mov_[rcx+BYTE],rax 488941 DEFINE mov_[rdx+BYTE],rax 488942 DEFINE mov_[rdx+BYTE],rbx 48895A DEFINE mov_[rdx+BYTE],rcx 48894A DEFINE mov_[rdx+BYTE],rbp 48896A DEFINE mov_[rdx+BYTE],rsi 488972 DEFINE mov_[rip+DWORD],rax 488905 DEFINE mov_[rip+DWORD],rbx 48891D DEFINE mov_[rip+DWORD],rcx 48890D DEFINE mov_[rip+DWORD],rdx 488915 DEFINE movzx_rax,al 480FB6C0 DEFINE movzx_rbx,bl 480FB6DB DEFINE movzx_rcx,cl 480FB6C9 DEFINE pop_rax 58 DEFINE pop_rbp 5D DEFINE pop_rbx 5B DEFINE pop_rcx 59 DEFINE pop_rdi 5F DEFINE pop_rdx 5A DEFINE pop_rsi 5E DEFINE pop_r11 415B DEFINE push_rax 50 DEFINE push_rbp 55 DEFINE push_rbx 53 DEFINE push_rcx 51 DEFINE push_rdi 57 DEFINE push_rdx 52 DEFINE push_rsi 56 DEFINE push_r11 4153 DEFINE ret C3 DEFINE sal_rax, 48C1E0 DEFINE shl_rax, 48C1E0 DEFINE shr_rax, 48C1E8 DEFINE shr_rbx 48D1EB DEFINE sub_rax, 482D DEFINE sub_rcx, 4881E9 DEFINE sub_rsi, 4881EE DEFINE syscall 0F05 DEFINE xchg_rax,rbx 4893 DEFINE NULL 0000000000000000 # Register usage: # RAX, RSI, RDI => Temps # R13 => MALLOC # R14 => Output_file # R15 => Input_file # Struct TYPE format: (size 56) # NEXT => 0 # SIZE => 8 # OFFSET => 16 # INDIRECT => 24 # MEMBERS => 32 # TYPE => 40 # NAME => 48 # Struct TOKEN_LIST format: (size 40) # NEXT => 0 # LOCALS/PREV => 8 # S => 16 # TYPE => 24 # ARGS/DEPTH => 32 # Where the ELF Header is going to hit # Simply jump to _start # Our main function :_start pop_rax # Get the number of arguments pop_rdi # Get the program name pop_rdi # Get the actual input name mov_rsi, %0 # prepare read_only mov_rax, %2 # the syscall number for open() syscall # Now open that damn file mov_r15,rax # Preserve the file pointer we were given pop_rdi # Get the actual output name mov_rsi, %577 # Prepare file as O_WRONLY|O_CREAT|O_TRUNC mov_rdx, %384 # Prepare file as RW for owner only (600 in octal) mov_rax, %2 # the syscall number for open() 
# (continuation of _start: the open() for the output file was set up just above)
	syscall                     # Now open that damn file
	cmp_rax, %0                 # Check for missing output
	jg %_start_out              # Have a real output file
	mov_rax, %1                 # Otherwise fall back to stdout

:_start_out
	mov_r14,rax                 # Preserve the file pointer we were given
	mov_rax, %12                # the Syscall # for SYS_BRK
	mov_rdi, %0                 # Get current brk
	syscall                     # Let the kernel do the work
	mov_r13,rax                 # Set our malloc pointer

	call %fix_types             # Resolve relative addresses in types struct to absolute
	mov_rax, %0                 # HEAD = NULL
	call %read_all_tokens       # Read all tokens
	call %Reverse_List          # Reverse order
#	call %debug_list            # Try to figure out what is wrong
	mov_[rip+DWORD],rax %global_token       # Set global_token
	call %program               # Convert into program

	# Output order: header 1, core program, header 3, globals,
	# header 4, strings, header 5.
	lea_rax,[rip+DWORD] %header_string1     # Our header string
	call %File_Print            # Print it
	mov_rax,[rip+DWORD] %output_list        # Our output_list
	call %recursive_output      # Print core program
#	lea_rax,[rip+DWORD] %header_string2     # Our Enable debug
#	call %File_Print            # Print it
	lea_rax,[rip+DWORD] %header_string3     # Our second label
	call %File_Print            # Print it
	mov_rax,[rip+DWORD] %globals_list       # Our globals
	call %recursive_output      # Get them
	lea_rax,[rip+DWORD] %header_string4     # Our final header
	call %File_Print            # Print it
	mov_rax,[rip+DWORD] %strings_list       # Our strings
	call %recursive_output      # Get them
	lea_rax,[rip+DWORD] %header_string5     # Our final header
	call %File_Print            # Print it

:Done
	mov_rdi, %0                 # All is well
	mov_rax, %0x3C              # put the exit syscall number in rax
	syscall                     # Call it a good day

# Section headers written between the output lists.
# NOTE(review): the blanks at either end of these literals look like embedded
# line breaks that were flattened to spaces -- confirm against a pristine copy.
:header_string1
" # Core program "
:header_string2
" :ELF_data "
:header_string3
" # Program global variables "
:header_string4
" # Program strings "
:header_string5
" :ELF_end "


# Resolve relative addresses in types struct to absolute
# Uses RAX to store current type, RBX for temp
# Publishes the address of prim_types in global_types, then walks 56-byte
# records. The NAME (48), TYPE (40), INDIRECT (24) and NEXT (0) fields each
# hold an offset that is added to the record's own address and stored back.
# The walk stops at the first record whose NEXT offset is zero.
:fix_types
	push_rbx                    # Protect RBX
	lea_rax,[rip+DWORD] %prim_types         # Get address of prim_types
	mov_[rip+DWORD],rax %global_types       # Write it to global_types

:fix_type
	mov_rbx,[rax+BYTE] !48      # Get offset to NAME
	add_rbx,rax                 # Get NAME
	mov_[rax+BYTE],rbx !48      # Store NAME

	mov_rbx,[rax+BYTE] !40      # Get offset to TYPE
	add_rbx,rax                 # Get TYPE
	mov_[rax+BYTE],rbx !40      # Store TYPE

	mov_rbx,[rax+BYTE] !24      # Get offset to INDIRECT
	add_rbx,rax                 # Get INDIRECT
	mov_[rax+BYTE],rbx !24      # Store INDIRECT

	mov_rbx,[rax]               # Get offset to NEXT
	cmp_rbx, %0                 # If no more types
	je %fix_types_done          # Then we are done

	add_rbx,rax                 # Get NEXT
	mov_[rax],rbx               # Store NEXT

	add_rax, %56                # Go to next type
	jmp %fix_type

:fix_types_done
	pop_rbx                     # Restore RBX
	ret


# read_all_tokens function
# Receives FILE* in R15 and Token_List* in RAX
# Tokenizes all input and returns updated list in RAX
# Returns TOKEN in RAX
# Uses RAX for C
:read_all_tokens
	mov_[rip+DWORD],rax %Token  # Remember the list head we were handed
	call %fgetc                 # Prime C with the first character
:read_all_tokens_loop
	cmp_rax, %-4                # Check for EOF
	je %read_all_tokens_done    # Stop if found
	call %get_token             # Read one token; returns the lookahead char
	jmp %read_all_tokens_loop   # Loop
:read_all_tokens_done
	mov_rax,[rip+DWORD] %Token
	ret


# get_token function
# Receives INT in RAX and FILE* in R15
# Makes a list of TOKEN_LIST
# C and STRING_INDEX are stored in memory, RCX is used for S and RDX is used for current
# Returns C in RAX
#
# Each call allocates one 40-byte node and one 256-byte text buffer. This
# routine performs no length check on that buffer. When EOF is reached
# before a token starts, or inside a block comment, the fresh node is left
# unlinked and EOF is returned.
:get_token
	push_rbx                    # Protect RBX
	push_rcx                    # Protect RCX
	push_rdx                    # Protect RDX

	mov_[rip+DWORD],rax %C      # Set C

	mov_rax, %40                # Malloc CURRENT
	call %malloc                # Get Pointer
	mov_rdx,rax                 # Set CURRENT

	mov_rax, %256               # Malloc the string
	call %malloc                # Get pointer to S
	mov_rcx,rax                 # Set S
	mov_[rdx+BYTE],rcx !16      # CURRENT->S = S
:reset
	mov_[rip+DWORD],rcx %string_index       # Rewind the write position to S[0]
	mov_rax,[rip+DWORD] %C      # Using C
	call %clear_white_space     # Clear WhiteSpace
	mov_[rip+DWORD],rax %C      # Set C

	cmp_rax, %-4                # Check for EOF
	je %get_token_abort         # if EOF abort

	cmp_rax, %35                # Check for '#'
	jne %get_token_alpha        # Nope

	# Deal with # line comments
	call %purge_macro           # Let it handle it
	mov_[rip+DWORD],rax %C      # Set C
	jmp %reset                  # Try again

:get_token_alpha
	mov_rax,[rip+DWORD] %C      # Send C
	lea_rbx,[rip+DWORD] %alphas # Get alphanumerics
	call %In_Set                # See if in set
	cmp_rax, %1                 # IF TRUE
	jne %get_token_symbol       # Otherwise

	# Store keywords
	mov_rax,[rip+DWORD] %C      # Send C
	call %preserve_keyword      # Store
	mov_[rip+DWORD],rax %C      # Set C
	jmp %get_token_done         # Be done with this token

:get_token_symbol
	mov_rax,[rip+DWORD] %C      # Send C
	lea_rbx,[rip+DWORD] %symbols            # Get symbols
	call %In_Set                # See if in set
	cmp_rax, %1                 # IF TRUE
	jne %get_token_strings      # Otherwise

	# Store symbols
	mov_rax,[rip+DWORD] %C      # Send C
	call %preserve_symbol       # Store
	mov_[rip+DWORD],rax %C      # Set C
	jmp %get_token_done         # Be done with this token

:get_token_strings
	mov_rax,[rip+DWORD] %C      # Send C
	lea_rbx,[rip+DWORD] %strings            # Get strings
	call %In_Set                # See if in set
	cmp_rax, %1                 # IF TRUE
	jne %get_token_comment      # Otherwise

	# Store String
	mov_rax,[rip+DWORD] %C      # Send C
	call %consume_word          # Store
	mov_[rip+DWORD],rax %C      # Set C
	jmp %get_token_done         # Be done with this token

:get_token_comment
	mov_rax,[rip+DWORD] %C      # Send C
	cmp_rax, %47                # IF '/' == C
	jne %get_token_else         # Otherwise

	call %consume_byte          # Hope it just is '/'
	mov_[rip+DWORD],rax %C      # Set C

	cmp_rax, %42                # IF '*' we have '/*'
	jne %get_token_comment_line # Check for '//'

	# Deal with /* block comments */
	call %fgetc                 # get next C
	mov_[rip+DWORD],rax %C      # Set C
:get_token_comment_block_outer
	mov_rax,[rip+DWORD] %C      # Using C
	cmp_rax, %47                # IF '/' == C
	je %get_token_comment_block_done        # be done

:get_token_comment_block_inner
	mov_rax,[rip+DWORD] %C      # Using C
	# Every pass of both comment loops comes through here, so one EOF test
	# is enough to stop an unterminated comment from spinning forever.
	cmp_rax, %-4                # Check for EOF inside the comment
	je %get_token_abort         # Give up; EOF is returned to the caller
	cmp_rax, %42                # IF '*' == C
	je %get_token_comment_block_iter        # jump over

	# Deal with inner loop
	call %fgetc                 # get next C
	mov_[rip+DWORD],rax %C      # Set C
	jmp %get_token_comment_block_inner      # keep going

:get_token_comment_block_iter
	call %fgetc                 # get next C
	mov_[rip+DWORD],rax %C      # Set C
	jmp %get_token_comment_block_outer

:get_token_comment_block_done
	call %fgetc                 # get next C
	mov_[rip+DWORD],rax %C      # Set C
	jmp %reset                  # throw away, try again

:get_token_comment_line
	cmp_rax, %47                # IF '/' we have //
	jne %get_token_done         # keep if just '/'

	# Deal with // line comment
	call %fgetc                 # drop to match
	mov_[rip+DWORD],rax %C      # Set C
	jmp %reset                  # throw away, try again

:get_token_else
	mov_rax,[rip+DWORD] %C      # Send C
	call %consume_byte
	mov_[rip+DWORD],rax %C      # Set C

:get_token_done
	mov_rax,[rip+DWORD] %Token  # TOKEN
	mov_[rdx+BYTE],rax !8       # CURRENT->PREV = TOKEN
	mov_[rdx],rax               # CURRENT->NEXT = TOKEN
	mov_[rip+DWORD],rdx %Token  # TOKEN = CURRENT

:get_token_abort
	pop_rdx                     # Restore RDX
	pop_rcx                     # Restore RCX
	pop_rbx                     # Restore RBX
	mov_rax,[rip+DWORD] %C      # Return C
	ret


# Malloc isn't actually required if the program being built fits in the initial memory
# However, it doesn't take much to add it.
# Requires R13 to be initialized and RAX to have the number of desired bytes
# Returns the previous value of R13 in RAX and advances R13 by the request.
# The value the kernel hands back from brk is overwritten without being checked.
:malloc
	mov_rdi,r13                 # Using the current pointer
	add_rdi,rax                 # Request the number of desired bytes
	mov_rax, %12                # the Syscall # for SYS_BRK
	push_rcx                    # Protect rcx
	push_r11                    # Protect r11
	syscall                     # call the Kernel
	pop_r11                     # Restore r11
	pop_rcx                     # Restore rcx
	mov_rax,r13                 # Return pointer
	mov_r13,rdi                 # Update pointer
	ret


# clear_white_space function
# Receives INT C in RAX and FILE* in R15
# Returns first non-whitespace char in RAX
# Space (32), line feed (10) and tab (9) are skipped; EOF ends the scan.
:clear_white_space
	cmp_rax, %32                # Check for ' '
	je %clear_white_space_wipe  # wipe it out

	cmp_rax, %10                # Check for '\n'
	je %clear_white_space_wipe  # wipe it out

	cmp_rax, %9                 # Check for '\t'
	jne %clear_white_space_done # looks like non-whitespace

:clear_white_space_wipe
	call %fgetc                 # Read a new byte
	cmp_rax, %-4                # Check for EOF
	je %clear_white_space_done  # Short circuit
	jmp %clear_white_space      # iterate

:clear_white_space_done
	ret


# In_Set function
# Receives Char C in RAX and CHAR* in RBX
# Returns 1 if true, zero if false in RAX
# The equality test runs before the terminator test, so C == 0 matches the
# terminating NUL of the set.
:In_Set
	push_rbx                    # Protect RBX
	push_rcx                    # Protect RCX
:In_Set_loop
	mov_cl,[rbx]                # Read char
	movzx_rcx,cl                # Zero extend it

	cmp_rax,rcx                 # See if they match
	je %In_Set_True             # return true

	cmp_rcx, %0                 # Check for NULL
	je %In_Set_False            # return false

	add_rbx, %1                 # s = s + 1
	jmp %In_Set_loop            # Keep looping

:In_Set_True
	mov_rax, %1                 # Set True
	pop_rcx                     # Restore RCX
	pop_rbx                     # Restore RBX
	ret
:In_Set_False
	# Failure exit of In_Set: C was not found before the terminating NUL
	mov_rax, %0                 # Set FALSE
	pop_rcx                     # Restore RCX
	pop_rbx                     # Restore RBX
	ret

# Character classes handed to In_Set by the tokenizer
:alphas
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"

:symbols
"<=>|&!-"

# Double quote, single quote and the terminating NUL, given as raw hex
:strings
'22 27 00'


# purge_macro function
# Receives CH in RAX
# Reads chars until Line feed or EOF is read
# Returns the line feed, or -4 (EOF) when the input ends first
# Without the EOF test a comment on a final line that lacks a line feed
# would keep this loop reading forever.
:purge_macro
	call %fgetc                 # read next char
	cmp_rax, %-4                # Check for EOF
	je %purge_macro_done        # Nothing left to read; hand EOF back
	cmp_rax, %10                # Check for '\n'
	jne %purge_macro            # Keep going
:purge_macro_done
	ret


# preserve_keyword function
# Receives INT C in RAX
# collects all chars in keyword
# Returns C in RAX
# Uses RCX for INT C
# When the char that ends the keyword is ':', the token is rewritten by
# fixup_label and a space (32) is returned in place of the ':'.
:preserve_keyword
	push_rbx                    # Protect RBX
	push_rcx                    # Protect RCX
	mov_rcx,rax                 # Setup C
	lea_rbx,[rip+DWORD] %alphas # Concerning ourselves with "abc.."
:preserve_keyword_loop
	call %In_Set                # Check if alphanumerics
	cmp_rax, %1                 # IF TRUE
	jne %preserve_keyword_label # Otherwise check for label

	mov_rax,rcx                 # Pass C
	call %consume_byte          # consume that byte
	mov_rcx,rax                 # Update C
	jmp %preserve_keyword_loop  # keep looping

:preserve_keyword_label
	mov_rax,rcx                 # Fix return
	cmp_rax, %58                # Check for ':'
	jne %preserve_keyword_done  # be done

	# Fix our goto label
	call %fixup_label           # Fix the label
	mov_rax, %32                # Return Whitespace

:preserve_keyword_done
	pop_rcx                     # Restore RCX
	pop_rbx                     # Restore RBX
	ret


# preserve_symbol function
# Receives INT C in RAX
# collects all chars in symbol
# Returns C in RAX
# Uses RCX for INT C
:preserve_symbol
	push_rbx                    # Protect RBX
	push_rcx                    # Protect RCX
	mov_rcx,rax                 # Setup C
	lea_rbx,[rip+DWORD] %symbols            # Concerning ourselves with "<=>.."
:preserve_symbol_loop
	# Loop body of preserve_symbol: RCX holds C, RBX points at the symbol set
	call %In_Set                # Check if symbol
	cmp_rax, %1                 # IF TRUE
	jne %preserve_symbol_done   # Otherwise be done

	mov_rax,rcx                 # Pass C
	call %consume_byte          # consume that byte
	mov_rcx,rax                 # Update C
	jmp %preserve_symbol_loop   # keep looping

:preserve_symbol_done
	mov_rax,rcx                 # Fix return
	pop_rcx                     # Restore RCX
	pop_rbx                     # Restore RBX
	ret


# consume_word function
# receives INT C in RAX
# returns INT C in RAX
# Uses RAX for C, RBX for FREQ and RCX for ESCAPE
# Copies a quoted literal into the token buffer: the opening quote and the
# body are stored, the closing quote is not. The char following the closing
# quote is returned. If the input ends first, -4 (EOF) is returned instead
# of looping on the EOF sentinel and storing it over and over.
:consume_word
	push_rbx                    # Protect RBX
	push_rcx                    # Protect RCX
	mov_rbx,rax                 # FREQ = C
	mov_rcx, %0                 # ESCAPE = FALSE
:consume_word_loop
	cmp_rcx, %0                 # IF !ESCAPE
	jne %consume_word_escape    # Previous char was a backslash

	cmp_rax, %92                # if '\\'
	jne %consume_word_iter      # keep state

	mov_rcx, %1                 # ESCAPE = TRUE
	jmp %consume_word_iter      # keep going

:consume_word_escape
	mov_rcx, %0                 # ESCAPE = FALSE

:consume_word_iter
	call %consume_byte          # store C, read next char
	cmp_rax, %-4                # Check for EOF
	je %consume_word_done       # Unterminated literal: stop here

	cmp_rcx, %0                 # IF ESCAPE
	jne %consume_word_loop      # keep looping

	cmp_rax,rbx                 # IF C != FREQ
	jne %consume_word_loop      # keep going

	call %fgetc                 # return next char
:consume_word_done
	pop_rcx                     # Restore RCX
	pop_rbx                     # Restore RBX
	ret


# consume_byte function
# Receives INT C in RAX
# Inserts C into string S, updates String S
# Returns Next char in RAX
# The write position lives in string_index; no bound is checked here.
:consume_byte
	push_rbx                    # Protect RBX
	mov_rbx,[rip+DWORD] %string_index       # S[0]
	mov_[rbx],al                # S[0] = C
	add_rbx, %1                 # S = S + 1
	mov_[rip+DWORD],rbx %string_index       # Update S
	call %fgetc
	pop_rbx                     # Restore RBX
	ret


# fixup_label function
# Reads the string pointer from offset 16 of the node addressed by RDX
# prepends ':' to string and returns registers unchanged
# Uses RAX for HOLD, RBX for PREV and RCX for the walking pointer
# Every char is moved one place to the right; the last char lands on the old
# terminator and the byte after it is not written by this routine.
:fixup_label
	push_rax                    # Protect RAX
	push_rbx                    # Protect RBX
	push_rcx                    # Protect RCX
	mov_rax, %58                # HOLD = ':'
	mov_rcx,[rdx+BYTE] !16      # HOLD_STRING[0]
:fixup_label_loop
	mov_rbx,rax                 # PREV = HOLD
	mov_al,[rcx]                # HOLD = HOLD_STRING[I]
	movzx_rax,al                # make useful
	mov_[rcx],bl                # HOLD_STRING[I] = PREV
	add_rcx, %1                 # I = I + 1
	cmp_rax, %0                 # IF NULL == HOLD
	jne %fixup_label_loop       # Keep looping

	pop_rcx                     # Restore RCX
	pop_rbx                     # Restore RBX
# (end of fixup_label: the last saved register comes back and we return)
	pop_rax                     # Restore RAX
	ret


# fgetc function
# Receives FILE* in R15
# Returns -4 (EOF) or char in RAX
# A stack slot is pre-filled with -4 and a one-byte read targets that slot.
# If nothing is written the slot still reads as -4. An input byte of 0xFC
# leaves the slot equal to -4 as well, so it is reported as EOF.
# RSI and RDI are overwritten; RDX, RCX and R11 are saved around the syscall.
:fgetc
	mov_rax, %-4                # Put EOF in rax
	push_rax                    # Assume bad (If nothing read, value will remain EOF)
	lea_rsi,[rsp]               # Get stack address
	mov_rdi,r15                 # Where are we reading from
	mov_rax, %0                 # the syscall number for read
	push_rdx                    # Protect RDX
	mov_rdx, %1                 # set the size of chars we want
	push_rcx                    # Protect RCX
	push_r11                    # Protect R11
	syscall                     # call the Kernel
	pop_r11                     # Restore R11
	pop_rcx                     # Restore RCX
:fgetc_1
	# No jump to this label appears in the visible part of the file
	pop_rdx                     # Restore RDX
	pop_rax                     # Get either char or EOF
	cmp_rax, %-4                # Check for EOF
	je %fgetc_done              # Return as is
	movzx_rax,al                # Make it useful
:fgetc_done
	ret                         # return


# Reverse_List function
# Receives List in RAX
# Returns the list reversed in RAX
# Only the link at offset 0 of each node is rewritten.
:Reverse_List
	push_rbx                    # Protect RBX
	push_rcx                    # Protect RCX
	mov_rbx,rax                 # Set HEAD
	mov_rax, %0                 # ROOT = NULL
:Reverse_List_Loop
	cmp_rbx, %0                 # WHILE HEAD != NULL
	je %Reverse_List_Done       # Stop otherwise

	mov_rcx,[rbx]               # NEXT = HEAD->NEXT
	mov_[rbx],rax               # HEAD->NEXT = ROOT
	mov_rax,rbx                 # ROOT = HEAD
	mov_rbx,rcx                 # HEAD = NEXT
	jmp %Reverse_List_Loop      # Keep Going

:Reverse_List_Done
	pop_rcx                     # Restore RCX
	pop_rbx                     # Restore RBX
	ret


# recursive_output function
# Receives list in RAX
# walks the list and prints the I->S for all nodes backwards
# Uses RBX for I
# Recurses on the NEXT link before printing, so the call depth equals the
# length of the list.
:recursive_output
	push_rbx                    # Protect RBX
	push_rcx                    # Protect RCX
	cmp_rax, %0                 # Check for NULL
	je %recursive_output_done   # Skip the work
	mov_rbx,rax                 # I = Head

	mov_rax,[rbx]               # Iterate to next Token
	call %recursive_output      # Recurse

	mov_rax,[rbx+BYTE] !16      # Using S
	call %File_Print            # Print it

:recursive_output_done
	pop_rcx                     # Restore RCX
	pop_rbx                     # Restore RBX
	ret


# File_Print function
# Receives CHAR* in RAX
# calls fputc for every non-null char
# A null pointer is accepted and prints nothing.
:File_Print
	push_rbx                    # Protect RBX
	push_rcx                    # Protect RCX
	mov_rbx,rax                 # Protect S
	cmp_rax, %0                 # Protect against nulls
	je %File_Print_Done         # Simply don't try to print them
:File_Print_Loop
	mov_al,[rbx]                # Read byte
	movzx_rax,al                # zero extend
# (continuation of File_Print_Loop: RAX holds the byte just read from S)
	cmp_rax, %0                 # Check for NULL
	je %File_Print_Done         # Stop at NULL
	call %fputc                 # write it
	add_rbx, %1                 # S = S + 1
	jmp %File_Print_Loop        # Keep going

:File_Print_Done
	pop_rcx                     # Restore RCX
	pop_rbx                     # Restore RBX
	ret


# fputc function
# receives CHAR in RAX and FILE* in R14
# writes char and returns
# The char is pushed so the write can take it from the stack. RSI and RDI
# are overwritten; RAX comes back as the pushed value, so the result of the
# write is not reported.
:fputc
	push_rax                    # We are writing rax
	lea_rsi,[rsp]               # Get stack address
	mov_rdi,r14                 # Write to target file
	mov_rax, %1                 # the syscall number for write
	push_rdx                    # Protect RDX
	mov_rdx, %1                 # set the size of chars we want
	push_rcx                    # Protect RCX
	push_r11                    # Protect R11
	syscall                     # call the Kernel
	pop_r11                     # Restore R11
	pop_rcx                     # Restore RCX
	pop_rdx                     # Restore RDX
	pop_rax                     # Restore stack
	ret


# program function
# receives nothing, returns nothing
# Uses RAX for type_size
# Top-level loop over global_token. Each pass handles one of:
#   CONSTANT <name> <value>, a type definition (type_name returned NULL),
#   a global variable (<type> <name> ;) or a function (<type> <name> ( ...).
# Anything else falls out of the loop through program_error.
:program
	# The binary initialized the globals to null, so we can skip those steps
	push_rbx                    # Protect RBX
	push_rcx                    # Protect RCX

:new_type
	mov_rax,[rip+DWORD] %global_token       # Using global_token
	cmp_rax, %0                 # Check if NULL
	je %program_done            # Be done if null

	mov_rbx,[rax+BYTE] !16      # GLOBAL_TOKEN->S
	lea_rax,[rip+DWORD] %constant           # "CONSTANT"
	call %match                 # IF GLOBAL_TOKEN->S == "CONSTANT"
	cmp_rax, %0                 # If true
	jne %program_else           # Looks like not a constant

	# Deal with minimal constant case
	mov_rax,[rip+DWORD] %global_token       # Using global_token
	mov_rax,[rax]               # global_token->next
	mov_[rip+DWORD],rax %global_token       # global_token = global_token->next

	mov_rax,[rax+BYTE] !16      # global_token->S
	mov_rbx, %0                 # NULL
	mov_rcx,[rip+DWORD] %global_constant_list   # global_constant_list
	call %sym_declare           # Declare that constant
	mov_[rip+DWORD],rax %global_constant_list   # global_constant_list = sym_declare(global_token->s, NULL, global_constant_list);

	mov_rbx,[rip+DWORD] %global_token       # Using global_token
	mov_rbx,[rbx]               # global_token->next
	mov_[rax+BYTE],rbx !32      # global_constant_list->arguments = global_token->next

	mov_rbx,[rbx]               # global_token->next->next
	mov_[rip+DWORD],rbx %global_token       # global_token = global_token->next->next;
	jmp %new_type               # go around again

:program_else
	call %type_name             # Figure out the type_size
	cmp_rax, %0                 # IF NULL == type_size
	je %new_type                # it was a new type

	# Add to global symbol table
	mov_rbx,rax                 # put type_size in the right spot
	mov_rax,[rip+DWORD] %global_token       # Using global token
	mov_rax,[rax+BYTE] !16      # global_token->S
	mov_rcx,[rip+DWORD] %global_symbol_list # Using global_symbol_list
	call %sym_declare           # Declare symbol
	mov_[rip+DWORD],rax %global_symbol_list # global_symbol_list = sym_declare(global_token->s, type_size, global_symbol_list);
	mov_rbx,[rip+DWORD] %global_token       # Using global token
	mov_rbx,[rbx]               # global_token->next
	mov_[rip+DWORD],rbx %global_token       # global_token = global_token->next

	mov_rbx,[rip+DWORD] %global_token       # Using global token
	mov_rbx,[rbx+BYTE] !16      # global_token->S
	lea_rax,[rip+DWORD] %semicolon          # ";"
	call %match                 # if(match(";", global_token->s))
	cmp_rax, %0                 # If true
	jne %program_function       # looks like not a match

	# Deal with the global variable
	mov_rbx,[rip+DWORD] %globals_list       # Using globals_list
	lea_rax,[rip+DWORD] %program_string_0   # ":GLOBAL_"
	call %emit                  # Emit it
	mov_rbx,rax                 # update globals_list

	mov_rax,[rip+DWORD] %global_token       # Using global token
	mov_rax,[rax+BYTE] !8       # global token->prev
	mov_rax,[rax+BYTE] !16      # global token->prev->s
	call %emit                  # Emit it

	mov_rbx,rax                 # update globals_list
	lea_rax,[rip+DWORD] %program_string_1   # "\nNULL\n"
	call %emit                  # Emit it
	mov_[rip+DWORD],rax %globals_list       # update globals_list

	mov_rax,[rip+DWORD] %global_token       # Using global token
	mov_rax,[rax]               # global_token->next
	mov_[rip+DWORD],rax %global_token       # global_token = global_token->next
	jmp %new_type               # go around again

:program_function
	mov_rbx,[rip+DWORD] %global_token       # Using global token
	mov_rbx,[rbx+BYTE] !16      # global_token->S
	lea_rax,[rip+DWORD] %open_paren         # "("
	call %match                 # if(match("(", global_token->s))
	cmp_rax, %0                 # If true
	jne %program_error          # Otherwise deal with error case

	# Deal with function definition
	call %declare_function      # Lets get the parsing rolling
	jmp %new_type               # Keep looping through functions

:program_error
	# Deal with the case of something we don't support
	# (no diagnostic is produced; control falls into program_done)

:program_done
	pop_rcx                     # Restore RCX
	pop_rbx                     # Restore RBX
	ret

# Strings needed by the program function
# NOTE(review): the comment at the use site documents program_string_1 as
# "\nNULL\n"; the blanks below look like flattened line breaks -- confirm.
:program_string_0
":GLOBAL_"
:program_string_1
" NULL "


# declare_function function
# Receives nothing and returns nothing
# Sets current function and adds it to the global function list
# The function name is the token before the current one (the current token
# is the "(" matched by the caller). A ';' after the argument list marks a
# prototype; otherwise the header lines are emitted, the body is compiled by
# statement, and a trailing return is added when the last emitted string is
# not already the return string.
:declare_function
	push_rbx                    # Protect RBX
	push_rcx                    # Protect RCX
	mov_rax, %0                 # Using NULL
	mov_[rip+DWORD],rax %current_count      # current_count = 0

	mov_rax,[rip+DWORD] %global_token       # Using global token
	mov_rax,[rax+BYTE] !8       # global token->prev
	mov_rax,[rax+BYTE] !16      # global token->prev->s
	mov_rbx, %0                 # NULL
	mov_rcx,[rip+DWORD] %global_function_list   # global_function_list
	call %sym_declare           # sym_declare(global_token->prev->s, NULL, global_function_list);
	mov_[rip+DWORD],rax %function           # function = sym_declare(global_token->prev->s, NULL, global_function_list);
	mov_[rip+DWORD],rax %global_function_list   # global_function_list = function

	call %collect_arguments     # collect all of the function arguments

	mov_rax,[rip+DWORD] %global_token       # Using global token
	mov_rax,[rax+BYTE] !16      # global token->s
	lea_rbx,[rip+DWORD] %semicolon          # ";"
	call %match                 # IF global token->s == ";"
	cmp_rax, %0                 # If true it was a prototype
	jne %declare_function_full  # Otherwise a body follows

	# Deal with prototypes
	mov_rax,[rip+DWORD] %global_token       # Using global token
	mov_rax,[rax]               # global token->next
	mov_[rip+DWORD],rax %global_token       # global token = global token->next
	jmp %declare_function_done  # Move on

:declare_function_full
	# Deal with full function definitions
	lea_rax,[rip+DWORD] %declare_function_string_0  # "# Defining function "
	call %emit_out              # emit it

	mov_rax,[rip+DWORD] %function           # function
	mov_rax,[rax+BYTE] !16      # function->s
	call %emit_out              # emit it

	lea_rax,[rip+DWORD] %declare_function_string_1  # "\n:FUNCTION_"
	call %emit_out              # emit it

	mov_rax,[rip+DWORD] %function           # function
	mov_rax,[rax+BYTE] !16      # function->s
	call %emit_out              # emit it

	lea_rax,[rip+DWORD] %declare_function_string_3  # "\n"
	call %emit_out              # emit it

	call %statement             # Recursively get the function pieces

	mov_rax,[rip+DWORD] %output_list        # output
	mov_rax,[rax+BYTE] !16      # output->s
	lea_rbx,[rip+DWORD] %declare_function_string_2  # "ret\n"
	call %match                 # IF output->s == "ret\n"
	cmp_rax, %0                 # If true we can skip adding it
	je %declare_function_done   # otherwise we need to add it

	# Add the return to the end of a function lacking a return;
	lea_rax,[rip+DWORD] %declare_function_string_2  # "ret\n"
	call %emit_out              # emit it

:declare_function_done
	pop_rcx                     # Restore RCX
	pop_rbx                     # Restore RBX
	ret

# NOTE(review): the use sites document string_1 as "\n:FUNCTION_", string_2
# as "ret\n" and string_3 as "\n"; the blanks below look like flattened line
# breaks -- confirm against a pristine copy.
:declare_function_string_0
"# Defining function "
:declare_function_string_1
" :FUNCTION_"
:declare_function_string_2
"ret "
:declare_function_string_3
" "


# collect_arguments function
# Receives nothing
# Returns Nothing
# Adds arguments to the function definition
# holds struct type* type_size in RCX, then replace with struct token_list* a in RCX when type_size is used
# The routine continues past the end of this span.
:collect_arguments
	push_rbx                    # Protect RBX
	push_rcx                    # Protect RCX
	mov_rax,[rip+DWORD] %global_token       # Using global_token
	mov_rax,[rax]               # global_token->next
	mov_[rip+DWORD],rax %global_token       # global_token = global_token->next
:collect_arguments_loop
	mov_rbx,[rip+DWORD] %global_token       # Using global_token
	mov_rbx,[rbx+BYTE] !16      # global_token->S
	lea_rax,[rip+DWORD] %close_paren        # ")"
	call %match                 # IF global_token->S == ")"
	cmp_rax, %0                 # we reached the end
	je %collect_arguments_done  # be done

	# deal with the case of there are arguments
	call %type_name             # Get the type
	mov_rcx,rax                 # put type_size safely out of the way

	mov_rbx,[rip+DWORD] %global_token       # Using global_token
	mov_rbx,[rbx+BYTE] !16      # global_token->S
	lea_rax,[rip+DWORD] %close_paren        # ")"
	call %match                 # IF global_token->S == ")"
	cmp_rax, %0                 # is a foo(int, char,void) case
	je %collect_arguments_common            # deal with commas

	# Trying second else
	mov_rbx,[rip+DWORD] %global_token       # Using global_token
# (continuation of collect_arguments: RBX was just loaded with global_token
#  and RCX holds the type returned by type_name)
	mov_rbx,[rbx+BYTE] !16      # global_token->S
	lea_rax,[rip+DWORD] %comma  # ","
	call %match                 # IF global_token->S == ","
	cmp_rax, %0                 # then deal with the common
	je %collect_arguments_common            # case of commas between arguments

	# deal with foo(int a, char b)
	mov_rax,[rip+DWORD] %global_token       # Using global_token
	# The S field sits at offset 16 and the operand is a single byte, so
	# this must be the +BYTE form like every other ->S load in the file.
	mov_rax,[rax+BYTE] !16      # global_token->S
	mov_rbx,rcx                 # put type_size in the right place
	mov_rcx,[rip+DWORD] %function           # Using function
	mov_rcx,[rcx+BYTE] !32      # function->args
	call %sym_declare           # sym_declare(global_token->s, type_size, function->arguments);
	mov_rcx,rax                 # put a in a safe place

	mov_rax,[rip+DWORD] %function           # Using function
	mov_rax,[rax+BYTE] !32      # function->args
	cmp_rax, %0                 # IF function->args == NULL
	jne %collect_arguments_another          # otherwise it isn't the first

	# Deal with the case of first argument in the function
	mov_rax, %-8                # -8
	mov_[rcx+BYTE],rax !32      # a->depth = -8
	jmp %collect_arguments_next # get to next

:collect_arguments_another
	# deal with the case of non-first arguments
	mov_rax,[rip+DWORD] %function           # Using function
	mov_rax,[rax+BYTE] !32      # function->args
	mov_rax,[rax+BYTE] !32      # function->args->depth
	sub_rax, %8                 # function->args->depth - 8
	mov_[rcx+BYTE],rax !32      # a->depth = function->args->depth - 8

:collect_arguments_next
	mov_rax,[rip+DWORD] %global_token       # Using global_token
	mov_rax,[rax]               # global_token->next
	mov_[rip+DWORD],rax %global_token       # global_token = global_token->next

	mov_rax,[rip+DWORD] %function           # Using function
	mov_[rax+BYTE],rcx !32      # function->args = a

:collect_arguments_common
	mov_rbx,[rip+DWORD] %global_token       # Using global_token
	# Same one-byte displacement as above: +BYTE, not +DWORD
	mov_rbx,[rbx+BYTE] !16      # global_token->S
	lea_rax,[rip+DWORD] %comma  # ","
	call %match                 # IF global_token->S == ","
	cmp_rax, %0                 # then deal with the comma
	jne %collect_arguments_loop # otherwise loop

	# keep foo(bar(), 1) expressions working
	mov_rax,[rip+DWORD] %global_token       # Using global_token
	mov_rax,[rax]               # global_token->next
	mov_[rip+DWORD],rax %global_token       # global_token = global_token->next
	jmp %collect_arguments_loop # keep going

:collect_arguments_done
	mov_rax,[rip+DWORD] %global_token       # Using global_token
	mov_rax,[rax]               # global_token->next
	mov_[rip+DWORD],rax %global_token       # global_token = global_token->next
	pop_rcx                     # Restore RCX
	pop_rbx                     # Restore RBX
	ret


# statement function
# Receives nothing
# Returns nothing
# Walks down global_token recursively to collect the contents of the function
#
# Dispatch order: "{" block, ":label", local declaration (primitive type
# name or "struct"), if, do, while, for, asm, goto, return, break, continue,
# and finally a plain expression followed by ";".
# RBX holds global_token->S from the first test down to statement_label and
# again from the "struct" test onward; this relies on match handing RBX back
# unchanged (match is defined outside this span).
:statement
	push_rbx                    # Protect RBX
	push_rcx                    # Protect RCX
	mov_rbx,[rip+DWORD] %global_token       # Using global_token
	mov_rbx,[rbx+BYTE] !16      # global_token->S
	lea_rax,[rip+DWORD] %open_curly_brace   # "{"
	call %match                 # IF global_token->S == "{"
	# Test the returned value explicitly, as every other call site does,
	# instead of branching on whatever flags match happened to leave.
	cmp_rax, %0                 # If true we have a nested block
	jne %statement_label        # otherwise try label

	# deal with { statement }
	call %recursive_statement   # Statements inside of statements for days
	jmp %statement_done         # Be done

:statement_label
	mov_al,[rbx]                # global_token->S[0]
	movzx_rax,al                # make it useful
	cmp_rax, %58                # IF global_token->S[0] == ':'
	jne %statement_local        # otherwise try locals

	# deal with labels
	mov_rax,rbx                 # put global_token->S in the right spot
	call %emit_out              # emit it

	lea_rax,[rip+DWORD] %statement_string_0 # Using "\t#C goto label\n"
	call %emit_out              # emit it

	mov_rax,[rip+DWORD] %global_token       # Using global_token
	mov_rax,[rax]               # global_token->next
	mov_[rip+DWORD],rax %global_token       # global_token = global_token->next
	jmp %statement_done         # be done

:statement_local
	mov_rax,rbx                 # put global_token->S in the right place
	lea_rbx,[rip+DWORD] %prim_types         # pointer to primitive types
	call %lookup_type           # See if found
	cmp_rax, %0                 # IF NULL == lookup_type(global_token->S, prim_types)
	jne %statement_local_success            # Sweet a new local

	# Second chance
	mov_rbx,[rip+DWORD] %global_token       # Using global_token
	mov_rbx,[rbx+BYTE] !16      # global_token->S
	lea_rax,[rip+DWORD] %struct # "struct"
	call %match                 # IF global_token->S == "struct"
	cmp_rax, %0                 # then we are a local
	jne %statement_if           # otherwise try IF

:statement_local_success
	call %collect_local         # Grab those locals
	jmp %statement_done         # be done

:statement_if
	lea_rax,[rip+DWORD] %if_string          # Using "if"
	call %match                 # IF global_token->S == "if"
	cmp_rax, %0                 # then we have an if statement
	jne %statement_do           # otherwise try DO

	# Deal with IF statement
	call %process_if            # DO IT
	jmp %statement_done         # be done

:statement_do
	lea_rax,[rip+DWORD] %do_string          # Using "do"
	call %match                 # IF global_token->S == "do"
	cmp_rax, %0                 # then we have a do statement
	jne %statement_while        # otherwise try WHILE

	# Deal with DO statement
	call %process_do            # DO IT
	jmp %statement_done         # be done

:statement_while
	lea_rax,[rip+DWORD] %while_string       # Using "while"
	call %match                 # IF global_token->S == "while"
	cmp_rax, %0                 # then we have a while statement
	jne %statement_for          # otherwise try FOR

	# Deal with WHILE statement
	call %process_while         # DO IT
	jmp %statement_done         # be done

:statement_for
	lea_rax,[rip+DWORD] %for_string         # Using "for"
	call %match                 # IF global_token->S == "for"
	cmp_rax, %0                 # then we have a for statement
	jne %statement_asm          # otherwise try ASM

	# Deal with FOR statement
	call %process_for           # DO IT
	jmp %statement_done         # be done

:statement_asm
	lea_rax,[rip+DWORD] %asm_string         # Using "asm"
	call %match                 # IF global_token->S == "asm"
	cmp_rax, %0                 # then we have an asm statement
	jne %statement_goto         # otherwise try GOTO

	# Deal with ASM statement
	call %process_asm           # Hit it
	jmp %statement_done         # be done

:statement_goto
	lea_rax,[rip+DWORD] %goto_string        # Using "goto"
	call %match                 # IF global_token->S == "goto"
	cmp_rax, %0                 # then we have a goto statement
	jne %statement_return       # Otherwise try RETURN

	# Deal with GOTO statement
	lea_rax,[rip+DWORD] %statement_string_1 # Using "jmp %"
	call %emit_out              # emit it

	mov_rax,[rip+DWORD] %global_token       # Using global_token
	mov_rax,[rax]               # global_token->next
	mov_[rip+DWORD],rax %global_token       # global_token = global_token->next

	mov_rax,[rax+BYTE] !16      # global_token->S
	call %emit_out              # emit it

	lea_rax,[rip+DWORD] %statement_string_2 # Using "\n"
	call %emit_out              # emit it

	mov_rax,[rip+DWORD] %global_token       # Using global_token
	mov_rax,[rax]               # global_token->next
	mov_[rip+DWORD],rax %global_token       # global_token = global_token->next

	lea_rax,[rip+DWORD] %statement_string_4 # Using "ERROR in statement\nMissing ;\n"
	lea_rbx,[rip+DWORD] %semicolon          # Using ";"
	call %require_match         # Make sure it has the required
	jmp %statement_done         # Be done

:statement_return
	lea_rax,[rip+DWORD] %return_string      # Using "return"
	call %match                 # IF global_token->S == "return"
	cmp_rax, %0                 # then we have a return statement
	jne %statement_break        # Otherwise try BREAK

	# Deal with RETURN Statement
	call %return_result         # Return anything they want
	jmp %statement_done         # be done

:statement_break
	lea_rax,[rip+DWORD] %break_string       # Using "break"
	call %match                 # IF global_token->S == "break"
	cmp_rax, %0                 # then we have a break statement
	jne %statement_continue     # Otherwise try CONTINUE

	# Deal with BREAK statement
	call %process_break         # Lets do some damage
	jmp %statement_done         # be done

:statement_continue
	lea_rax,[rip+DWORD] %continue_string    # Using "continue"
	call %match                 # IF global_token->S == "continue"
	cmp_rax, %0                 # then we have a continue statement
	jne %statement_else         # Otherwise we are punting to an expression

	# Deal with CONTINUE statement
	mov_rax,[rip+DWORD] %global_token       # Using global_token
	mov_rax,[rax]               # global_token->next
	mov_[rip+DWORD],rax %global_token       # global_token = global_token->next

	lea_rax,[rip+DWORD] %statement_string_3 # Using "\n#continue statement\n"
	call %emit_out              # emit it

	lea_rax,[rip+DWORD] %statement_string_4 # Using "ERROR in statement\nMissing ;\n"
	lea_rbx,[rip+DWORD] %semicolon          # Using ";"
	call %require_match         # Don't forget the ";"
	jmp %statement_done         # Be done

:statement_else
	call %expression            # Collect expression
	lea_rax,[rip+DWORD] %statement_string_4 # Using "ERROR in statement\nMissing ;\n"
	lea_rbx,[rip+DWORD] %semicolon          # Using ";"
	call %require_match         # make sure we have it

:statement_done
	pop_rcx                     # Restore RCX
	pop_rbx                     # Restore RBX
	ret

# NOTE(review): the use sites document these as "\t#C goto label\n", "\n",
# "\n#continue statement\n" and "ERROR in statement\nMissing ;\n"; the blanks
# below look like flattened tabs and line breaks -- confirm against a
# pristine copy before relying on the emitted text.
:statement_string_0
" #C goto label "
:statement_string_1
"jmp %"
:statement_string_2
" "
:statement_string_3
" #continue statement "
:statement_string_4
"ERROR in statement Missing ; "


# recursive_statement function
# Receives nothing
# Returns nothing
# Walks the global_token list to build the contents of statements
# Uses struct token_list* frame in RCX
# frame is function->locals as it stood on entry. After the closing "}",
# one pop line is emitted for each local added since then, unless the last
# emitted string already matches the return string.
:recursive_statement
	push_rbx                    # Protect RBX
	push_rcx                    # Protect RCX

	mov_rax,[rip+DWORD] %global_token       # Using global_token
	mov_rax,[rax]               # global_token->next
	mov_[rip+DWORD],rax %global_token       # global_token = global_token->next

	mov_rcx,[rip+DWORD] %function           # Using function
	mov_rcx,[rcx+BYTE] !8       # frame = function->locals

:recursive_statement_loop
	mov_rbx,[rip+DWORD] %global_token       # Using global_token
	mov_rbx,[rbx+BYTE] !16      # global_token->S
	lea_rax,[rip+DWORD] %close_curly_brace  # Using "}"
	call %match                 # IF global_token->S == "}"
	cmp_rax, %0                 # Then we are done recursing
	je %recursive_statement_cleanup         # and then we clean up

	# Deal with the recursive calls
	call %statement             # Deal with another statement
	jmp %recursive_statement_loop           # loop some more

:recursive_statement_cleanup
	mov_rax,[rip+DWORD] %global_token       # Using global_token
	mov_rax,[rax]               # global_token->next
	mov_[rip+DWORD],rax %global_token       # global_token = global_token->next

	lea_rax,[rip+DWORD] %recursive_statement_string_0   # Using "ret\n"
	mov_rbx,[rip+DWORD] %output_list        # Using output
	mov_rbx,[rbx+BYTE] !16      # output->S
	call %match                 # IF output->S == "ret\n"
	cmp_rax, %0                 # Then we can skip the clean up
	je %recursive_statement_done            # and be done

	# Deal with cleanup
	mov_rbx,[rip+DWORD] %function           # Using function
	mov_rbx,[rbx+BYTE] !8       # i = function->locals
	lea_rax,[rip+DWORD] %recursive_statement_string_1   # Using "pop_rbx\t# _recursive_statement_locals\n"

:recursive_statement_locals
	# RAX is not reloaded inside this loop, so emit_out is presumably
	# expected to hand it back unchanged -- defined outside this span.
	cmp_rbx,rcx                 # IF frame != i
	je %recursive_statement_done            # Otherwise be done

	# Lets emit
	call %emit_out              # emit it
	mov_rbx,[rbx]               # i = i->next
	jmp %recursive_statement_locals         # keep going
:recursive_statement_done
	# Exit of recursive_statement: RCX still holds the frame saved on entry
	mov_rax,[rip+DWORD] %function           # Using function
	mov_[rax+BYTE],rcx !8       # function->locals = frame
	pop_rcx                     # Restore RCX
	pop_rbx                     # Restore RBX
	ret

# NOTE(review): the use sites document these as "ret\n" and
# "pop_rbx\t# _recursive_statement_locals\n"; the blanks below look like
# flattened tabs and line breaks -- confirm against a pristine copy.
:recursive_statement_string_0
"ret "
:recursive_statement_string_1
"pop_rbx # _recursive_statement_locals "


# return_result function
# Receives nothing
# Returns nothing
# Cleans up function and generates return
# Also handles returning expressions
# Skips the "return" token, compiles the expression unless the next token
# starts with ';', requires the ';', emits one pop line per entry in
# function->locals and then the return string.
:return_result
	push_rbx                    # Protect RBX
	push_rcx                    # Protect RCX

	mov_rax,[rip+DWORD] %global_token       # Using global_token
	mov_rax,[rax]               # global_token->next
	mov_[rip+DWORD],rax %global_token       # global_token = global_token->next

	mov_rax,[rax+BYTE] !16      # global_token->S
	mov_al,[rax]                # global_token->S[0]
	movzx_rax,al                # make it useful
	cmp_rax, %59                # If global_token->S[0] == ';'
	je %return_result_cleanup   # Go straight to cleanup

	call %expression            # get the expression we are returning

:return_result_cleanup
	lea_rax,[rip+DWORD] %return_result_string_0 # Using "ERROR in return_result\nMISSING ;\n"
	lea_rbx,[rip+DWORD] %semicolon          # Using ";"
	call %require_match         # Make sure we have it

	mov_rbx,[rip+DWORD] %function           # Using function
	mov_rbx,[rbx+BYTE] !8       # function->locals
	lea_rax,[rip+DWORD] %return_result_string_1 # Using "pop_rbx\t# _return_result_locals\n"
:return_result_locals
	# RAX is not reloaded inside this loop, so emit_out is presumably
	# expected to hand it back unchanged -- defined outside this span.
	cmp_rbx, %0                 # IF NULL == i
	je %return_result_done      # Be done

	call %emit_out              # Emit out pop
	mov_rbx,[rbx]               # i = i->NEXT
	jmp %return_result_locals   # Keep going

:return_result_done
	lea_rax,[rip+DWORD] %return_result_string_2 # Using "ret\n"
	call %emit_out              # Emit it
	pop_rcx                     # Restore RCX
	pop_rbx                     # Restore RBX
	ret

# NOTE(review): the use sites document these as
# "ERROR in return_result\nMISSING ;\n", "pop_rbx\t# _return_result_locals\n"
# and "ret\n"; the blanks below look like flattened tabs and line breaks.
:return_result_string_0
"ERROR in return_result MISSING ; "
:return_result_string_1
"pop_rbx # _return_result_locals "
:return_result_string_2
"ret "


# collect_local function
# Receives nothing
# Returns nothing
# Walks global_token list to create function locals
# Uses RCX for struct token_list* A
#
# Depth assigned to the new local (stored at offset 32 of A):
#   function is "main" with no locals yet    => -40
#   no arguments and no locals yet           => -16
#   arguments but no locals yet              => function->args->depth - 16
#   otherwise                                => function->locals->depth - 8
# After the depth is set RCX is reused for A->S, the local's name, which is
# emitted in the trailing comment of the generated push line.
:collect_local
	push_rbx                    # Protect RBX
	push_rcx                    # Protect RCX
	call %type_name             # Get the local's type

	mov_rbx,rax                 # Put struct type* type_size in the right place
	mov_rax,[rip+DWORD] %global_token       # Using global_token
	mov_rax,[rax+BYTE] !16      # global_token->S
	mov_rcx,[rip+DWORD] %function           # Using function
	mov_rcx,[rcx+BYTE] !8       # function->locals
	call %sym_declare           # Declare it
	mov_rcx,rax                 # put it away safely

	# Try for main
	lea_rax,[rip+DWORD] %main_string        # Using "main"
	mov_rbx,[rip+DWORD] %function           # Using function
	mov_rbx,[rbx+BYTE] !16      # function->S
	call %match                 # IF match("main", function->s)
	cmp_rax, %0                 # possible
	jne %collect_local_fresh    # try to see if fresh function

	# Ok we are in main, now to see if main is fresh
	mov_rax,[rip+DWORD] %function           # Using function
	mov_rax,[rax+BYTE] !8       # function->locals
	cmp_rax, %0                 # IF NULL == function->locals
	jne %collect_local_fresh    # try to see if fresh function

	# Sweet we are in a fresh main
	mov_rax, %-40               # We start at -40
	mov_[rcx+BYTE],rax !32      # a->DEPTH = -40
	jmp %collect_local_common   # Go to the commons

:collect_local_fresh
	mov_rax,[rip+DWORD] %function           # Using function
	mov_rax,[rax+BYTE] !32      # function->args
	cmp_rax, %0                 # IF NULL == function->args
	jne %collect_local_first    # Otherwise see if first

	mov_rax,[rip+DWORD] %function           # Using function
	mov_rax,[rax+BYTE] !8       # function->locals
	cmp_rax, %0                 # IF NULL == function->locals
	jne %collect_local_first    # Otherwise try first

	# Sweet we are in a fresh function
	mov_rax, %-16               # We start at -16
	mov_[rcx+BYTE],rax !32      # a->DEPTH = -16
	jmp %collect_local_common   # Go to the commons

:collect_local_first
	mov_rax,[rip+DWORD] %function           # Using function
	mov_rax,[rax+BYTE] !8       # function->locals
	cmp_rax, %0                 # IF NULL == function->locals
	jne %collect_local_else     # Looks like we are just another local

	# Ok we are the first local
	mov_rax,[rip+DWORD] %function           # Using function
	mov_rax,[rax+BYTE] !32      # function->args
	mov_rax,[rax+BYTE] !32      # function->args->depth
	sub_rax, %16                # function->arguments->depth - 16
	mov_[rcx+BYTE],rax !32      # a->DEPTH = function->arguments->depth - 16
	jmp %collect_local_common   # Go to the commons

:collect_local_else
	# Always the last to know
	mov_rax,[rip+DWORD] %function           # Using function
	mov_rax,[rax+BYTE] !8       # function->locals
	mov_rax,[rax+BYTE] !32      # function->locals->depth
	sub_rax, %8                 # function->locals->depth - 8
	mov_[rcx+BYTE],rax !32      # a->DEPTH = function->locals->depth - 8

:collect_local_common
	mov_rax,[rip+DWORD] %function           # Using function

	mov_[rax+BYTE],rcx !8       # function->locals = a
	mov_rcx,[rcx+BYTE] !16      # a->S

	lea_rax,[rip+DWORD] %collect_local_string_0 # Using "# Defining local "
	call %emit_out              # emit it

	mov_rax,[rip+DWORD] %global_token       # Using global_token
	mov_rax,[rax+BYTE] !16      # global_token->S
	call %emit_out              # emit it

	lea_rax,[rip+DWORD] %collect_local_string_1 # Using "\n"
	call %emit_out              # emit it

	mov_rax,[rip+DWORD] %global_token       # Using global_token
	mov_rax,[rax]               # global_token->NEXT
	mov_[rip+DWORD],rax %global_token       # global_token = global_token->NEXT

	mov_rbx,[rax+BYTE] !16      # global_token->S
	lea_rax,[rip+DWORD] %equal  # Using "="
	call %match                 # IF match("=", global_token->s)
	cmp_rax, %0                 # Deal with assignment
	jne %collect_local_done     # Otherwise finish it

	# Deal with assignment
	mov_rax,[rip+DWORD] %global_token       # Using global_token
	mov_rax,[rax]               # global_token->NEXT
	mov_[rip+DWORD],rax %global_token       # global_token = global_token->NEXT
	call %expression            # Recurse

:collect_local_done
	lea_rax,[rip+DWORD] %collect_local_string_2 # Using "ERROR in collect_local\nMissing ;\n"
	lea_rbx,[rip+DWORD] %semicolon          # Using ";"
	call %require_match         # Make sure we have it

	lea_rax,[rip+DWORD] %collect_local_string_3 # Using "push_rax\t#"
	call %emit_out              # emit it

	mov_rax,rcx                 # put A->S where it belongs
	call %emit_out              # emit it

	lea_rax,[rip+DWORD] %collect_local_string_1 # Using "\n"
	call %emit_out              # emit it

	pop_rcx                     # Restore RCX
	pop_rbx                     # Restore RBX
	ret

# NOTE(review): the use sites document string_1 as "\n" and string_2 as
# "ERROR in collect_local\nMissing ;\n"; the blanks below look like flattened
# line breaks. The last literal continues on the following line of the file.
:collect_local_string_0
"# Defining local "
:collect_local_string_1
" "
:collect_local_string_2
"ERROR in collect_local Missing ; "
:collect_local_string_3 "push_rax 
#" # process_asm function # Receives nothing # Returns nothing # Simply inlines the asm statements # Uses RBX for global_token temp storage :process_asm push_rbx # Protect RBX mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax] # global_token->NEXT mov_[rip+DWORD],rax %global_token # global_token = global_token->NEXT lea_rax,[rip+DWORD] %process_asm_string_0 # Using "ERROR in process_asm\nMISSING (\n" lea_rbx,[rip+DWORD] %open_paren # Using "(" call %require_match # Make sure we have it mov_rbx,[rip+DWORD] %global_token # Using global_token :process_asm_iter mov_rax,[rbx+BYTE] !16 # global_token->S mov_al,[rax] # global_token->S[0] movzx_rax,al # Make it useful cmp_rax, %34 # IF global_token->S[0] == '"' jne %process_asm_done # Otherwise be done mov_rax,[rbx+BYTE] !16 # global_token->S add_rax, %1 # global_token->S + 1 call %emit_out # Emit it lea_rax,[rip+DWORD] %process_asm_string_1 # Using "\n" call %emit_out # Emit it mov_rbx,[rbx] # global_token->NEXT mov_[rip+DWORD],rbx %global_token # global_token = global_token->NEXT jmp %process_asm_iter # keep going :process_asm_done lea_rax,[rip+DWORD] %process_asm_string_2 # Using "ERROR in process_asm\nMISSING )\n" lea_rbx,[rip+DWORD] %close_paren # Using ")" call %require_match # Make sure we have it lea_rax,[rip+DWORD] %process_asm_string_3 # Using "ERROR in process_asm\nMISSING ;\n" lea_rbx,[rip+DWORD] %semicolon # Using ";" call %require_match # Make sure we have it pop_rbx # Restore RBX ret :process_asm_string_0 "ERROR in process_asm MISSING ( " :process_asm_string_1 " " :process_asm_string_2 "ERROR in process_asm MISSING ) " :process_asm_string_3 "ERROR in process_asm MISSING ; " # process_if function # Receives nothing # Returns Nothing # Increments current_count recurses into expression + statement # Uses RCX for char* NUMBER_STRING :process_if push_rbx # Protect RBX push_rcx # Protect RCX mov_rax,[rip+DWORD] %current_count # Using current count mov_rbx,rax # Preparing for update add_rbx, %1 # 
current_count + 1 mov_[rip+DWORD],rbx %current_count # current_count = current_count + 1 call %numerate_number # convert to string mov_rcx,rax # put NUMBER_STRING in place lea_rax,[rip+DWORD] %process_if_string_0 # Using "# IF_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S mov_rbx,rcx # Passing NUMBER_STRING call %uniqueID_out # uniqueID_out(function->s, number_string) mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax] # global_token->NEXT mov_[rip+DWORD],rax %global_token # global_token = global_token->NEXT lea_rax,[rip+DWORD] %process_if_string_1 # Using "ERROR in process_if\nMISSING (\n" lea_rbx,[rip+DWORD] %open_paren # Using "(" call %require_match # Make sure we have it call %expression # Recurse to get the IF(...) part lea_rax,[rip+DWORD] %process_if_string_2 # Using "test_rax,rax\nje %ELSE_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S mov_rbx,rcx # Passing NUMBER_STRING call %uniqueID_out # uniqueID_out(function->s, number_string) lea_rax,[rip+DWORD] %process_if_string_3 # Using "ERROR in process_if\nMISSING )\n" lea_rbx,[rip+DWORD] %close_paren # Using ")" call %require_match # Make sure we have it call %statement # Recursive to get the IF(){...} part lea_rax,[rip+DWORD] %process_if_string_4 # Using "jmp %_END_IF_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S mov_rbx,rcx # Passing NUMBER_STRING call %uniqueID_out # uniqueID_out(function->s, number_string) lea_rax,[rip+DWORD] %process_if_string_5 # Using ":ELSE_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S call %uniqueID_out # uniqueID_out(function->s, number_string) mov_rbx,[rip+DWORD] %global_token # Using global_token mov_rbx,[rbx+BYTE] !16 # global_token->S lea_rax,[rip+DWORD] %else_string # Using "else" call %match # IF 
global_token->S == "else" cmp_rax, %0 # Then we need to collect the else too jne %process_if_done # Otherwise finish up # deal with else statement mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax] # global_token->NEXT mov_[rip+DWORD],rax %global_token # global_token = global_token->NEXT call %statement # Recurse to get the ELSE {...} part :process_if_done lea_rax,[rip+DWORD] %process_if_string_6 # Using ":_END_IF_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S mov_rbx,rcx # Passing NUMBER_STRING call %uniqueID_out # uniqueID_out(function->s, number_string) pop_rcx # Restore RCX pop_rbx # Restore RBX ret :process_if_string_0 "# IF_" :process_if_string_1 "ERROR in process_if MISSING ( " :process_if_string_2 "test_rax,rax je %ELSE_" :process_if_string_3 "ERROR in process_if MISSING ) " :process_if_string_4 "jmp %_END_IF_" :process_if_string_5 ":ELSE_" :process_if_string_6 ":_END_IF_" # save_break_frame microfunction # Overwrites RAX and RBX # Saves break frame on stack # Returns to caller :save_break_frame pop_rbx # Save return Address mov_rax,[rip+DWORD] %break_frame # Get break_frame push_rax # Store as nested_locals mov_rax,[rip+DWORD] %break_target_head # Get break_target_head push_rax # Store as nested_break_head mov_rax,[rip+DWORD] %break_target_func # Get break_target_func push_rax # Store as nested_break_func mov_rax,[rip+DWORD] %break_target_num # Get break_target_num push_rax # Store as nested_break_num push_rbx # Put return back in place ret # Return to caller # restore_break_frame microfunction # Overwrites RAX and RBX # Restores break frame from stack # Returns to caller :restore_break_frame pop_rbx # Save return Address pop_rax # Get nested_break_num mov_[rip+DWORD],rax %break_target_num # Restore break_target_num pop_rax # Get nested_break_func mov_[rip+DWORD],rax %break_target_func # Restore break_target_func pop_rax # Get nested_break_head mov_[rip+DWORD],rax 
%break_target_head  # Restore break_target_head (operand of the preceding mov)
	pop_rax  # Get nested_locals
	mov_[rip+DWORD],rax %break_frame  # Restore break_frame
	push_rbx  # Put return back in place
	ret  # Return to caller


# set_break_frame microfunction
# Receives char* head in RAX and char* num in RBX
# Overwrites RAX and RBX
# Stores head and num as the break target, then records function->LOCALS
# (offset 8) as break_frame and function->S (offset 16) as break_target_func
# Returns to calling function
:set_break_frame
	mov_[rip+DWORD],rax %break_target_head  # update break_target_head
	mov_[rip+DWORD],rbx %break_target_num  # update break_target_num
	mov_rbx,[rip+DWORD] %function  # Using function
	mov_rax,[rbx+BYTE] !8  # function->LOCALS
	mov_[rip+DWORD],rax %break_frame  # break_frame = function->LOCALS
	mov_rax,[rbx+BYTE] !16  # function->S
	mov_[rip+DWORD],rax %break_target_func  # break_target_func = function->S
	ret  # Return to sender


# process_do function
# Receives Nothing
# Returns Nothing
# Increments current_count and leverages save/restore_break_frame pieces
# Uses RCX for char* NUMBER_STRING
:process_do
	push_rbx  # Protect RBX
	push_rcx  # Protect RCX
	call %save_break_frame  # Save the frame

	mov_rax,[rip+DWORD] %current_count  # Using current count
	mov_rbx,rax  # Preparing for update
	add_rbx, %1  # current_count + 1
	mov_[rip+DWORD],rbx %current_count  # current_count = current_count + 1
	call %numerate_number  # convert to string (RAX still holds the pre-increment count)
	mov_rcx,rax  # put NUMBER_STRING in place

	lea_rax,[rip+DWORD] %process_do_string_0  # Using "DO_END_"
	mov_rbx,rcx  # Passing NUMBER_STRING
	call %set_break_frame  # Set the frame

	lea_rax,[rip+DWORD] %process_do_string_1  # Using ":DO_"
	call %emit_out  # Emit it

	mov_rax,[rip+DWORD] %function  # Using function
	mov_rax,[rax+BYTE] !16  # function->S
	mov_rbx,rcx  # Passing NUMBER_STRING
	call %uniqueID_out  # uniqueID_out(function->s, number_string)

	mov_rax,[rip+DWORD] %global_token  # Using global_token
	mov_rax,[rax]  # global_token->NEXT
	mov_[rip+DWORD],rax %global_token  # global_token = global_token->NEXT
	call %statement  # Do the DO {...} part

	lea_rax,[rip+DWORD] %process_do_string_2  # Using "ERROR in process_do\nMISSING while\n"
lea_rbx,[rip+DWORD] %while_string # Using "while" call %require_match # Make sure we have it lea_rax,[rip+DWORD] %process_do_string_3 # Using "ERROR in process_do\nMISSING (\n" lea_rbx,[rip+DWORD] %open_paren # Using "(" call %require_match # Make sure we have it call %expression # Do the WHILE (...) part lea_rax,[rip+DWORD] %process_do_string_4 # Using "ERROR in process_do\nMISSING )\n" lea_rbx,[rip+DWORD] %close_paren # Using ")" call %require_match # Make sure we have it lea_rax,[rip+DWORD] %process_do_string_5 # Using "ERROR in process_do\nMISSING ;\n" lea_rbx,[rip+DWORD] %semicolon # Using ";" call %require_match # Make sure we have it lea_rax,[rip+DWORD] %process_do_string_6 # Using "test_rax,rax\njne %DO_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S mov_rbx,rcx # Passing NUMBER_STRING call %uniqueID_out # uniqueID_out(function->s, number_string) lea_rax,[rip+DWORD] %process_do_string_7 # Using ":DO_END_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S call %uniqueID_out # uniqueID_out(function->s, number_string) call %restore_break_frame # Restore the old break frame pop_rcx # Restore RCX pop_rbx # Restore RBX ret :process_do_string_0 "DO_END_" :process_do_string_1 ":DO_" :process_do_string_2 "ERROR in process_do MISSING while " :process_do_string_3 "ERROR in process_do MISSING ( " :process_do_string_4 "ERROR in process_do MISSING ) " :process_do_string_5 "ERROR in process_do MISSING ; " :process_do_string_6 "test_rax,rax jne %DO_" :process_do_string_7 ":DO_END_" # process_while function # Receives nothing # Returns nothing # Increments current_count and leverages save/restore_break_frame pieces # Uses RCX for char* NUMBER_STRING :process_while push_rbx # Protect RBX push_rcx # Protect RCX call %save_break_frame # Save break_frame mov_rax,[rip+DWORD] %current_count # Using current count mov_rbx,rax # Preparing for update add_rbx, %1 # 
current_count + 1 mov_[rip+DWORD],rbx %current_count # current_count = current_count + 1 call %numerate_number # convert to string mov_rcx,rax # put NUMBER_STRING in place lea_rax,[rip+DWORD] %process_while_string_0 # Using "END_WHILE_" mov_rbx,rcx # Passing NUMBER_STRING call %set_break_frame # Set it and forget it lea_rax,[rip+DWORD] %process_while_string_1 # Using ":WHILE_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S mov_rbx,rcx # Passing NUMBER_STRING call %uniqueID_out # uniqueID_out(function->s, number_string) mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax] # global_token->NEXT mov_[rip+DWORD],rax %global_token # global_token = global_token->NEXT lea_rax,[rip+DWORD] %process_while_string_2 # Using "ERROR in process_while\nMISSING (\n" lea_rbx,[rip+DWORD] %open_paren # Using "(" call %require_match # Make sure we have it call %expression # Deal with the WHILE (...) part lea_rax,[rip+DWORD] %process_while_string_3 # Using "test_rax,rax\nje %END_WHILE_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S mov_rbx,rcx # Passing NUMBER_STRING call %uniqueID_out # uniqueID_out(function->s, number_string) lea_rax,[rip+DWORD] %process_while_string_4 # Using "# THEN_while_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S call %uniqueID_out # uniqueID_out(function->s, number_string) lea_rax,[rip+DWORD] %process_while_string_5 # Using "ERROR in process_while\nMISSING )\n" lea_rbx,[rip+DWORD] %close_paren # Using ")" call %require_match # Make sure we have it call %statement # Deal with the {....} part lea_rax,[rip+DWORD] %process_while_string_6 # Using "jmp %WHILE_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S mov_rbx,rcx # Passing NUMBER_STRING call %uniqueID_out # uniqueID_out(function->s, number_string) 
lea_rax,[rip+DWORD] %process_while_string_7 # Using ":END_WHILE_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S call %uniqueID_out # uniqueID_out(function->s, number_string) call %restore_break_frame # Restore the old break frame pop_rcx # Restore RCX pop_rbx # Restore RBX ret :process_while_string_0 "END_WHILE_" :process_while_string_1 ":WHILE_" :process_while_string_2 "ERROR in process_while MISSING ( " :process_while_string_3 "test_rax,rax je %END_WHILE_" :process_while_string_4 "# THEN_while_" :process_while_string_5 "ERROR in process_while MISSING ) " :process_while_string_6 "jmp %WHILE_" :process_while_string_7 ":END_WHILE_" # process_for function # Receives Nothing # Returns Nothing # Increments current_count and leverages save/restore_break_frame pieces # Uses RCX for char* NUMBER_STRING :process_for push_rbx # Protect RBX push_rcx # Protect RCX call %save_break_frame # Save the frame mov_rax,[rip+DWORD] %current_count # Using current count mov_rbx,rax # Preparing for update add_rbx, %1 # current_count + 1 mov_[rip+DWORD],rbx %current_count # current_count = current_count + 1 call %numerate_number # convert to string mov_rcx,rax # put NUMBER_STRING in place lea_rax,[rip+DWORD] %process_for_string_0 # Using "FOR_END_" mov_rbx,rcx # Passing NUMBER_STRING call %set_break_frame # Set it and forget it lea_rax,[rip+DWORD] %process_for_string_1 # Using "# FOR_initialization_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S mov_rbx,rcx # Passing NUMBER_STRING call %uniqueID_out # uniqueID_out(function->s, number_string) mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax] # global_token->NEXT mov_[rip+DWORD],rax %global_token # global_token = global_token->NEXT lea_rax,[rip+DWORD] %process_for_string_2 # Using "ERROR in process_for\nMISSING (\n" lea_rbx,[rip+DWORD] %open_paren # Using "(" call %require_match # Make Sure we have 
it mov_rbx,[rip+DWORD] %global_token # Using global_token mov_rbx,[rbx+BYTE] !16 # global_token->S lea_rax,[rip+DWORD] %semicolon # Using ";" call %match # IF global_token->S == ";" cmp_rax, %0 # Then no initializer je %process_for_terminator # And skip getting the expression # Deal with FOR (...; case call %expression # Get the FOR ( ... ; part :process_for_terminator lea_rax,[rip+DWORD] %process_for_string_3 # Using ":FOR_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S mov_rbx,rcx # Passing NUMBER_STRING call %uniqueID_out # uniqueID_out(function->s, number_string) lea_rax,[rip+DWORD] %process_for_string_4 # Using "ERROR in process_for\nMISSING ;1\n" lea_rbx,[rip+DWORD] %semicolon # Using ";" call %require_match # Make sure we have it call %expression # Get the FOR ( ; ... ; Part lea_rax,[rip+DWORD] %process_for_string_5 # Using "test_rax,rax\nje %FOR_END_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S mov_rbx,rcx # Passing NUMBER_STRING call %uniqueID_out # uniqueID_out(function->s, number_string) lea_rax,[rip+DWORD] %process_for_string_6 # Using "jmp %FOR_THEN_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S call %uniqueID_out # uniqueID_out(function->s, number_string) lea_rax,[rip+DWORD] %process_for_string_7 # Using ":FOR_ITER_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S call %uniqueID_out # uniqueID_out(function->s, number_string) lea_rax,[rip+DWORD] %process_for_string_8 # Using "ERROR in process_for\nMISSING ;2\n" lea_rbx,[rip+DWORD] %semicolon # Using ";" call %require_match # Make sure we have it call %expression # Get the FOR (;;...) 
part lea_rax,[rip+DWORD] %process_for_string_9 # Using "jmp %FOR_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S mov_rbx,rcx # Passing NUMBER_STRING call %uniqueID_out # uniqueID_out(function->s, number_string) lea_rax,[rip+DWORD] %process_for_string_10 # Using ":FOR_THEN_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S call %uniqueID_out # uniqueID_out(function->s, number_string) lea_rax,[rip+DWORD] %process_for_string_11 # Using "ERROR in process_for\nMISSING )\n" lea_rbx,[rip+DWORD] %close_paren # Using ")" call %require_match # Make sure we have it call %statement # Get FOR (;;) {...} part lea_rax,[rip+DWORD] %process_for_string_12 # Using "jmp %FOR_ITER_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S mov_rbx,rcx # Passing NUMBER_STRING call %uniqueID_out # uniqueID_out(function->s, number_string) lea_rax,[rip+DWORD] %process_for_string_13 # Using ":FOR_END_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S call %uniqueID_out # uniqueID_out(function->s, number_string) call %restore_break_frame # Restore the old break frame pop_rcx # Restore RCX pop_rbx # Restore RBX ret :process_for_string_0 "FOR_END_" :process_for_string_1 "# FOR_initialization_" :process_for_string_2 "ERROR in process_for MISSING ( " :process_for_string_3 ":FOR_" :process_for_string_4 "ERROR in process_for MISSING ;1 " :process_for_string_5 "test_rax,rax je %FOR_END_" :process_for_string_6 "jmp %FOR_THEN_" :process_for_string_7 ":FOR_ITER_" :process_for_string_8 "ERROR in process_for MISSING ;2 " :process_for_string_9 "jmp %FOR_" :process_for_string_10 ":FOR_THEN_" :process_for_string_11 "ERROR in process_for MISSING ) " :process_for_string_12 "jmp %FOR_ITER_" :process_for_string_13 ":FOR_END_" # process_break function # Receives nothing # Returns 
nothing # Handles the break out of loops case # Uses RBX for struct token_list* break_frame and RCX for struct token_list* I :process_break push_rbx # Protect RBX push_rcx # Protect RCX mov_rax,[rip+DWORD] %break_target_head # Catch big error cmp_rax, %0 # IF(NULL == break_target_head) je %process_break_bad # I'm sorry Mr White but you have stage-3 lung cancer mov_rax,[rip+DWORD] %function # Using function mov_rcx,[rax+BYTE] !8 # I = function->LOCALS mov_rbx,[rip+DWORD] %break_frame # Put break_frame in the right spot lea_rax,[rip+DWORD] %process_break_string_1 # Using "pop_rbx\t# break_cleanup_locals\n" :process_break_iter cmp_rcx, %0 # IF (NULL == I) je %process_break_cleaned # We are done cmp_rbx,rcx # IF I != break_frame je %process_break_cleaned # We are done call %emit_out # Emit it mov_rcx,[rcx] # I = I->NEXT jmp %process_break_iter # Keep looping :process_break_cleaned mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax] # global_token->NEXT mov_[rip+DWORD],rax %global_token # global_token = global_token->NEXT lea_rax,[rip+DWORD] %process_break_string_2 # Using "jmp %" call %emit_out # Emit it mov_rax,[rip+DWORD] %break_target_head # Get what we are in call %emit_out # Emit it mov_rax,[rip+DWORD] %break_target_func # Get what function we are in call %emit_out # Emit it lea_rax,[rip+DWORD] %underline # Using "_" call %emit_out # Emit it mov_rax,[rip+DWORD] %break_target_num # Get dem digits call %emit_out # Emit it lea_rax,[rip+DWORD] %process_break_string_3 # Using "\n" call %emit_out # Emit it lea_rax,[rip+DWORD] %process_break_string_4 # Using "ERROR in break statement\nMissing ;\n" lea_rbx,[rip+DWORD] %semicolon # Using ";" call %require_match # Make sure we have it pop_rcx # Restore RCX pop_rbx # Restore RBX ret :process_break_bad # Breaking badly mov_r14, %2 # write to standard error # call %line_error # Write useful debug info mov_rax,rcx # put S in the right place call %File_Print # print it lea_rax,[rip+DWORD] %process_break_string_0 
# Ending string
	call %File_Print  # print it
	jmp %Exit_Failure  # Abort Hard

:process_break_string_0
"Not inside of a loop or case statement"

:process_break_string_1
"pop_rbx	# break_cleanup_locals
"

:process_break_string_2
"jmp %"

:process_break_string_3
"
"

:process_break_string_4
"ERROR in break statement
Missing ;
"


# expression function
# Receives Nothing
# Returns Nothing
# Walks global_token and updates output_list
# Parses a bitwise expression; if the next token is "=", picks a store
# instruction, recurses through common_recursion for the right-hand side,
# emits the store and clears current_target
# The byte store is used only when the previous token is "]" and
# current_target->NAME is "char*"; every other assignment uses the full store
# Uses RAX and RBX for match and RCX for char* store
:expression
	push_rbx  # Protect RBX
	push_rcx  # Protect RCX
	call %bitwise_expr  # Collect bitwise expressions

	mov_rbx,[rip+DWORD] %global_token  # Using global_token
	mov_rbx,[rbx+BYTE] !16  # global_token->S
	lea_rax,[rip+DWORD] %equal  # "="
	call %match  # IF global_token->S == "="
	cmp_rax, %0  # We have to deal with assignment
	jne %expression_done  # Looks like nope

	# Deal with possible assignment
	lea_rcx,[rip+DWORD] %expression_string_1  # Assume "mov_[rbx],al\n" by default
	mov_rbx,[rip+DWORD] %global_token  # Using global_token
	mov_rbx,[rbx+BYTE] !8  # global_token->PREV
	mov_rbx,[rbx+BYTE] !16  # global_token->PREV->S
	lea_rax,[rip+DWORD] %close_bracket  # Using "]"
	call %match  # IF global_token->PREV->S == "]"
	cmp_rax, %0  # Then we might have a char
	jne %expression_int  # Otherwise INT

	mov_rbx,[rip+DWORD] %current_target  # Using current_target
	mov_rbx,[rbx+BYTE] !48  # current_target->NAME
	lea_rax,[rip+DWORD] %type_char_indirect_name  # Using "char*"
	call %match  # Intentional inefficiency because I feel like it
	cmp_rax, %0  # IF current_target->NAME == "char*"
	jne %expression_int  # Not "char*": use the full-width store instead
	jmp %expression_common  # Looks like we have to use "mov_[rbx],al\n"

:expression_int
	lea_rcx,[rip+DWORD] %expression_string_0  # Use "mov_[rbx],rax\n"

:expression_common
	lea_rax,[rip+DWORD] %expression  # Passing expression
	call %common_recursion  # Recurse
	mov_rax,rcx  # Using Store
	call %emit_out  # Emit it
	mov_rax, %0  # Using NULL
	mov_[rip+DWORD],rax %current_target  # current_target = NULL

:expression_done
	pop_rcx  # Restore RCX
pop_rbx  # Restore RBX
	ret

:expression_string_0
"mov_[rbx],rax
"

:expression_string_1
"mov_[rbx],al
"


# bitwise_expr function
# Receives nothing
# Returns nothing
# Walks global_token list and updates output list
# Just calls other functions
:bitwise_expr
	call %relational_expr  # Walk up the tree
	call %bitwise_expr_stub  # Let general recursion do the work
	ret


# bitwise_expr_stub function
# Receives nothing
# Returns Nothing
# Just calls general_recursion a bunch
# Each call passes: RAX = next-level parser, RBX = text to emit,
# RCX = operator token, RDX = this stub (for further recursion)
# Note: "&&" emits the same "and_rax,rbx" as "&", and "||" the same
# "or_rax,rbx" as "|"
# Uses RAX, RBX, RCX and RDX for passing constants to general recursion
:bitwise_expr_stub
	push_rbx  # Protect RBX
	push_rcx  # Protect RCX
	push_rdx  # Protect RDX

	lea_rax,[rip+DWORD] %relational_expr  # Using relational_expr
	lea_rbx,[rip+DWORD] %bitwise_expr_stub_string_0  # Using "and_rax,rbx\n"
	lea_rcx,[rip+DWORD] %bitwise_and  # Using "&"
	lea_rdx,[rip+DWORD] %bitwise_expr_stub  # And recurse
	call %general_recursion  # Hit it

	lea_rax,[rip+DWORD] %relational_expr  # Using relational_expr
	lea_rbx,[rip+DWORD] %bitwise_expr_stub_string_0  # Using "and_rax,rbx\n"
	lea_rcx,[rip+DWORD] %logical_and  # Using "&&"
	lea_rdx,[rip+DWORD] %bitwise_expr_stub  # And recurse
	call %general_recursion  # Hit it

	lea_rax,[rip+DWORD] %relational_expr  # Using relational_expr
	lea_rbx,[rip+DWORD] %bitwise_expr_stub_string_1  # Using "or_rax,rbx\n"
	lea_rcx,[rip+DWORD] %bitwise_or  # Using "|"
	lea_rdx,[rip+DWORD] %bitwise_expr_stub  # And recurse
	call %general_recursion  # Hit it

	lea_rax,[rip+DWORD] %relational_expr  # Using relational_expr
	lea_rbx,[rip+DWORD] %bitwise_expr_stub_string_1  # Using "or_rax,rbx\n"
	lea_rcx,[rip+DWORD] %logical_or  # Using "||"
	lea_rdx,[rip+DWORD] %bitwise_expr_stub  # And recurse
	call %general_recursion  # Hit it

	lea_rax,[rip+DWORD] %relational_expr  # Using relational_expr
	lea_rbx,[rip+DWORD] %bitwise_expr_stub_string_2  # Using "xor_rax,rbx\n"
	lea_rcx,[rip+DWORD] %bitwise_xor  # Using "^"
	lea_rdx,[rip+DWORD] %bitwise_expr_stub  # And recurse
	call %general_recursion  # Hit it

	pop_rdx  # Restore RDX
	pop_rcx  # Restore RCX
	pop_rbx  # Restore RBX
	ret

:bitwise_expr_stub_string_0
"and_rax,rbx
"

:bitwise_expr_stub_string_1
"or_rax,rbx
"

:bitwise_expr_stub_string_2
"xor_rax,rbx
"


# relational_expr function
# Receives nothing
# Returns Nothing
# Walks global_token list and updates output list
# just calls other function
:relational_expr
	call %additive_expr  # Walk up the tree
	call %relational_expr_stub  # Recurse
	ret


# relational_expr_stub function
# Receives nothing
# Returns Nothing
# Just calls general_recursion a bunch
# Every emitted sequence is "cmp_rbx,rax", a setCC on AL, then
# "movzx_rax,al"
# Uses RAX, RBX, RCX and RDX for passing constants to general recursion
:relational_expr_stub
	push_rbx  # Protect RBX
	push_rcx  # Protect RCX
	push_rdx  # Protect RDX

	lea_rax,[rip+DWORD] %additive_expr  # Using additive_expr
	lea_rbx,[rip+DWORD] %relational_expr_stub_string_0  # Using "cmp_rbx,rax\nsetl_al\nmovzx_rax,al\n"
	lea_rcx,[rip+DWORD] %less_than_string  # Using "<"
	lea_rdx,[rip+DWORD] %relational_expr_stub  # And recurse
	call %general_recursion  # Hit it

	lea_rax,[rip+DWORD] %additive_expr  # Using additive_expr
	lea_rbx,[rip+DWORD] %relational_expr_stub_string_1  # Using "cmp_rbx,rax\nsetle_al\nmovzx_rax,al\n"
	lea_rcx,[rip+DWORD] %less_than_equal_string  # Using "<="
	lea_rdx,[rip+DWORD] %relational_expr_stub  # And recurse
	call %general_recursion  # Hit it

	lea_rax,[rip+DWORD] %additive_expr  # Using additive_expr
	lea_rbx,[rip+DWORD] %relational_expr_stub_string_2  # Using "cmp_rbx,rax\nsetge_al\nmovzx_rax,al\n"
	lea_rcx,[rip+DWORD] %greater_than_equal_string  # Using ">="
	lea_rdx,[rip+DWORD] %relational_expr_stub  # And recurse
	call %general_recursion  # Hit it

	lea_rax,[rip+DWORD] %additive_expr  # Using additive_expr
	lea_rbx,[rip+DWORD] %relational_expr_stub_string_3  # Using "cmp_rbx,rax\nsetg_al\nmovzx_rax,al\n"
	lea_rcx,[rip+DWORD] %greater_than_string  # Using ">"
	lea_rdx,[rip+DWORD] %relational_expr_stub  # And recurse
	call %general_recursion  # Hit it

	lea_rax,[rip+DWORD] %additive_expr  # Using additive_expr
	lea_rbx,[rip+DWORD] %relational_expr_stub_string_4  # Using "cmp_rbx,rax\nsete_al\nmovzx_rax,al\n"
	lea_rcx,[rip+DWORD] %equal_to_string  # Using "=="
	lea_rdx,[rip+DWORD] %relational_expr_stub  # And recurse
	call %general_recursion  # Hit it

	lea_rax,[rip+DWORD] %additive_expr  # Using additive_expr
	lea_rbx,[rip+DWORD] %relational_expr_stub_string_5  # Using "cmp_rbx,rax\nsetne_al\nmovzx_rax,al\n"
	lea_rcx,[rip+DWORD] %not_equal_string  # Using "!="
	lea_rdx,[rip+DWORD] %relational_expr_stub  # And recurse
	call %general_recursion  # Hit it

	pop_rdx  # Restore RDX
	pop_rcx  # Restore RCX
	pop_rbx  # Restore RBX
	ret

:relational_expr_stub_string_0
"cmp_rbx,rax
setl_al
movzx_rax,al
"

:relational_expr_stub_string_1
"cmp_rbx,rax
setle_al
movzx_rax,al
"

:relational_expr_stub_string_2
"cmp_rbx,rax
setge_al
movzx_rax,al
"

:relational_expr_stub_string_3
"cmp_rbx,rax
setg_al
movzx_rax,al
"

:relational_expr_stub_string_4
"cmp_rbx,rax
sete_al
movzx_rax,al
"

:relational_expr_stub_string_5
"cmp_rbx,rax
setne_al
movzx_rax,al
"


# additive_expr function
# Receives nothing
# Returns Nothing
# Walks global_token list and updates output list
# just calls other function
:additive_expr
	call %postfix_expr  # Walk up the tree
	call %additive_expr_stub  # Recurse
	ret


# additive_expr_stub function
# Receives nothing
# Returns Nothing
# Just calls general_recursion a bunch
# Uses RAX, RBX, RCX and RDX for passing constants to general recursion
:additive_expr_stub
	push_rbx  # Protect RBX
	push_rcx  # Protect RCX
	push_rdx  # Protect RDX

	lea_rax,[rip+DWORD] %postfix_expr  # Using postfix_expr
	lea_rbx,[rip+DWORD] %additive_expr_stub_string_0  # Using "add_rax,rbx\n"
	lea_rcx,[rip+DWORD] %plus_string  # Using "+"
	lea_rdx,[rip+DWORD] %additive_expr_stub  # And recurse
	call %general_recursion  # Hit it

	lea_rax,[rip+DWORD] %postfix_expr  # Using postfix_expr
	lea_rbx,[rip+DWORD] %additive_expr_stub_string_1  # Using "sub_rbx,rax\nmov_rax,rbx\n"
	lea_rcx,[rip+DWORD] %minus_string  # Using "-"
	lea_rdx,[rip+DWORD] %additive_expr_stub  # And recurse
	call %general_recursion  # Hit it
lea_rax,[rip+DWORD] %postfix_expr # Using postfix_expr lea_rbx,[rip+DWORD] %additive_expr_stub_string_2 # Using "mul_rbx\n" lea_rcx,[rip+DWORD] %multiply_string # Using "*" lea_rdx,[rip+DWORD] %additive_expr_stub # And recurse call %general_recursion # Hit it lea_rax,[rip+DWORD] %postfix_expr # Using postfix_expr lea_rbx,[rip+DWORD] %additive_expr_stub_string_3 # Using "xchg_rbx,rax\nmov_rdx, %0\ndiv_rbx\n" lea_rcx,[rip+DWORD] %divide_string # Using "/" lea_rdx,[rip+DWORD] %additive_expr_stub # And recurse call %general_recursion # Hit it lea_rax,[rip+DWORD] %postfix_expr # Using postfix_expr lea_rbx,[rip+DWORD] %additive_expr_stub_string_4 # Using "xchg_rbx,rax\nmov_rdx, %0\ndiv_rbx\nmov_rax,rdx\n" lea_rcx,[rip+DWORD] %modulus_string # Using "%" lea_rdx,[rip+DWORD] %additive_expr_stub # And recurse call %general_recursion # Hit it lea_rax,[rip+DWORD] %postfix_expr # Using postfix_expr lea_rbx,[rip+DWORD] %additive_expr_stub_string_5 # Using "mov_rcx,rax\nmov_rax,rbx\nsal_rax,cl\n" lea_rcx,[rip+DWORD] %left_shift_string # Using "<<" lea_rdx,[rip+DWORD] %additive_expr_stub # And recurse call %general_recursion # Hit it lea_rax,[rip+DWORD] %postfix_expr # Using postfix_expr lea_rbx,[rip+DWORD] %additive_expr_stub_string_6 # Using "mov_rcx,rax\nmov_rax,rbx\nsar_rax,cl\n" lea_rcx,[rip+DWORD] %right_shift_string # Using ">>" lea_rdx,[rip+DWORD] %additive_expr_stub # And recurse call %general_recursion # Hit it pop_rdx # Restore RDX pop_rcx # Restore RCX pop_rbx # Restore RBX ret :additive_expr_stub_string_0 "add_rax,rbx " :additive_expr_stub_string_1 "sub_rbx,rax mov_rax,rbx " :additive_expr_stub_string_2 "mul_rbx " :additive_expr_stub_string_3 "xchg_rbx,rax mov_rdx, %0 div_rbx " :additive_expr_stub_string_4 "xchg_rbx,rax mov_rdx, %0 div_rbx mov_rax,rdx " :additive_expr_stub_string_5 "mov_rcx,rax mov_rax,rbx sal_rax,cl " :additive_expr_stub_string_6 "mov_rcx,rax mov_rax,rbx sar_rax,cl " # postfix_expr function # Receives nothing # Returns Nothing # Walks global_token 
# list and updates output list
# just calls other function
:postfix_expr
	call %primary_expr                                  # Operand first
	call %postfix_expr_stub                             # Then any "[" or "->" suffixes
	ret


# postfix_expr_stub function
# Receives nothing
# Returns nothing
# Checks the current token for "[" and "->" and dispatches to the matching
# handler; otherwise does nothing.
# match is used throughout this file as "RAX == 0 means the strings are equal".
# RBX keeps global_token->S as read on entry for BOTH comparisons.
:postfix_expr_stub
	push_rbx                                            # Protect RBX
	mov_rbx,[rip+DWORD] %global_token                   # Using global_token
	mov_rbx,[rbx+BYTE] !16                              # global_token->S
	lea_rax,[rip+DWORD] %open_bracket                   # Using "["
	call %match                                         # IF global_token->S == "["
	cmp_rax, %0                                         # then we have an array
	jne %postfix_expr_stub_arrow                        # Otherwise try arrow

	# Deal with array
	call %postfix_expr_array                            # Index expression
	call %postfix_expr_stub                             # Further suffixes

:postfix_expr_stub_arrow
	lea_rax,[rip+DWORD] %arrow_string                   # Using "->"
	call %match                                         # IF global_token->S == "->"
	cmp_rax, %0                                         # then we need a struct offset
	jne %postfix_expr_stub_done                         # Otherwise be done

	# Deal with arrow
	call %postfix_expr_arrow                            # Member access
	call %postfix_expr_stub                             # Further suffixes

:postfix_expr_stub_done
	pop_rbx                                             # Restore RBX
	ret


# unary_expr_sizeof function
# Receives nothing
# Returns nothing
# Consumes "sizeof ( type )" and emits "mov_rax, %<size>\n".
# Uses RCX for A->SIZE (the quad at offset 8 of the value returned by type_name)
:unary_expr_sizeof
	push_rbx                                            # Protect RBX
	push_rcx                                            # Protect RCX
	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rax,[rax]                                       # global_token->NEXT
	mov_[rip+DWORD],rax %global_token                   # global_token = global_token->NEXT

	lea_rax,[rip+DWORD] %unary_expr_sizeof_string_0     # Using "ERROR in unary_expr\nMissing (\n"
	lea_rbx,[rip+DWORD] %open_paren                     # Using "("
	call %require_match                                 # Make sure we have it

	call %type_name                                     # Get the type
	mov_rcx,[rax+BYTE] !8                               # Keep A->SIZE

	lea_rax,[rip+DWORD] %unary_expr_sizeof_string_1     # Using "ERROR in unary_expr\nMissing )\n"
	lea_rbx,[rip+DWORD] %close_paren                    # Using ")"
	call %require_match                                 # Make sure we have it

	lea_rax,[rip+DWORD] %unary_expr_sizeof_string_2     # Using "mov_rax, %"
	call %emit_out                                      # Emit it

	mov_rax,rcx                                         # Put A->SIZE in the right place
	call %numerate_number                               # Turn into string
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %unary_expr_sizeof_string_3     # Using "\n"
	call %emit_out                                      # Emit it

	pop_rcx                                             # Restore RCX
	pop_rbx                                             # Restore RBX
	ret

:unary_expr_sizeof_string_0
"ERROR in unary_expr
Missing (
"

:unary_expr_sizeof_string_1
"ERROR in unary_expr
Missing )
"

:unary_expr_sizeof_string_2
"mov_rax, %"

:unary_expr_sizeof_string_3
"
"


# postfix_expr_array function
# Receives nothing
# Returns nothing
# Handles "[ expression ]" after a primary expression.
# Uses RBX for struct type* ARRAY and RCX for char* ASSIGN
# (ASSIGN is the load fragment emitted last; it becomes "" when "=" follows).
:postfix_expr_array
	push_rbx                                            # Protect RBX
	push_rcx                                            # Protect RCX
	mov_rax,[rip+DWORD] %current_target                 # ARRAY = current_target
	push_rax                                            # Keep it across the recursion
	lea_rax,[rip+DWORD] %expression                     # Using expression
	call %common_recursion                              # Parse the index
	pop_rbx                                             # Restore ARRAY
	mov_[rip+DWORD],rbx %current_target                 # current_target = ARRAY
	lea_rcx,[rip+DWORD] %postfix_expr_array_string_0    # ASSIGN = "mov_rax,[rax]\n"

	lea_rax,[rip+DWORD] %type_char_indirect_name        # Using "char*"
	mov_rbx,[rbx+BYTE] !48                              # current_target->NAME
	call %match                                         # IF current_target->NAME == "char*"
	cmp_rax, %0                                         # then load a byte
	jne %postfix_expr_array_large                       # Otherwise scale the index

	# Deal with loading byte
	lea_rcx,[rip+DWORD] %postfix_expr_array_string_1    # ASSIGN = "movsx_rax,BYTE_PTR_[rax]\n"
	jmp %postfix_expr_array_common                      # Do the next bit

:postfix_expr_array_large
	# Arrays of anything other than char: shift the index by
	# ceil_log2(current_target->INDIRECT->SIZE)
	lea_rax,[rip+DWORD] %postfix_expr_array_string_2    # Using "sal_rax, !"
	call %emit_out                                      # Emit it

	mov_rax,[rip+DWORD] %current_target                 # Using current_target
	mov_rax,[rax+BYTE] !24                              # current_target->INDIRECT
	mov_rax,[rax+BYTE] !8                               # current_target->INDIRECT->SIZE
	call %ceil_log2                                     # Shift amount
	call %numerate_number                               # As a string
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %postfix_expr_array_string_3    # Using "\n"
	call %emit_out                                      # Emit it

:postfix_expr_array_common
	lea_rax,[rip+DWORD] %postfix_expr_array_string_4    # Using "add_rax,rbx\n"
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %postfix_expr_array_string_5    # Using "ERROR in postfix_expr\nMissing ]\n"
	lea_rbx,[rip+DWORD] %close_bracket                  # Using "]"
	call %require_match                                 # Make sure we have it

	mov_rbx,[rip+DWORD] %global_token                   # Using global_token
	mov_rbx,[rbx+BYTE] !16                              # global_token->S
	lea_rax,[rip+DWORD] %equal                          # Using "="
	call %match                                         # IF global_token->S == "="
	cmp_rax, %0                                         # then the address must be preserved
	jne %postfix_expr_array_done                        # Otherwise keep the load

	# Assignment target: emit no load
	lea_rcx,[rip+DWORD] %postfix_expr_array_string_6    # ASSIGN = ""

:postfix_expr_array_done
	mov_rax,rcx                                         # Using ASSIGN
	call %emit_out                                      # Emit it

	pop_rcx                                             # Restore RCX
	pop_rbx                                             # Restore RBX
	ret

:postfix_expr_array_string_0
"mov_rax,[rax]
"

:postfix_expr_array_string_1
"movsx_rax,BYTE_PTR_[rax]
"

:postfix_expr_array_string_2
"sal_rax, !"

:postfix_expr_array_string_3
"
"

:postfix_expr_array_string_4
"add_rax,rbx
"

:postfix_expr_array_string_5
"ERROR in postfix_expr
Missing ]
"

:postfix_expr_array_string_6
'00'


# ceil_log2 function
# Receives int A in RAX
# Returns RESULT in RAX
# RESULT starts at -1 when (A & (A - 1)) == 0 and at 0 otherwise, then is
# incremented once per right shift of A while A > 0 (signed compare).
# Uses RBX for INT A and RCX for INT RESULT
:ceil_log2
	push_rbx                                            # Protect RBX
	push_rcx                                            # Protect RCX
	mov_rcx, %0                                         # RESULT = 0

	mov_rbx,rax                                         # Keep A
	sub_rax, %1                                         # (A - 1)
	and_rax,rbx                                         # A & (A - 1)
	cmp_rax, %0                                         # IF 0 == (A & (A - 1))
	jne %ceil_log2_iter                                 # Otherwise start from 0

	mov_rcx, %-1                                        # RESULT = -1

:ceil_log2_iter
	cmp_rbx, %0                                         # WHILE A > 0
	jle %ceil_log2_done                                 # Otherwise be done

	add_rcx, %1                                         # RESULT = RESULT + 1
	shr_rbx                                             # A = A >> 1
	jmp %ceil_log2_iter                                 # Keep looping

:ceil_log2_done
	mov_rax,rcx                                         # Return RESULT
	pop_rcx                                             # Restore RCX
	pop_rbx                                             # Restore RBX
	ret


# postfix_expr_arrow function
# Receives nothing
# Returns nothing
# Handles "-> member": emits the offset arithmetic and, unless an "=" follows
# or the member SIZE is below 4, a load. Updates current_target.
# Uses RBX for struct type* I
:postfix_expr_arrow
	push_rbx                                            # Protect RBX
	lea_rax,[rip+DWORD] %postfix_expr_arrow_string_0    # Using "# looking up offset\n"
	call %emit_out                                      # Emit it

	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rax,[rax]                                       # global_token->NEXT
	mov_[rip+DWORD],rax %global_token                   # global_token = global_token->NEXT

	mov_rbx,[rax+BYTE] !16                              # global_token->S
	mov_rax,[rip+DWORD] %current_target                 # Using current_target
	call %lookup_member                                 # lookup_member(current_target, global_token->S)
	mov_rbx,rax                                         # I = result

	mov_rax,[rax+BYTE] !40                              # I->TYPE
	mov_[rip+DWORD],rax %current_target                 # current_target = I->TYPE

	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rax,[rax]                                       # global_token->NEXT
	mov_[rip+DWORD],rax %global_token                   # global_token = global_token->NEXT

	mov_rax,[rbx+BYTE] !16                              # I->OFFSET
	cmp_rax, %0                                         # IF 0 == I->OFFSET
	je %postfix_expr_arrow_first                        # then no offset arithmetic is needed

	# Deal with needing an offset
	lea_rax,[rip+DWORD] %postfix_expr_arrow_string_1    # Using "# -> offset calculation\nmov_rbx, %"
	call %emit_out                                      # Emit it

	mov_rax,[rbx+BYTE] !16                              # I->OFFSET
	call %numerate_number                               # Convert to string
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %postfix_expr_arrow_string_2    # Using "\nadd_rax,rbx\n"
	call %emit_out                                      # Emit it

:postfix_expr_arrow_first
	mov_rax,[rbx+BYTE] !8                               # I->SIZE
	cmp_rax, %4                                         # IF I->SIZE >= 4
	jl %postfix_expr_arrow_done                         # Otherwise be done

	# Last chance for load
	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rbx,[rax+BYTE] !16                              # global_token->S
	lea_rax,[rip+DWORD] %equal                          # Using "="
	call %match                                         # IF global_token->S == "="
	cmp_rax, %0                                         # then this is an assignment target
	je %postfix_expr_arrow_done                         # and must not be loaded

	# Deal with load case
	lea_rax,[rip+DWORD] %postfix_expr_arrow_string_3    # Using "mov_rax,[rax]\n"
	call %emit_out                                      # Emit it

:postfix_expr_arrow_done
	pop_rbx                                             # Restore RBX
	ret

# The first two fragments begin with a "#" comment in the generated output, so
# their newlines are required to keep the following output uncommented.
:postfix_expr_arrow_string_0
"# looking up offset
"

:postfix_expr_arrow_string_1
"# -> offset calculation
mov_rbx, %"

:postfix_expr_arrow_string_2
"
add_rax,rbx
"

:postfix_expr_arrow_string_3
"mov_rax,[rax]
"


# primary_expr function
# Receives nothing
# Returns nothing
# Dispatches on the current token, in this order: "sizeof", leading '-',
# '!', '~', '(', a char literal ('), a string literal ("), an identifier
# start character, a digit; anything else is reported by primary_expr_failure.
:primary_expr
	push_rbx                                            # Protect RBX

	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rbx,[rax+BYTE] !16                              # global_token->S
	lea_rax,[rip+DWORD] %sizeof_string                  # Using "sizeof"
	call %match                                         # See if match
	cmp_rax, %0                                         # IF match
	jne %primary_expr_neg                               # Otherwise try negatives

	# Deal with sizeof
	call %unary_expr_sizeof                             # Handle it
	jmp %primary_expr_done                              # Be done

:primary_expr_neg
	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rax,[rax+BYTE] !16                              # global_token->S
	mov_al,[rax]                                        # global_token->S[0]
	movzx_rax,al                                        # Zero extend the byte
	cmp_rax, %45                                        # IF global_token->S[0] == '-'
	jne %primary_expr_not                               # Otherwise try logical NOT

	# Deal with negation: emitted code computes 0 - operand
	lea_rax,[rip+DWORD] %primary_expr_string_0          # Using "mov_rax, %0\n"
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %postfix_expr                   # Passing postfix_expr
	call %common_recursion                              # Get what is being negated

	lea_rax,[rip+DWORD] %primary_expr_string_1          # Using "sub_rbx,rax\nmov_rax,rbx\n"
	call %emit_out                                      # Emit it
	jmp %primary_expr_done                              # Be done

:primary_expr_not
	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rax,[rax+BYTE] !16                              # global_token->S
	mov_al,[rax]                                        # global_token->S[0]
	movzx_rax,al                                        # Zero extend the byte
	cmp_rax, %33                                        # IF global_token->S[0] == '!'
	jne %primary_expr_bin                               # Otherwise try '~'

	# Deal with logical not: emitted code computes 1 xor operand
	lea_rax,[rip+DWORD] %primary_expr_string_2          # Using "mov_rax, %1\n"
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %postfix_expr                   # Passing postfix_expr
	call %common_recursion                              # Get what is being notted

	lea_rax,[rip+DWORD] %primary_expr_string_3          # Using "xor_rax,rbx\n"
	call %emit_out                                      # Emit it
	jmp %primary_expr_done                              # Be done

:primary_expr_bin
	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rax,[rax+BYTE] !16                              # global_token->S
	mov_al,[rax]                                        # global_token->S[0]
	movzx_rax,al                                        # Zero extend the byte
	cmp_rax, %126                                       # IF global_token->S[0] == '~'
	jne %primary_expr_paren                             # Otherwise try paren

	# Deal with binary NOT
	lea_rax,[rip+DWORD] %postfix_expr                   # Passing postfix_expr
	call %common_recursion                              # Get what is being notted
	lea_rax,[rip+DWORD] %primary_expr_string_4          # Using "not_rax\n"
	call %emit_out                                      # Emit it
	jmp %primary_expr_done                              # Be done

:primary_expr_paren
	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rax,[rax+BYTE] !16                              # global_token->S
	mov_al,[rax]                                        # global_token->S[0]
	movzx_rax,al                                        # Zero extend the byte
	cmp_rax, %40                                        # IF global_token->S[0] == '('
	jne %primary_expr_ch                                # Otherwise try char

	# Deal with nesting
	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rax,[rax]                                       # global_token->NEXT
	mov_[rip+DWORD],rax %global_token                   # global_token = global_token->NEXT
	call %expression                                    # Parse the inner expression
	lea_rax,[rip+DWORD] %primary_expr_string_5          # Using "Error in Primary expression\nDidn't get )\n"
	lea_rbx,[rip+DWORD] %close_paren                    # Using ")"
	call %require_match                                 # Make sure we have it
	jmp %primary_expr_done                              # Be done

:primary_expr_ch
	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rax,[rax+BYTE] !16                              # global_token->S
	mov_al,[rax]                                        # global_token->S[0]
	movzx_rax,al                                        # Zero extend the byte
	cmp_rax, %39                                        # IF global_token->S[0] == "'"
	jne %primary_expr_str                               # Otherwise try string

	# Deal with chars
	call %primary_expr_char                             # Handle that char
	jmp %primary_expr_done                              # Be done

:primary_expr_str
	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rax,[rax+BYTE] !16                              # global_token->S
	mov_al,[rax]                                        # global_token->S[0]
	movzx_rax,al                                        # Zero extend the byte
	cmp_rax, %34                                        # IF global_token->S[0] == '"'
	jne %primary_expr_var                               # Otherwise try a variable

	# Deal with strings
	call %primary_expr_string                           # Handle that string
	jmp %primary_expr_done                              # Be done

:primary_expr_var
	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rax,[rax+BYTE] !16                              # global_token->S
	mov_al,[rax]                                        # global_token->S[0]
	movzx_rax,al                                        # Zero extend the byte
	lea_rbx,[rip+DWORD] %primary_expr_string_6          # Using "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"
	call %In_Set                                        # See if we have a match
	cmp_rax, %1                                         # IF match
	jne %primary_expr_num                               # Otherwise try number

	# Deal with variables
	call %primary_expr_variable                         # Handle the identifier
	jmp %primary_expr_done                              # Be done

:primary_expr_num
	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rax,[rax+BYTE] !16                              # global_token->S
	mov_al,[rax]                                        # global_token->S[0]
	movzx_rax,al                                        # Zero extend the byte
	lea_rbx,[rip+DWORD] %primary_expr_string_7          # Using "0123456789"
	call %In_Set                                        # See if we have a match
	cmp_rax, %1                                         # IF match
	jne %primary_expr_fail                              # Otherwise we failed hard

	# Deal with numbers
	call %primary_expr_number                           # Collect the number
	jmp %primary_expr_done                              # Be done

:primary_expr_fail
	# Bad input: primary_expr_failure ends in a jump to Exit_Failure
	call %primary_expr_failure                          # No match means failure

:primary_expr_done
	pop_rbx                                             # Restore RBX
	ret

:primary_expr_string_0
"mov_rax, %0
"

:primary_expr_string_1
"sub_rbx,rax
mov_rax,rbx
"

:primary_expr_string_2
"mov_rax, %1
"

:primary_expr_string_3
"xor_rax,rbx
"

:primary_expr_string_4
"not_rax
"

:primary_expr_string_5
"Error in Primary expression
Didn't get )
"

:primary_expr_string_6
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"

:primary_expr_string_7
"0123456789"


# primary_expr_variable function
# Receives nothing
# Returns nothing
# Consumes one identifier token and resolves it, in order, against
# global_constant_list, function->locals, function->args,
# global_function_list and global_symbol_list; aborts if none match.
# Uses RAX for struct token_list* a and RCX for char* S
:primary_expr_variable
	push_rbx                                            # Protect RBX
	push_rcx                                            # Protect RCX
	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rcx,[rax+BYTE] !16                              # S = global_token->S
	mov_rax,[rax]                                       # global_token->NEXT
	mov_[rip+DWORD],rax %global_token                   # global_token = global_token->NEXT

	mov_rax,rcx                                         # Using S
	mov_rbx,[rip+DWORD] %global_constant_list           # Using global_constant_list
	call %sym_lookup                                    # sym_lookup(s, global_constant_list)
	cmp_rax, %0                                         # IF NULL == result
	je %primary_expr_variable_local                     # Try locals next

	# Deal with constant load
	mov_rbx,[rax+BYTE] !32                              # a->ARGS
	lea_rax,[rip+DWORD] %primary_expr_variable_string_2 # Using "mov_rax, %"
	call %emit_out                                      # Emit it

	mov_rax,[rbx+BYTE] !16                              # a->ARGS->S
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %primary_expr_variable_string_1 # Using "\n"
	call %emit_out                                      # Emit it
	jmp %primary_expr_variable_done                     # Be done

:primary_expr_variable_local
	mov_rax,rcx                                         # Using S
	mov_rbx,[rip+DWORD] %function                       # Using function
	mov_rbx,[rbx+BYTE] !8                               # function->locals
	call %sym_lookup                                    # sym_lookup(s, function->locals)
	cmp_rax, %0                                         # IF NULL == result
	je %primary_expr_variable_arguments                 # Try arguments next

	# Deal with local load
	call %variable_load                                 # Collect it
	jmp %primary_expr_variable_done                     # Be done

:primary_expr_variable_arguments
	mov_rax,rcx                                         # Using S
	mov_rbx,[rip+DWORD] %function                       # Using function
	mov_rbx,[rbx+BYTE] !32                              # function->args
	call %sym_lookup                                    # sym_lookup(s, function->args)
	cmp_rax, %0                                         # IF NULL == result
	je %primary_expr_variable_function                  # Try functions next

	# Deal with argument load
	call %variable_load                                 # Collect it
	jmp %primary_expr_variable_done                     # Be done

:primary_expr_variable_function
	mov_rax,rcx                                         # Using S
	mov_rbx,[rip+DWORD] %global_function_list           # Using global_function_list
	call %sym_lookup                                    # sym_lookup(s, global_function_list)
	cmp_rax, %0                                         # IF NULL == result
	je %primary_expr_variable_global                    # Try globals next

	# Deal with functions
	call %function_load                                 # Call or address-of
	jmp %primary_expr_variable_done                     # Be done

:primary_expr_variable_global
	mov_rax,rcx                                         # Using S
	mov_rbx,[rip+DWORD] %global_symbol_list             # Using global_symbol_list
	call %sym_lookup                                    # sym_lookup(s, global_symbol_list)
	cmp_rax, %0                                         # IF NULL == result
	je %primary_expr_variable_error                     # Give up

	# Deal with globals
	call %global_load                                   # Collect that global
	jmp %primary_expr_variable_done                     # Be done

:primary_expr_variable_error
	mov_r14, %2                                         # Write to standard error
#	call %line_error                                    # (disabled) Write useful debug info
	mov_rax,rcx                                         # Put S in the right place
	call %File_Print                                    # Print it
	lea_rax,[rip+DWORD] %primary_expr_variable_string_0 # Ending string
	call %File_Print                                    # Print it
	jmp %Exit_Failure                                   # Abort hard

:primary_expr_variable_done
	pop_rcx                                             # Restore RCX
	pop_rbx                                             # Restore RBX
	ret

# NOTE(review): no comment records the exact bytes of string_0; its trailing
# newline is restored by analogy with every other message in this file.
:primary_expr_variable_string_0
" is not a defined symbol
"

:primary_expr_variable_string_1
"
"

:primary_expr_variable_string_2
"mov_rax, %"


# function_call function
# Receives char* S in RAX and int BOOL in RBX
# Returns nothing
# Emits the frame setup, one push per argument, the call itself and the frame
# teardown. BOOL == 0 selects an indirect call through the value stored at
# [rbp + S]; any other value selects "call %FUNCTION_<S>".
# Uses RCX for char* S, RDX for int BOOL, RSI for PASSED (argument count)
:function_call
	push_rbx                                            # Protect RBX
	push_rcx                                            # Protect RCX
	push_rdx                                            # Protect RDX
	push_rsi                                            # Protect RSI
	mov_rcx,rax                                         # Put S in place
	mov_rdx,rbx                                         # Put BOOL in place
	mov_rsi, %0                                         # PASSED = 0

	lea_rax,[rip+DWORD] %function_call_string_0         # Using "ERROR in process_expression_list\nNo ( was found\n"
	lea_rbx,[rip+DWORD] %open_paren                     # Using "("
	call %require_match                                 # Make sure we have it

	lea_rax,[rip+DWORD] %function_call_string_1         # Using "push_rdi\t# Prevent overwriting in recursion\n"
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %function_call_string_2         # Using "push_rbp\t# Protect the old base pointer\n"
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %function_call_string_3         # Using "mov_rdi,rsp\t# Copy new base pointer\n"
	call %emit_out                                      # Emit it

	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rax,[rax+BYTE] !16                              # global_token->S
	mov_al,[rax]                                        # global_token->S[0]
	movzx_rax,al                                        # Zero extend the byte
	cmp_rax, %41                                        # IF global_token->S[0] == ')'
	je %function_call_gen_done                          # then there are no arguments

	# First argument
	call %expression                                    # Collect the argument

	lea_rax,[rip+DWORD] %function_call_string_4         # Using "push_rax\t#_process_expression1\n"
	call %emit_out                                      # Emit it
	mov_rsi, %1                                         # PASSED = 1

:function_call_gen_iter
	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rax,[rax+BYTE] !16                              # global_token->S
	mov_al,[rax]                                        # global_token->S[0]
	movzx_rax,al                                        # Zero extend the byte
	cmp_rax, %44                                        # IF global_token->S[0] == ','
	jne %function_call_gen_done                         # Otherwise we are done

	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rax,[rax]                                       # global_token->NEXT
	mov_[rip+DWORD],rax %global_token                   # global_token = global_token->NEXT

	call %expression                                    # Collect the argument

	lea_rax,[rip+DWORD] %function_call_string_5         # Using "push_rax\t#_process_expression2\n"
	call %emit_out                                      # Emit it
	add_rsi, %1                                         # PASSED = PASSED + 1
	jmp %function_call_gen_iter                         # Keep trying

:function_call_gen_done
	# All arguments are collected
	lea_rax,[rip+DWORD] %function_call_string_6         # Using "ERROR in process_expression_list\nNo ) was found\n"
	lea_rbx,[rip+DWORD] %close_paren                    # Using ")"
	call %require_match                                 # Make sure we have it

	cmp_rdx, %0                                         # IF BOOL == 0 (function pointer)
	jne %function_call_static                           # Otherwise it is a static call

	# Deal with a passed function pointer
	lea_rax,[rip+DWORD] %function_call_string_7         # Using "lea_rax,[rbp+DWORD] %"
	call %emit_out                                      # Emit it

	mov_rax,rcx                                         # Using S
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %function_call_string_8         # Using "\nmov_rax,[rax]\n"
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %function_call_string_9         # Using "mov_rbp,rdi\n"
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %function_call_string_10        # Using "call_rax\n"
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %function_call_string_13        # Using "pop_rbx\t# _process_expression_locals\n"
	jmp %function_call_cleanup                          # Clean up

:function_call_static
	# Deal with fixed function name
	lea_rax,[rip+DWORD] %function_call_string_9         # Using "mov_rbp,rdi\n"
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %function_call_string_11        # Using "call %FUNCTION_"
	call %emit_out                                      # Emit it

	mov_rax,rcx                                         # Using S
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %function_call_string_12        # Using "\n"
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %function_call_string_13        # Using "pop_rbx\t# _process_expression_locals\n"

:function_call_cleanup
	cmp_rsi, %0                                         # WHILE PASSED > 0
	jle %function_call_done                             # Otherwise be done

	# The pop string is already in RAX.
	# NOTE(review): this loop relies on emit_out leaving RAX unchanged - confirm.
	call %emit_out                                      # Emit one pop per argument
	sub_rsi, %1                                         # PASSED = PASSED - 1
	jmp %function_call_cleanup                          # Keep going

:function_call_done
	lea_rax,[rip+DWORD] %function_call_string_14        # Using "pop_rbp\t# Restore old base pointer\n"
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %function_call_string_15        # Using "pop_rdi\t# Prevent overwrite\n"
	call %emit_out                                      # Emit it

	pop_rsi                                             # Restore RSI
	pop_rdx                                             # Restore RDX
	pop_rcx                                             # Restore RCX
	pop_rbx                                             # Restore RBX
	ret

# Several fragments below carry a "#" comment into the generated output; the
# tab before it and the newline after it are part of the string.
:function_call_string_0
"ERROR in process_expression_list
No ( was found
"

:function_call_string_1
"push_rdi	# Prevent overwriting in recursion
"

:function_call_string_2
"push_rbp	# Protect the old base pointer
"

:function_call_string_3
"mov_rdi,rsp	# Copy new base pointer
"

:function_call_string_4
"push_rax	#_process_expression1
"

:function_call_string_5
"push_rax	#_process_expression2
"

:function_call_string_6
"ERROR in process_expression_list
No ) was found
"

:function_call_string_7
"lea_rax,[rbp+DWORD] %"

:function_call_string_8
"
mov_rax,[rax]
"

:function_call_string_9
"mov_rbp,rdi
"

:function_call_string_10
"call_rax
"

:function_call_string_11
"call %FUNCTION_"

:function_call_string_12
"
"

:function_call_string_13
"pop_rbx	# _process_expression_locals
"

:function_call_string_14
"pop_rbp	# Restore old base pointer
"

:function_call_string_15
"pop_rdi	# Prevent overwrite
"


# variable_load function
# Receives struct token_list* A in RAX
# Returns nothing
# Updates output and current_target.
# When the next token is "(" and A->TYPE->NAME is "FUNCTION" the variable is
# called through function_call with BOOL = 0; otherwise its address is
# emitted, followed by a load unless "=" comes next.
# Uses RCX for A
:variable_load
	push_rbx                                            # Protect RBX
	push_rcx                                            # Protect RCX
	mov_rcx,rax                                         # Protect A

	mov_rbx,[rip+DWORD] %global_token                   # Using global_token
	mov_rbx,[rbx+BYTE] !16                              # global_token->S
	lea_rax,[rip+DWORD] %open_paren                     # Using "("
	call %match                                         # IF global_token->S == "("
	cmp_rax, %0                                         # then it might be a function
	jne %variable_load_regular                          # Otherwise it is regular

	mov_rbx,[rcx+BYTE] !24                              # A->TYPE
	mov_rbx,[rbx+BYTE] !48                              # A->TYPE->NAME
	lea_rax,[rip+DWORD] %type_function_name             # Using "FUNCTION"
	call %match                                         # IF A->TYPE->NAME == "FUNCTION"
	cmp_rax, %0                                         # then it must be a function
	jne %variable_load_regular                          # Otherwise just another regular

	# Deal with function
	mov_rax,[rcx+BYTE] !32                              # A->DEPTH
	call %numerate_number                               # Convert to string
	mov_rbx, %0                                         # BOOL = 0 (call through pointer)
	call %function_call                                 # Create the function call
	jmp %variable_load_done                             # Be done

:variable_load_regular
	mov_rax,[rcx+BYTE] !24                              # A->TYPE
	mov_[rip+DWORD],rax %current_target                 # current_target = A->TYPE

	lea_rax,[rip+DWORD] %variable_load_string_0         # Using "lea_rax,[rbp+DWORD] %"
	call %emit_out                                      # Emit it

	mov_rax,[rcx+BYTE] !32                              # A->DEPTH
	call %numerate_number                               # Convert to string
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %variable_load_string_1         # Using "\n"
	call %emit_out                                      # Emit it

	# Check for special case of assignment
	mov_rbx,[rip+DWORD] %global_token                   # Using global_token
	mov_rbx,[rbx+BYTE] !16                              # global_token->S
	lea_rax,[rip+DWORD] %equal                          # Using "="
	call %match                                         # IF global_token->S == "="
	cmp_rax, %0                                         # then we skip loading
	je %variable_load_done                              # and are done

	# Deal with common case
	lea_rax,[rip+DWORD] %variable_load_string_2         # Using "mov_rax,[rax]\n"
	call %emit_out                                      # Emit it

:variable_load_done
	pop_rcx                                             # Restore RCX
	pop_rbx                                             # Restore RBX
	ret

:variable_load_string_0
"lea_rax,[rbp+DWORD] %"

:variable_load_string_1
"
"

:variable_load_string_2
"mov_rax,[rax]
"


# function_load function
# Receives struct token_list* A in RAX
# Returns nothing
# Emits a static call when "(" follows, otherwise the function's address.
# Uses RCX to hold A->S
:function_load
	push_rbx                                            # Protect RBX
	push_rcx                                            # Protect RCX
	mov_rax,[rax+BYTE] !16                              # A->S
	mov_rcx,rax                                         # Protect A->S
	mov_rbx,[rip+DWORD] %global_token                   # Using global_token
	mov_rbx,[rbx+BYTE] !16                              # global_token->S
	lea_rax,[rip+DWORD] %open_paren                     # Using "("
	call %match                                         # IF global_token->S == "("
	cmp_rax, %0                                         # then we need to do a function call
	jne %function_load_regular                          # Otherwise just load its address

	# Deal with function call
	mov_rax,rcx                                         # Using A->S
	mov_rbx, %1                                         # BOOL = 1 (static call)
	call %function_call                                 # Deal with it
	jmp %function_load_done                             # Be done

:function_load_regular
	lea_rax,[rip+DWORD] %function_load_string_0         # Using "lea_rax,[rip+DWORD] %FUNCTION_"
	call %emit_out                                      # Emit it

	mov_rax,rcx                                         # Using A->S
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %function_load_string_1         # Using "\n"
	call %emit_out                                      # Emit it

:function_load_done
	pop_rcx                                             # Restore RCX
	pop_rbx                                             # Restore RBX
	ret

:function_load_string_0
"lea_rax,[rip+DWORD] %FUNCTION_"

:function_load_string_1
"
"


# global_load function
# Receives struct token_list* A in RAX
# Returns nothing
# Sets current_target to A->TYPE, emits the address of GLOBAL_<A->S> and,
# unless "=" comes next, a load of its contents.
# Uses RBX to hold A->S
:global_load
	push_rbx                                            # Protect RBX
	mov_rbx,rax                                         # Set as A
	mov_rbx,[rbx+BYTE] !16                              # Set as A->S

	mov_rax,[rax+BYTE] !24                              # A->TYPE
	mov_[rip+DWORD],rax %current_target                 # current_target = A->TYPE

	lea_rax,[rip+DWORD] %global_load_string_0           # Using "lea_rax,[rip+DWORD] %GLOBAL_"
	call %emit_out                                      # Emit it

	mov_rax,rbx                                         # Using A->S
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %global_load_string_1           # Using "\n"
	call %emit_out                                      # Emit it

	mov_rbx,[rip+DWORD] %global_token                   # Using global_token
	mov_rbx,[rbx+BYTE] !16                              # global_token->S
	lea_rax,[rip+DWORD] %equal                          # Using "="
	call %match                                         # IF global_token->S == "="
	cmp_rax, %0                                         # then this is an assignment target
	je %global_load_done                                # and must not be loaded

	# Otherwise we are loading the contents
	lea_rax,[rip+DWORD] %global_load_string_2           # Using "mov_rax,[rax]\n"
	call %emit_out                                      # Emit it

:global_load_done
	pop_rbx                                             # Restore RBX
	ret

:global_load_string_0
"lea_rax,[rip+DWORD] %GLOBAL_"

:global_load_string_1
"
"

:global_load_string_2
"mov_rax,[rax]
"


# sym_lookup function
# Receives char* S in RAX and struct token_list* symbol_list in RBX
# Returns the first node whose ->S matches S, or NULL when the list is exhausted
# Uses I->S in RAX, S in RBX and I in RCX
:sym_lookup
	push_rbx                                            # Protect RBX
	push_rcx                                            # Protect RCX
	mov_rcx,rbx                                         # I = symbol_list
	mov_rbx,rax                                         # Put S in the right place
:sym_lookup_iter
	cmp_rcx, %0                                         # IF NULL == I
	je %sym_lookup_done                                 # no match was found

	mov_rax,[rcx+BYTE] !16                              # Using I->S
	call %match                                         # IF I->S == S
	cmp_rax, %0                                         # (0 means equal)
	je %sym_lookup_done                                 # Found it

	mov_rcx,[rcx]                                       # I = I->NEXT
	jmp %sym_lookup_iter                                # Otherwise keep looping

:sym_lookup_done
	mov_rax,rcx                                         # Return I
	pop_rcx                                             # Restore RCX
	pop_rbx                                             # Restore RBX
	ret


# primary_expr_number function
# Receives nothing
# Returns nothing
# Emits "mov_rax, %<token>\n" for the current token and steps to the next one
:primary_expr_number
	lea_rax,[rip+DWORD] %primary_expr_number_string_0   # Using "mov_rax, %"
	call %emit_out                                      # Emit it

	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rax,[rax+BYTE] !16                              # global_token->S
	call %emit_out                                      # Emit it

	lea_rax,[rip+DWORD] %primary_expr_number_string_1   # Using "\n"
	call %emit_out                                      # Emit it

	mov_rax,[rip+DWORD] %global_token                   # Using global_token
	mov_rax,[rax]                                       # global_token->NEXT
	mov_[rip+DWORD],rax %global_token                   # global_token = global_token->NEXT
	ret

:primary_expr_number_string_0
"mov_rax, %"

:primary_expr_number_string_1
"
"


# primary_expr_string function
# receives nothing
# Returns nothing
# creates entries for string and calls to generate string output
# uses RCX for char*
number_string :primary_expr_string push_rbx # Protect RBX push_rcx # Protect RCX mov_rbx,[rip+DWORD] %current_count # Using current_count mov_rax,rbx # And putting it in the right place call %numerate_number # Get the string mov_rcx,rax # protect number_string add_rbx, %1 # current_count + 1 mov_[rip+DWORD],rbx %current_count # current_count = current_count + 1 lea_rax,[rip+DWORD] %primary_expr_string_string_0 # Using "lea_rax,[rip+DWORD] %STRING_" call %emit_out # Emit it mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S mov_rbx,rcx # Put number_string in the right place call %uniqueID_out # Make it unique # Generate the target lea_rax,[rip+DWORD] %primary_expr_string_string_1 # Using ":STRING_" mov_rbx,[rip+DWORD] %strings_list # Using strings_list call %emit # Emit it mov_rbx,rax # put new strings_list in place mov_rax,[rip+DWORD] %function # Using function mov_rax,[rax+BYTE] !16 # function->S call %uniqueID # Make it unique mov_rbx,rax # put new strings_list in place # Parse the string mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax+BYTE] !16 # global_token->S call %parse_string # convert to useful form call %emit # Emit it mov_[rip+DWORD],rax %strings_list # Update Strings _list mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax] # global_token->NEXT mov_[rip+DWORD],rax %global_token # global_token = global_token->NEXT pop_rcx # Restore RCX pop_rbx # Restore RBX ret :primary_expr_string_string_0 "lea_rax,[rip+DWORD] %STRING_" :primary_expr_string_string_1 ":STRING_" # primary_expr_char function # Receives nothing # Returns nothing # Updates output_list using global_token :primary_expr_char push_rbx # Protect RBX push_rcx # Protect RCX lea_rax,[rip+DWORD] %primary_expr_char_string_0 # Using "mov_rax, %" call %emit_out # Emit it mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax+BYTE] !16 # global_token->S add_rax, %1 # global_token->S + 1 call %escape_lookup # Get the char 
call %numerate_number # Convert to string call %emit_out # emit it lea_rax,[rip+DWORD] %primary_expr_char_string_1 # Using "\n" call %emit_out # Emit it mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax] # global_token->NEXT mov_[rip+DWORD],rax %global_token # global_token = global_token->NEXT pop_rcx # Restore RCX pop_rbx # Restore RBX ret :primary_expr_char_string_0 "mov_rax, %" :primary_expr_char_string_1 " " # primary_expr_failure function # Receives nothing # Does not return but aborts hard # Complains about the bad input :primary_expr_failure # call %line_error # Get line of issue mov_r14, %2 # write to standard error lea_rax,[rip+DWORD] %primary_expr_failure_string_0 # Using "Received " call %File_Print # Print it mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax+BYTE] !16 # global_token->S call %File_Print # Print it lea_rax,[rip+DWORD] %primary_expr_failure_string_1 # Using " in primary_expr\n" call %File_Print # Print it jmp %Exit_Failure # Abort Hard :primary_expr_failure_string_0 "Received " :primary_expr_failure_string_1 " in primary_expr " # general_recursion function # Receives FUNCTION F in RAX, char* S in RBX, char* name in RCX and FUNCTION iterate in RDX # Returns nothing # Uses RCX for char* S, RDX for FUNCTION iterate and RBP for FUNCTION F # But generally recurses a shitload :general_recursion push_rbx # Protect RBX push_rcx # Protect RCX push_rdx # Protect RDX push_rbp # Protect RBP mov_rbp,rax # Protect F mov_rax,rcx # Put name in the right place mov_rcx,rbx # Protect S mov_rbx,[rip+DWORD] %global_token # Using global_token mov_rbx,[rbx+BYTE] !16 # global_token->S call %match # IF match(name, global_token->s) cmp_rax, %0 # If true we do jne %general_recursion_done # Otherwise skip it # Deal with the recursion mov_rax,rbp # Put F in the right place call %common_recursion # Recurse mov_rax,rcx # Put S in the right place call %emit_out # Emit it mov_rax,rdx # Put iterate in the right place call_rax # Down the 
rabbit hole :general_recursion_done pop_rbp # Restore RBP pop_rdx # Restore RDX pop_rcx # Restore RCX pop_rbx # Restore RBX ret # promote_type function # Receives struct type* a in RAX and struct type* b in RBX # Returns the most recent type in RAX # Uses RAX for struct type* I, RCX for struct type* A and RDX for struct type* B :promote_type push_rbx # Protect RBX push_rcx # Protect RCX push_rdx # Protect RDX cmp_rbx, %0 # IF NULL == B je %promote_type_done # Just return A mov_rcx,rax # Put A in place mov_rdx,rbx # Put B in place mov_rax,rbx # IF NULL == A cmp_rcx, %0 # Then we just return B je %promote_type_done # Be done # Looks like we need to walk the list mov_rcx,[rcx+BYTE] !48 # A->NAME mov_rdx,[rdx+BYTE] !48 # B->NAME mov_rax,[rip+DWORD] %global_types # I = global_types :promote_type_iter cmp_rax, %0 # IF NULL == I je %promote_type_done # Just be done mov_rbx,[rax+BYTE] !48 # I->NAME cmp_rbx,rcx # IF(A->NAME == I->NAME) je %promote_type_done # Be done cmp_rbx,rdx # IF(B->NAME == I->NAME) je %promote_type_done # Be done mov_rbx,[rax+BYTE] !24 # I->INDIRECT mov_rbx,[rbx+BYTE] !48 # I->INDIRECT->NAME cmp_rbx,rcx # IF(A->NAME == I->INDIRECT->NAME) je %promote_type_done # Be done cmp_rbx,rdx # IF(B->NAME == I->INDIRECT->NAME) je %promote_type_done # Be done mov_rax,[rax] # I = I->NEXT jmp %promote_type_iter # Keep going :promote_type_done pop_rdx # Restore RDX pop_rcx # Restore RCX pop_rbx # Restore RBX ret # common_recursion function # Receives FUNCTION F in RAX # Returns Nothing # Walks global_token list and update output_list # Updates current_target # Uses RBX to hold FUNCTION F and struct type* last_type :common_recursion push_rbx # Protect RBX mov_rbx,rax # Put FUNCTION F safely out of the way lea_rax,[rip+DWORD] %common_recursion_string_0 # Using "push_rax\t#_common_recursion\n" call %emit_out # Emit it mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax] # global_token->NEXT mov_[rip+DWORD],rax %global_token # global_token = 
# ...global_token->NEXT  (end of the comment that wraps in from the previous line)
# Remainder of common_recursion: call F, promote the type, emit the pop.
	mov_rax,rbx	# Prepare for function call
	mov_rbx,[rip+DWORD] %current_target	# Get last type
	call_rax	# F();
	mov_rax,[rip+DWORD] %current_target	# Get current_target
	call %promote_type	# promote_type(current_target, last_type)
	mov_[rip+DWORD],rax %current_target	# Set new current_target
	lea_rax,[rip+DWORD] %common_recursion_string_1	# Using "pop_rbx\t# _common_recursion\n"
	call %emit_out	# Emit it
	pop_rbx	# Restore RBX
	ret

# These two strings are emitted into the generated assembly.  They need the
# real tab and newline bytes that the code documents: without the newline the
# '#' inside them would comment out whatever is emitted next.
:common_recursion_string_0
"push_rax	#_common_recursion
"
:common_recursion_string_1
"pop_rbx	# _common_recursion
"


# require_match function
# Receives char* message in RAX and char* required in RBX
# Returns nothing
# If global_token->S equals required, advances global_token to the next token;
# otherwise prints message through File_Print with R14 = 2 and jumps to Exit_Failure
# Uses RCX to hold message and updates global_token
:require_match
	push_rbx	# Protect RBX
	push_rcx	# Protect RCX
	mov_rcx,rax	# put the message somewhere safe
	mov_rax,[rip+DWORD] %global_token	# Using global_token
	mov_rax,[rax+BYTE] !16	# global_token->S
	call %match	# match(global_token->S, required), 0 means equal
	cmp_rax, %0	# we are fine
	je %require_match_good	# otherwise pain

	# Deal with bad times
#	call %line_error	# Tell user what went wrong (left disabled, as in the original)
	mov_r14, %2	# write to standard error
	mov_rax,rcx	# using our message
	call %File_Print	# Print it
	jmp %Exit_Failure	# Abort HARD

:require_match_good
	mov_rax,[rip+DWORD] %global_token	# Using global_token
	mov_rax,[rax]	# global_token->next
	mov_[rip+DWORD],rax %global_token	# global_token = global_token->next
	pop_rcx	# Restore RCX
	pop_rbx	# Restore RBX
	ret


# uniqueID Function
# Receives char *S in RAX, struct token_list* l in RBX and char* num in RCX
# Returns updated struct token_list* L in RAX
# Pushes four nodes onto l, in order: S, "_", num, "\n"
:uniqueID
	push_rbx	# Protect RBX
	push_rcx	# Protect RCX
	call %emit	# emit(s, l)
	mov_rbx,rax	# Put L in correct place
	lea_rax,[rip+DWORD] %underline	# Using "_"
	call %emit	# emit("_", l)
	mov_rbx,rax	# Put L in correct place
	mov_rax,rcx	# Put num in correct place
	call %emit	# emit(num, l)
	mov_rbx,rax	# Put L in correct place
	lea_rax,[rip+DWORD] %uniqueID_string_0	# Using "\n"
	call %emit	# emit("\n", l)
	pop_rcx	# Restore RCX
	pop_rbx	# Restore RBX
	ret

# Must be a real newline so each unique label ends its own output line.
:uniqueID_string_0
"
"


# uniqueID_out function
# Receives char* S in RAX and char* num in RBX
# Returns nothing (RAX, RBX and RCX are preserved)
# output_list = uniqueID(S, output_list, num)
:uniqueID_out
	push_rax	# Protect RAX
	push_rbx	# Protect RBX
	push_rcx	# Protect RCX
	mov_rcx,rbx	# Put num in right spot
	mov_rbx,[rip+DWORD] %output_list	# Using output_list
	call %uniqueID	# Get updated list
	mov_[rip+DWORD],rax %output_list	# output_list = uniqueID(s, output_list, num)
	pop_rcx	# Restore RCX
	pop_rbx	# Restore RBX
	pop_rax	# Restore RAX
	ret


# emit_out function
# Receives char* S in RAX
# Returns nothing
# Updates output_list: output_list = emit(S, output_list)
# MUST NOT ALTER REGISTERS (RAX and RBX are saved and restored around the call)
:emit_out
	push_rax	# Protect RAX
	push_rbx	# Protect RBX
	mov_rbx,[rip+DWORD] %output_list	# Using output_list
	call %emit	# emit it
	mov_[rip+DWORD],rax %output_list	# update it
	pop_rbx	# Restore RBX
	pop_rax	# Restore RAX
	ret


# emit function
# Receives char *s in RAX and struct token_list* head in RBX
# Returns struct token_list* T in RAX
# Allocates a 40 byte node and sets T->next (offset 0) and T->s (offset 16);
# the other fields keep whatever malloc returned
:emit
	push_rcx	# Protect RCX
	mov_rcx,rax	# put S out of the way
	mov_rax, %40	# sizeof(struct token_list)
	call %malloc	# get T
	mov_[rax],rbx	# t->next = head;
	mov_[rax+BYTE],rcx !16	# t->s = s;
	pop_rcx	# Restore RCX
	ret


# escape_lookup function
# Receives char* c in RAX
# Returns integer value of char in RAX
# A character other than '\' is returned as is; "\x??" is decoded as two hex digits
# Aborts hard if unknown escape is received
# Uses RCX to hold char* C
:escape_lookup
	push_rbx	# Protect RBX
	push_rcx	# Protect RCX
	mov_rcx,rax	# Put char* C in safe place
	mov_al,[rcx]	# Load c[0]
	movzx_rax,al	# make it useful
	cmp_rax, %92	# If '\\' != c[0]
	jne %escape_lookup_done	# Be done

	mov_rbx,rcx	# Prepare for walk
	add_rbx, %1	# increment
	mov_bl,[rbx]	# load c[1]
	movzx_rbx,bl	# make it useful
	cmp_rbx, %120	# Check if \x??
	je %escape_lookup_hex	# Deal with hex

	# Deal with \?
# ...escapes  (end of the "Deal with \? escapes" comment that wraps in from the previous line)
# Remainder of escape_lookup: RBX holds c[1], RCX holds char* C.
	mov_rax, %10	# Guess "\n"
	cmp_rbx, %110	# If n
	je %escape_lookup_done	# Be done

	mov_rax, %9	# Guess "\t"
	cmp_rbx, %116	# If t
	je %escape_lookup_done	# Be done

	mov_rax,rbx	# "\\", "'" and '"' all encode as themselves
	cmp_rbx, %92	# If "\\"
	je %escape_lookup_done	# Be done
	cmp_rbx, %39	# IF "'"
	je %escape_lookup_done	# Be done
	cmp_rbx, %34	# IF '"'
	je %escape_lookup_done	# Be done

	mov_rax, %13	# Guess "\r"
	cmp_rbx, %114	# IF r
	je %escape_lookup_done	# Be done

	# Looks like we have no clue what we are doing
	# Aborting hard
	mov_r14, %2	# write to standard error
	lea_rax,[rip+DWORD] %escape_lookup_string_0	# Using "Unknown escape received: "
	call %File_Print	# Print it
	mov_rax,rcx	# Using C
	call %File_Print	# Print it
	lea_rax,[rip+DWORD] %escape_lookup_string_1	# Using " Unable to process\n"
	call %File_Print	# Print it
	jmp %Exit_Failure	# Abort Hard

:escape_lookup_done
	pop_rcx	# Restore RCX
	pop_rbx	# Restore RBX
	ret

:escape_lookup_hex
	# Give up on C and just assume they know what they are doing
	add_rcx, %2	# increment
	mov_al,[rcx]	# c[2]
	movzx_rax,al	# make it useful
	add_rcx, %1	# increment
	call %char2hex	# Get the hex value
	sal_rax, !4	# c << 4
	mov_bl,[rcx]	# c[3]
	movzx_rbx,bl	# make it useful
	xchg_rax,rbx	# protect c << 4
	call %char2hex	# Get the hex value
	add_rax,rbx	# hex(c[2]) << 4 + hex(c[3])
	jmp %escape_lookup_done	# Be done

:escape_lookup_string_0
"Unknown escape received: "
:escape_lookup_string_1
" Unable to process
"


# char2hex function
# Receives char in RAX
# Returns its value 0-15 in RAX for '0'-'9', 'A'-'F' and 'a'-'f'
# Aborts hard for every other character: prints the message below and
# jumps to Exit_Failure
:char2hex
	sub_rax, %48	# Try 0-9
	cmp_rax, %0	# Anything below '0' went negative
	jl %char2hex_fail	# and must not pass the signed "< 10" test below
	cmp_rax, %10	# Otherwise fun times
	jl %char2hex_done	# Be done

	# Deal with A-F
	and_rax, %0xDF	# Unset High bit turning a-f into A-F
	sub_rax, %7	# Shift down into position
	cmp_rax, %10	# Everything below A is bad
	jl %char2hex_fail	# And should fail
	cmp_rax, %16	# Make sure we are below F
	jl %char2hex_done	# If so be done

:char2hex_fail
	# Time to fail hard
	# Every other error path in this file selects standard error by loading 2
	# into R14 before calling File_Print (File_Print itself is defined outside
	# this span), so this path does the same instead of loading R15.
	mov_r14, %2	# write to standard error
	lea_rax,[rip+DWORD] %char2hex_string_0	# Using "Tried to print non-hex number\n"
	call %File_Print	# Print it
	jmp %Exit_Failure	# Abort Hard

:char2hex_done
	ret

:char2hex_string_0
"Tried to print non-hex number
"


# parse_string function
# Receives char* string in RAX
# Returns cleaned up string: collect_weird_string(string) when weird(string)
# reports true (0), collect_regular_string(string) otherwise
# Protects char* string in RBX
:parse_string
	push_rbx	# Protect RBX
	mov_rbx,rax	# Protect char* string
	call %weird	# Determine if we have a weird string
	cmp_rax, %0	# If weird
	je %parse_string_weird	# Deal with it

	# Dealing with regular string
	mov_rax,rbx	# Passing Char* string
	call %collect_regular_string	# Collect it
	jmp %parse_string_done	# Be done

:parse_string_weird
	mov_rax,rbx	# Passing Char* string
	call %collect_weird_string	# Collect it

:parse_string_done
	pop_rbx	# Restore RBX
	ret


# weird function
# Receives char* string in RAX
# Returns true(0) or false(1) in RAX
# Skips the first byte, then checks every (decoded) character against
# weird_string_0 and the whitespace-before-':' special case
# Uses RCX to hold char* string
:weird
	push_rbx	# Protect RBX
	push_rcx	# Protect RCX
	mov_rcx,rax	# Place string in safe place
	add_rcx, %1	# increment past the "
:weird_reset
	mov_al,[rcx]	# Load a char
	movzx_rax,al	# Make it useful
	cmp_rax, %0	# IF NULL == C
	je %weird_false	# Nothing weird found
	cmp_rax, %92	# IF '\\'
	jne %weird_escaped	# Plain character: go straight to the set check

	# Deal with escape
	mov_rax,rcx	# We are passing the string
	call %escape_lookup	# to look it up
	add_rcx, %1	# string = string + 1
	mov_bl,[rcx]	# get string[1]
	movzx_rbx,bl	# make it useful
	cmp_rbx, %120	# IF 'x' == string[1]
	jne %weird_escaped	# otherwise skip the gap
	add_rcx, %2	# string = string + 2

:weird_escaped
	push_rax	# Protect C in case we need it
	lea_rbx,[rip+DWORD] %weird_string_0	# Use "\t\n !#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
	call %In_Set	# To find if weird
	cmp_rax, %1	# IF TRUE
	pop_rax	# Restore C (pop leaves the flags from cmp intact)
	jne %weird_true	# Not in the set: the string is weird
	add_rcx, %1	# string = string + 1

	# Last chance for weird
	lea_rbx,[rip+DWORD] %weird_string_1	# Use "\t\n\r "
	call %In_Set	# Check for special case
	cmp_rax, %1	# IF TRUE
	jne %weird_reset	# Otherwise not in
the special case # Deal with possible special case mov_al,[rcx] # Load string[1] movzx_rax,al # Make it useful cmp_rax, %58 # IF string[1] == ":" je %weird_true # Then we hit the special case jmp %weird_reset # Keep trying :weird_done pop_rcx # Restore RCX pop_rbx # Restore RBX ret :weird_true mov_rax, %0 # Return true jmp %weird_done # Be done :weird_false mov_rax, %1 # Return false jmp %weird_done # Be done :weird_string_0 " !#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~" :weird_string_1 '09 0a 0d 20 00' # collect_regular_string function # Receives char* string in RAX # Malloc and creates new string to return in RAX # Uses RCX for return string and RDX for passed string :collect_regular_string push_rbx # Protect RBX push_rcx # Protect RCX push_rdx # Protect RDX mov_rdx,rax # Protect our passed string mov_rax, %256 # We need 256 bytes of storage call %malloc # Get our new pointer mov_rcx,rax # put it in place push_rax # protect until done :collect_regular_string_reset mov_al,[rdx] # string[0] movzx_rax,al # Make it useful cmp_rax, %0 # See if we hit the end je %collect_regular_string_done # And be done cmp_rax, %92 # IF string[0] == '\\' je %collect_regular_string_escaped # Deal with that mess # deal with boring char mov_[rcx],al # hold_string[index] = string[0] add_rcx, %1 # Increment it add_rdx, %1 # Increment it jmp %collect_regular_string_reset # And keep going :collect_regular_string_escaped mov_rax,rdx # Using string call %escape_lookup # Get the char mov_[rcx],al # hold_string[index] = escape_lookup(string) add_rdx, %1 # Increment it add_rcx, %1 # Increment it mov_al,[rdx] # string[0] movzx_rax,al # Make it useful add_rdx, %1 # Increment it cmp_rax, %120 # IF 'x' == string[1] jne %collect_regular_string_reset # Otherwise keep going add_rdx, %2 # Increment it jmp %collect_regular_string_reset # Keep going :collect_regular_string_done mov_rax, %34 # Using '"' mov_[rcx],al # hold_string[index] = '"' add_rcx, %1 # 
Increment it mov_rax, %10 # Using "\n" mov_[rcx],al # hold_string[index] = '\n' pop_rax # Return our new string pop_rdx # Restore RDX pop_rcx # Restore RCX pop_rbx # Restore RBX ret # collect_weird_string function # Receives char* string in RAX # Mallocs and returns char* hold in RAX # Uses RCX for char* hold and RDX for char* string :collect_weird_string push_rbx # Protect RBX push_rcx # Protect RCX push_rdx # Protect RDX mov_rdx,rax # Protect our passed string mov_rax, %512 # We need 512 bytes of storage call %malloc # Get our new pointer mov_rcx,rax # put it in place push_rax # protect until done mov_rax, %39 # Using "'" mov_[rcx],al # hold_string[index] = "'" add_rcx, %1 # Increment it add_rdx, %1 # Increment it :collect_weird_string_reset mov_al,[rdx] # Read a byte movzx_rax,al # Make it useful cmp_rax, %0 # IF NULL == string[0] je %collect_weird_string_done # Be done mov_rax, %32 # Using ' ' mov_[rcx],al # hold_string[index] = ' ' add_rcx, %1 # Increment it mov_rax,rdx # Using string call %escape_lookup # Get the char call %hex8 # Update RCX mov_al,[rdx] # Read a byte movzx_rax,al # Make it useful add_rdx, %1 # Increment it cmp_rax, %92 # IF string[0] == '\\' jne %collect_weird_string_reset # Otherwise keep going mov_al,[rdx] # Read a byte movzx_rax,al # Make it useful add_rdx, %1 # Increment it cmp_rax, %120 # IF 'x' == string[1] jne %collect_weird_string_reset # Otherwise keep going add_rdx, %2 # Increment it jmp %collect_weird_string_reset # Keep going :collect_weird_string_done mov_rax, %32 # Using ' ' mov_[rcx],al # hold_string[index] = ' ' add_rcx, %1 # Increment it mov_rax, %48 # Using '0' mov_[rcx],al # hold_string[index] = '0' add_rcx, %1 # Increment it mov_[rcx],al # hold_string[index] = '0' add_rcx, %1 # Increment it mov_rax, %39 # Using "'" mov_[rcx],al # hold_string[index] = "'" add_rcx, %1 # Increment it mov_rax, %10 # Using "\n" mov_[rcx],al # hold_string[index] = '\n' pop_rax # Return our new string pop_rdx # Restore RDX pop_rcx # Restore RCX 
pop_rbx # Restore RBX ret # HEX to ascii routine # Receives INT in RAX and CHAR* in RCX # Stores ascii of INT in CHAR* # Returns only modifying RAX and RCX :hex8 push_rax # Protect bottom nibble shr_rax, !4 # do high nibble first call %hex4 # Store it pop_rax # do low nibble :hex4 and_rax, %0xF # isolate nibble add_al, !0x30 # convert to ascii cmp_al, !0x39 # valid digit? jbe %hex1 # yes add_al, !7 # use alpha range :hex1 mov_[rcx],al # store result add_rcx, %1 # next position ret # type_name function # Receives nothing # Returns type_size in RAX # Uses RCX for STRUCT TYPE* RET :type_name push_rbx # Protect RBX push_rcx # Protect RCX mov_rbx,[rip+DWORD] %global_token # Using global_token mov_rbx,[rbx+BYTE] !16 # global_token->S lea_rax,[rip+DWORD] %struct # Using "struct" call %match # IF global_token->S == "struct" mov_rcx,rax # Protect structure cmp_rax, %0 # need to skip over "struct" jne %type_name_native # otherwise keep going # Deal with possible STRUCTs mov_rbx,[rip+DWORD] %global_token # Using global_token mov_rbx,[rbx] # global_token->next mov_[rip+DWORD],rbx %global_token # global_token = global_token->next mov_rax,[rbx+BYTE] !16 # global_token->S mov_rbx,[rip+DWORD] %global_types # get all known types call %lookup_type # Find type if possible mov_rcx,rax # Set ret cmp_rax, %0 # IF NULL == ret jne %type_name_common # We have to create struct # Create a struct call %create_struct # Create a new struct mov_rcx, %0 # We wish to return NULL jmp %type_name_done # be done :type_name_native # Deal only with native types mov_rax,rbx # Put global_token->S in the right place mov_rbx,[rip+DWORD] %global_types # get all known types call %lookup_type # Find the type if possible mov_rcx,rax # Set ret cmp_rax, %0 # IF NULL == ret jne %type_name_common # We need to abort hard # Aborting hard mov_r14, %2 # write to standard error lea_rax,[rip+DWORD] %type_name_string_0 # Print header call %File_Print # Print it mov_rax,[rip+DWORD] %global_token # Using global token 
mov_rax,[rax+BYTE] !16 # global_token->S call %File_Print # Print it lea_rax,[rip+DWORD] %type_name_string_1 # Print footer call %File_Print # Print it # call %line_error # Give details jmp %Exit_Failure # Abort :type_name_common mov_rbx,[rip+DWORD] %global_token # Using global_token mov_rbx,[rbx] # global_token->next mov_[rip+DWORD],rbx %global_token # global_token = global_token->next :type_name_iter mov_rax,[rbx+BYTE] !16 # global_token->S mov_al,[rax] # global_token->S[0] movzx_rax,al # make it useful cmp_rax, %42 # IF global_token->S[0] == '*' jne %type_name_done # recurse # Deal with char** mov_rcx,[rcx+BYTE] !24 # ret = ret->indirect mov_rbx,[rip+DWORD] %global_token # Using global_token mov_rbx,[rbx] # global_token->next mov_[rip+DWORD],rbx %global_token # global_token = global_token->next jmp %type_name_iter # keep looping :type_name_done mov_rax,rcx # put ret in the right place pop_rcx # Restore RCX pop_rbx # Restore RBX ret :type_name_string_0 "Unknown type " :type_name_string_1 " " # lookup_type function # Receives char* s in RAX and struct type* start in RBX # Returns struct type* in RAX # Uses RBX for S and RCX for I :lookup_type push_rbx # Protect RBX push_rcx # Protect RCX mov_rcx,rbx # I = Start mov_rbx,rax # Put S in place :lookup_type_iter cmp_rcx, %0 # Check if I == NULL je %lookup_type_done # return NULL mov_rax,[rcx+BYTE] !48 # I->NAME call %match # Check if matching cmp_rax, %0 # IF I->NAME == S je %lookup_type_done # return it mov_rcx,[rcx] # Otherwise I = I->NEXT jmp %lookup_type_iter # And keep looping :lookup_type_done mov_rax,rcx # return either I or NULL pop_rcx # Restore RCX pop_rbx # Restore RBX ret # create_struct function # Receives nothing # Returns nothing # Uses global_token to malloc a struct's definition # Uses RCX for int OFFSET, RDX for struct type* head, RBP for struct type* I, # RDI for member_size (Which is passed) and RSI for LAST # RAX and RBX are used for scratch :create_struct push_rbx # Protect RBX push_rcx # Protect 
RCX push_rdx # Protect RDX push_rbp # Protect RBP push_rdi # Protect RDI push_rsi # Protect RSI mov_rcx, %0 # OFFSET = 0 mov_rdi, %0 # member_size = 0 mov_rax, %56 # sizeof(struct type) call %malloc # malloc(sizeof(struct type)) mov_rdx,rax # Set HEAD mov_rax, %56 # sizeof(struct type) call %malloc # malloc(sizeof(struct type)) mov_rbp,rax # Set I mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax+BYTE] !16 # global_token->S mov_[rdx+BYTE],rax !48 # HEAD->NAME = global_token->S mov_[rbp+BYTE],rax !48 # I->NAME = global_token->S mov_[rdx+BYTE],rbp !24 # HEAD->INDIRECT = I mov_[rbp+BYTE],rdx !24 # I->INDIRECT = HEAD mov_rax,[rip+DWORD] %global_types # Using global_types mov_[rdx],rax # HEAD->NEXT = global_types mov_[rip+DWORD],rdx %global_types # global_types = HEAD mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax] # global_token->NEXT mov_[rip+DWORD],rax %global_token # global_token = global_token->NEXT mov_rax, %8 # Using register size mov_[rbp+BYTE],rax !8 # I->SIZE = register size lea_rax,[rip+DWORD] %create_struct_string_0 # Using "ERROR in create_struct\n Missing {\n" lea_rbx,[rip+DWORD] %open_curly_brace # Using "{" call %require_match # Make sure we have it mov_rsi, %0 # LAST = NULL :create_struct_iter mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax+BYTE] !16 # global_token->S mov_al,[rax] # global_token->S[0] movzx_rax,al # Make it useful cmp_rax, %125 # IF global_token->S[0] == "}" je %create_struct_done # be done # Looks like we are adding members # Lets see if it is a union mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax+BYTE] !16 # global_token->S lea_rbx,[rip+DWORD] %union # Using "union" call %match # IF match(global_token->s, "union") cmp_rax, %0 # Deal with union jne %create_struct_single # Otherwise deal with singles # Deal with union mov_rax,rsi # Put last in right place mov_rbx,rcx # put offset in right place call %build_union # ASSEMBLE mov_rsi,rax # last = 
build_union(last, offset) add_rcx,rdi # offset = offset + member_size lea_rax,[rip+DWORD] %create_struct_string_1 # Using "ERROR in create_struct\n Missing ;\n" lea_rbx,[rip+DWORD] %semicolon # Using ";" call %require_match # Make sure we have it jmp %create_struct_iter # keep going :create_struct_single # deal with singles mov_rax,rsi # Put last in right place mov_rbx,rcx # put offset in right place call %build_member # ASSEMBLE mov_rsi,rax # last = build_union(last, offset) add_rcx,rdi # offset = offset + member_size lea_rax,[rip+DWORD] %create_struct_string_1 # Using "ERROR in create_struct\n Missing ;\n" lea_rbx,[rip+DWORD] %semicolon # Using ";" call %require_match # Make sure we have it jmp %create_struct_iter # keep going :create_struct_done mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax] # global_token->NEXT mov_[rip+DWORD],rax %global_token # global_token = global_token->NEXT lea_rax,[rip+DWORD] %create_struct_string_1 # Using "ERROR in create_struct\n Missing ;\n" lea_rbx,[rip+DWORD] %semicolon # Using ";" call %require_match # Make sure we have it mov_[rdx+BYTE],rcx !8 # HEAD->SIZE = OFFSET mov_[rdx+BYTE],rsi !32 # HEAD->MEMBERS = LAST mov_[rbp+BYTE],rsi !32 # I->MEMBERS = LAST pop_rsi # Restore RSI pop_rdi # Restore RDI pop_rbp # Restore RBP pop_rdx # Restore RDX pop_rcx # Restore RCX pop_rbx # Restore RBX ret :create_struct_string_0 "ERROR in create_struct Missing { " :create_struct_string_1 "ERROR in create_struct Missing ; " # lookup_member function # Receives struct type* parent in RAX and char* name in RBX # Returns struct type* I in RAX # Uses char* NAME in RBX, RCX for struct type* I and RDX to hold parent for errors # Aborts hard if not found :lookup_member push_rbx # Protect RBX push_rcx # Protect RCX push_rdx # Protect RDX mov_rdx,rax # Protect Parent mov_rcx,[rax+BYTE] !32 # struct type* I = parent->MEMBERS :lookup_member_iter cmp_rcx, %0 # IF I == NULL je %lookup_member_fail # Abort HARD mov_rax,[rcx+BYTE] !48 # Using 
I->NAME call %match # IF I->NAME == NAME cmp_rax, %0 # Then we have found the member mov_rax,rcx # Prepare for return mov_rcx,[rcx+BYTE] !32 # Prepare for loop I = I->MEMBERS jne %lookup_member_iter # Looks like we are looping # I is already in RAX pop_rdx # Restore RDX pop_rcx # Restore RCX pop_rbx # Restore RBX ret :lookup_member_fail mov_r14, %2 # write to standard error lea_rax,[rip+DWORD] %lookup_member_string_0 # Using "ERROR in lookup_member " call %File_Print # print it mov_rax,[rdx+BYTE] !48 # PARENT->NAME call %File_Print # print it lea_rax,[rip+DWORD] %arrow_string # Using "->" call %File_Print # print it mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax+BYTE] !16 # global_token->S call %File_Print # print it lea_rax,[rip+DWORD] %lookup_member_string_1 # Using " does not exist\n" call %File_Print # print it # call %line_error # Write useful debug info lea_rax,[rip+DWORD] %lookup_member_string_2 # Using "\n" call %File_Print # print it jmp %Exit_Failure # Abort Hard :lookup_member_string_0 "ERROR in lookup_member " :lookup_member_string_1 " does not exist " :lookup_member_string_2 " " # build_member function # Receives struct type* last in RAX, int offset in RBX and global member_size in RDI # Updates member_size in RDI and returns struct type* I in RAX # Uses RCX for struct type* member_type and RDX for struct type* I :build_member push_rbx # Protect RBX push_rcx # Protect RCX push_rdx # Protect RDX mov_rdx,rax # Put last out of the way mov_rax, %56 # Allocate type call %malloc # Get I mov_[rax+BYTE],rdx !32 # I->MEMBERS = LAST mov_[rax+BYTE],rbx !16 # I->OFFSET = OFFSET mov_rdx,rax # Put I in place call %type_name # Get member_type mov_rcx,rax # Put in place mov_[rdx+BYTE],rcx !40 # I->TYPE = MEMBER_TYPE mov_rax,[rip+DWORD] %global_token # Using global_token mov_rbx,[rax+BYTE] !16 # global_token->S mov_[rdx+BYTE],rbx !48 # I->NAME = global_token->S mov_rax,[rax] # global_token->NEXT mov_[rip+DWORD],rax %global_token # global_token = 
global_token->NEXT # Check if we have an array mov_rbx,[rax+BYTE] !16 # global_token->S lea_rax,[rip+DWORD] %open_bracket # Using "[" call %match # IF global_token->S == "[" cmp_rax, %0 # Then we have to deal with arrays in our structs je %build_member_array # So deal with that pain # Deal with non-array case mov_rax,[rcx+BYTE] !8 # member_type->SIZE mov_[rdx+BYTE],rax !8 # I->SIZE = member_type->SIZE jmp %build_member_done # Be done :build_member_array mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax] # global_token->NEXT mov_[rip+DWORD],rax %global_token # global_token = global_token->NEXT mov_rax,[rax+BYTE] !16 # global_token->S call %numerate_string # convert number mov_rbx,[rcx+BYTE] !40 # member_type->TYPE mov_rbx,[rbx+BYTE] !8 # member_type->TYPE->SIZE imul_rax,rbx # member_type->type->size * numerate_string(global_token->s) mov_[rdx+BYTE],rax !8 # I->SIZE = member_type->type->size * numerate_string(global_token->s) mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax] # global_token->NEXT mov_[rip+DWORD],rax %global_token # global_token = global_token->NEXT lea_rax,[rip+DWORD] %build_member_string_0 # Using "Struct only supports [num] form\n" lea_rbx,[rip+DWORD] %close_bracket # Using "]" call %require_match # Make sure we have it :build_member_done mov_rdi,[rdx+BYTE] !8 # MEMBER_SIZE = I->SIZE mov_[rdx+BYTE],rcx !40 # I->TYPE = MEMBER_TYPE mov_rax,rdx # Return I pop_rdx # Restore RDX pop_rcx # Restore RCX pop_rbx # Restore RBX ret :build_member_string_0 "Struct only supports [num] form " # build_union function # Receives struct type* last in RAX, int offset in RBX and global member_size in RDI # Updates member_size in RDI and returns struct type* LAST in RAX # Uses RCX for struct type* last, RDX for int offset, RSI for int size and RDI for int member_size :build_union push_rbx # Protect RBX push_rcx # Protect RCX push_rdx # Protect RDX push_rsi # Protect RSI mov_rcx,rax # Put LAST in right spot mov_rdx,rbx # Put OFFSET in 
right spot mov_rsi, %0 # SIZE = 0 mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax] # global_token->NEXT mov_[rip+DWORD],rax %global_token # global_token = global_token->NEXT lea_rax,[rip+DWORD] %build_union_string_0 # Using "ERROR in build_union\nMissing {\n" lea_rbx,[rip+DWORD] %open_curly_brace # Using "{" call %require_match # Make sure we have it :build_union_iter mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax+BYTE] !16 # global_token->S mov_al,[rax] # global_token->S[0] movzx_rax,al # make it useful cmp_rax, %125 # IF global_token->S[0] == "}" je %build_union_done # Be done # Collect union member mov_rax,rcx # Passing LAST mov_rbx,rdx # Passing offset call %build_member # build_member(last, offset) mov_rcx,rax # last = build_member(last, offset) cmp_rsi,rdi # IF member_size > size jg %build_union_size # Then update size # deal with member_size > size mov_rsi,rdi # SIZE = MEMBER_SIZE :build_union_size lea_rax,[rip+DWORD] %build_union_string_1 # Using "ERROR in build_union\nMissing ;\n" lea_rbx,[rip+DWORD] %semicolon # Using ";" call %require_match # Make sure we have it jmp %build_union_iter # Keep going :build_union_done mov_rdi,rsi # MEMBER_SIZE = SIZE mov_rax,[rip+DWORD] %global_token # Using global_token mov_rax,[rax] # global_token->NEXT mov_[rip+DWORD],rax %global_token # global_token = global_token->NEXT mov_rax,rcx # Return last pop_rsi # Restore RSI pop_rdx # Restore RDX pop_rcx # Restore RCX pop_rbx # Restore RBX ret :build_union_string_0 "ERROR in build_union Missing { " :build_union_string_1 "ERROR in build_union Missing ; " # sym_declare function # Receives char *s in RAX, struct type* t in RBX, and struct token_list* list in RCX # Returns struct token_list* in RAX # Uses RAX for A :sym_declare push_rdx # Protect RDX mov_rdx,rax # Get char *S safely out of the way mov_rax, %40 # Using sizeof(struct token_list) call %malloc # Get pointer to A mov_[rax],rcx # A->NEXT = LIST mov_[rax+BYTE],rdx !16 # A->S = S 
# Final three instructions of sym_declare (its label and first half sit on the
# previous line): store the type and return the new node in RAX.
	mov_[rax+BYTE],rbx !24	# A->TYPE = T
	pop_rdx	# give RDX back to the caller
	ret


# match function
# Receives CHAR* in RAX and CHAR* in RBX
# Returns 0 (TRUE) when both NUL-terminated strings are byte-for-byte equal,
# 1 (FALSE) otherwise, in RAX
# RBX, RCX and RDX are preserved
:match
	push_rbx
	push_rcx
	push_rdx
	mov_rdx,rbx	# RDX walks the second string
	mov_rcx,rax	# RCX walks the first string
:match_Loop
	mov_bl,[rdx]	# next byte of the second string
	movzx_rbx,bl
	mov_al,[rcx]	# next byte of the first string
	movzx_rax,al
	cmp_rax,rbx	# same byte in both?
	jne %match_False	# no: strings differ
	add_rdx, %1	# step the second string
	add_rcx, %1	# step the first string
	cmp_rax, %0	# was that byte the terminator?
	jne %match_Loop	# no: compare the next pair
	jmp %match_Done	# yes: RAX is already 0 (TRUE)

:match_False
	mov_rax, %1	# report FALSE

:match_Done
	pop_rdx
	pop_rcx
	pop_rbx
	ret


# numerate_number function
# Receives an INT A in RAX
# Returns char* result in RAX (decimal text, '-' first when A is negative)
# Allocates 16 bytes of memory and writes no terminator of its own
# Starts from divisor 1000000000, so larger magnitudes do not print correctly
# Uses RAX for temp, RBX for DIVISOR, RDX for mod/0, RSI for result[i] and RBP for A
:numerate_number
	push_rbx
	push_rcx
	push_rdx
	push_rsi
	push_rbp
	mov_rbp,rax	# A lives in RBP from here on
	mov_rax, %16	# size of the result buffer
	call %malloc
	mov_rsi,rax	# RSI = write cursor
	push_rax	# keep the buffer start for the return value
	mov_rbx, %1000000000	# first divisor to try
	cmp_rbp, %0	# sign of A decides the path
	je %numerate_number_ZERO	# exactly zero
	jg %numerate_number_positive	# already positive

	# Negative: write the sign and continue with |A|
	mov_rax, %45	# '-'
	mov_[rsi],al
	add_rsi, %1
	imul_rbp, %-1	# A = -A

:numerate_number_positive
	# Shrink the divisor until A / divisor is non-zero (drops leading zeros)
	mov_rax,rbp	# dividend low half = A
	mov_rdx, %0	# dividend high half = 0
	idiv_rbx	# RAX = A / divisor
	cmp_rax, %0
	jne %numerate_number_iter	# found the leading digit's place

	mov_rax,rbx	# divisor becomes the dividend
	mov_rdx, %0
	mov_rbx, %10	# idiv needs its operand in a register
	idiv_rbx	# RAX = divisor / 10
	mov_rbx,rax	# that is the new divisor
	jmp %numerate_number_positive

:numerate_number_iter
	cmp_rbx, %0	# divisor used up (<= 0)?
	jle %numerate_number_done

	mov_rax,rbp
	mov_rdx, %0
	idiv_rbx	# RAX = digit, RDX = remainder
	add_rax, %48	# digit -> ASCII
	mov_[rsi],al	# store it
	add_rsi, %1	# advance the cursor
	mov_rbp,rdx	# A = A % divisor

	mov_rax,rbx
	mov_rdx, %0
	mov_rbx, %10
	idiv_rbx	# RAX = divisor / 10
	mov_rbx,rax	# next divisor
	jmp %numerate_number_iter

:numerate_number_done
	pop_rax	# buffer start is the result
	pop_rbp
	pop_rsi
	pop_rdx
	pop_rcx
	pop_rbx
	ret

:numerate_number_ZERO
	mov_rax, %48	# '0'
	mov_[rsi],al
	add_rsi, %1
	jmp %numerate_number_done


# numerate_string function
# Receives CHAR* in RAX
# Returns value of CHAR* in RAX
# Uses RAX for VALUE, RBX for S, RCX for CH and RSI for NEGATIVE?
# numerate_string: parses decimal ("123", "-45") or hex ("0x1F", upper-case
# digits only) text.  Input is treated as hex whenever S[1] is 'x'; S[0] is
# not checked.  Any character that is not a digit of the selected base makes
# the whole result 0.  RBX, RCX, RDX and RSI are preserved.
:numerate_string
	push_rbx	# Protect RBX
	push_rcx	# Protect RCX
	push_rdx	# Protect RDX
	push_rsi	# Protect RSI
	mov_rbx,rax	# put S in correct place
	mov_rax, %0	# Initialize to Zero
:numerate_string_loop
	mov_cl,[rbx+BYTE] !1	# S[1]
	movzx_rcx,cl	# make it useful
	cmp_rcx, %120	# IF 'x' == S[1]
	je %numerate_hex	# Deal with hex input

	# Assume decimal input
	mov_rsi, %0	# Assume no negation
	mov_cl,[rbx]	# S[0]
	movzx_rcx,cl	# make it useful
	cmp_rcx, %45	# IF '-' == S[0]
	jne %numerate_decimal	# Skip negation
	mov_rsi, %1	# Set FLAG
	add_rbx, %1	# S = S + 1

:numerate_decimal
	mov_cl,[rbx]	# S[0]
	movzx_rcx,cl	# make it useful
	cmp_rcx, %0	# IF NULL == S[0]
	je %numerate_decimal_done	# We are done

	imul_rax, %10	# VALUE = VALUE * 10
	sub_rcx, %48	# CH = CH - '0'
	cmp_rcx, %9	# Check for illegal
	jg %numerate_string_fail	# If CH > '9'
	cmp_rcx, %0	# Check for illegal
	jl %numerate_string_fail	# IF CH < '0'
	add_rax,rcx	# VALUE = VALUE + CH
	add_rbx, %1	# S = S + 1
	jmp %numerate_decimal	# Keep looping

:numerate_decimal_done
	cmp_rsi, %1	# Check if need to negate
	jne %numerate_string_done	# Nope
	imul_rax, %-1	# VALUE = VALUE * -1
	jmp %numerate_string_done	# Done

:numerate_hex
	add_rbx, %2	# S = S + 2 (skip the "0x")
:numerate_hex_loop
	mov_cl,[rbx]	# S[0]
	movzx_rcx,cl	# make it useful
	cmp_rcx, %0	# IF NULL == S[0]
	je %numerate_string_done	# We are done

	shl_rax, !4	# VALUE = VALUE << 4
	sub_rcx, %48	# CH = CH - '0'
	cmp_rcx, %10	# IF CH < 10 it is a plain 0-9 digit
	jl %numerate_hex_digit	# and needs no further shifting
	sub_rcx, %7	# Push A-F into range 10-15
	cmp_rcx, %10	# ':' through '@' land on 3-9 here
	jl %numerate_string_fail	# and are not hex digits, so reject them

:numerate_hex_digit
	cmp_rcx, %15	# Check for illegal
	jg %numerate_string_fail	# If CH > 'F'
	cmp_rcx, %0	# Check for illegal
	jl %numerate_string_fail	# IF CH < '0'
	add_rax,rcx	# VALUE = VALUE + CH
	add_rbx, %1	# S = S + 1
	jmp %numerate_hex_loop	# Keep looping

:numerate_string_fail
	mov_rax, %0	# return ZERO

:numerate_string_done
	pop_rsi	# Restore RSI
	pop_rdx	# Restore RDX
	pop_rcx	# Restore RCX
	pop_rbx	# Restore RBX
	ret


# Exit_Failure function
# Receives nothing
# And aborts hard
# Does NOT return
:Exit_Failure
	mov_rdi, %1	#
All is wrong mov_rax, %0x3C # put the exit syscall number in eax syscall # Call it a bad day # debug_list function # Receives struct token_list* in RAX # Prints contents of list and exits # Does NOT return :debug_list mov_r12,rax # Protect the list pointer mov_r14, %2 # write to standard error :debug_list_iter # Header lea_rax,[rip+DWORD] %debug_list_string0 # Using our first string call %File_Print # Print it mov_rax,r12 # Use address of pointer call %numerate_number # Convert it into string call %File_Print # Print it # NEXT lea_rax,[rip+DWORD] %debug_list_string1 # Using our second string call %File_Print # Print it mov_rax,[r12] # Use address of pointer call %numerate_number # Convert it into string call %File_Print # Print it # PREV lea_rax,[rip+DWORD] %debug_list_string2 # Using our third string call %File_Print # Print it mov_rax,[r12+BYTE] !8 # Use address of pointer call %numerate_number # Convert it into string call %File_Print # Print it # S lea_rax,[rip+DWORD] %debug_list_string3 # Using our fourth string call %File_Print # Print it mov_rax,[r12+BYTE] !16 # Use address of pointer call %numerate_number # Convert it into string call %File_Print # Print it # S Contents lea_rax,[rip+DWORD] %debug_list_string4 # Using our fifth string call %File_Print # Print it mov_rax,[r12+BYTE] !16 # Use address of string cmp_rax, %0 # IF NULL Pointer jne %debug_list_null # otherwise display lea_rax,[rip+DWORD] %debug_list_string_null # Give meaningful message instead :debug_list_null call %File_Print # Print it # TYPE lea_rax,[rip+DWORD] %debug_list_string5 # Using our sixth string call %File_Print # Print it mov_rax,[r12+BYTE] !24 # Use address of pointer call %numerate_number # Convert it into string call %File_Print # Print it # ARGS/DEPTH lea_rax,[rip+DWORD] %debug_list_string6 # Using our seventh string call %File_Print # Print it mov_rax,[r12+BYTE] !32 # Use address of pointer call %numerate_number # Convert it into string call %File_Print # Print it mov_rax, %10 # 
Add "\n" call %fputc # print it call %fputc # print it mov_r12,[r12] # TOKEN = TOKEN->NEXT cmp_r12, %0 # Check if NULL jne %debug_list_iter # iterate otherwise mov_rdi, %666 # All is HELL mov_rax, %0x3C # put the exit syscall number in rax syscall # Call it a bad day :debug_list_string0 "Token_list node at address: " :debug_list_string1 " NEXT address: " :debug_list_string2 " PREV address: " :debug_list_string3 " S address: " :debug_list_string4 " The contents of S are: " :debug_list_string5 " TYPE address: " :debug_list_string6 " ARGUMENTS address: " :debug_list_string_null ">::<NULL>::<" # Keywords :union "union" :struct "struct" :constant "CONSTANT" :main_string "main" :argc_string "argc" :argv_string "argv" :if_string "if" :else_string "else" :do_string "do" :while_string "while" :for_string "for" :asm_string "asm" :goto_string "goto" :return_string "return" :break_string "break" :continue_string "continue" :sizeof_string "sizeof" :plus_string "+" :minus_string "-" :multiply_string "*" :divide_string "/" :modulus_string "%" :left_shift_string "<<" :right_shift_string ">>" :less_than_string "<" :less_than_equal_string "<=" :greater_than_equal_string ">=" :greater_than_string ">" :equal_to_string "==" :not_equal_string "!=" :bitwise_and "&" :logical_and "&&" :bitwise_or "|" :logical_or "||" :bitwise_xor "^" :arrow_string "->" # Frequently Used strings # Generally used by require_match :open_curly_brace "{" :close_curly_brace "}" :open_paren "(" :close_paren ")" :open_bracket "[" :close_bracket "]" :comma "," :semicolon ";" :equal "=" :percent "%" :underline "_" :prim_types :type_void %type_int>type_void %0 # NEXT %8 %0 # SIZE NULL # OFFSET %type_void>type_void %0 # INDIRECT NULL # MEMBERS %type_void>type_void %0 # TYPE %type_void_name>type_void %0 # NAME :type_int %type_char>type_int %0 # NEXT %8 %0 # SIZE NULL # OFFSET %type_int>type_int %0 # INDIRECT NULL # MEMBERS %type_int>type_int %0 # TYPE %type_int_name>type_int %0 # NAME :type_char %type_file>type_char %0 
# NEXT %1 %0 # SIZE NULL # OFFSET %type_char_indirect>type_char %0 # INDIRECT NULL # MEMBERS %type_char>type_char %0 # TYPE %type_char_name>type_char %0 # NAME :type_char_indirect %type_file>type_char_indirect %0 # NEXT %8 %0 # SIZE NULL # OFFSET %type_char_double_indirect>type_char_indirect %0 # INDIRECT NULL # MEMBERS %type_char_indirect>type_char_indirect %0 # TYPE %type_char_indirect_name>type_char_indirect %0 # NAME :type_char_double_indirect %type_file>type_char_double_indirect %0 # NEXT %8 %0 # SIZE NULL # OFFSET %type_char_double_indirect>type_char_double_indirect %0 # INDIRECT NULL # MEMBERS %type_char_indirect>type_char_double_indirect %0xFFFFFFFF # TYPE %type_char_double_indirect_name>type_char_double_indirect %0 # NAME :type_file %type_function>type_file %0 # NEXT %8 %0 # SIZE NULL # OFFSET %type_file>type_file %0 # INDIRECT NULL # MEMBERS %type_file>type_file %0 # TYPE %type_file_name>type_file %0 # NAME :type_function %type_unsigned>type_function %0 # NEXT %8 %0 # SIZE NULL # OFFSET %type_function>type_function %0 # INDIRECT NULL # MEMBERS %type_function>type_function %0 # TYPE %type_function_name>type_function %0 # NAME :type_unsigned %type_long>type_unsigned %0 # NEXT %8 %0 # SIZE NULL # OFFSET %type_unsigned>type_unsigned %0 # INDIRECT NULL # MEMBERS %type_unsigned>type_unsigned %0 # TYPE %type_unsigned_name>type_unsigned %0 # NAME :type_long NULL # NEXT %8 %0 # SIZE NULL # OFFSET %type_long>type_long %0 # INDIRECT NULL # MEMBERS %type_long>type_long %0 # TYPE %type_long_name>type_long %0 # NAME :type_void_name "void" :type_int_name "int" :type_char_name "char" :type_char_indirect_name "char*" :type_char_double_indirect_name "char**" :type_file_name "FILE" :type_function_name "FUNCTION" :type_unsigned_name "unsigned" :type_long_name "long" :Address_of NULL :C NULL :Token NULL :break_frame NULL :break_target_func NULL :break_target_head NULL :break_target_num NULL :current_count NULL :current_target NULL :function NULL :global_constant_list NULL 
:global_function_list NULL :global_symbol_list NULL :global_token NULL :global_types # Needed to zero pad pointer to 64 bits NULL :globals_list NULL :output_list NULL :string_index NULL :strings_list NULL :ELF_end
/* Copyright (C) 2016 Jeremiah Orians * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ // CONSTANT stdin 0 // CONSTANT stdout 1 // CONSTANT stderr 2 // CONSTANT EOF 0xFFFFFFFF // CONSTANT NULL 0 // CONSTANT EXIT_FAILURE 1 // CONSTANT EXIT_SUCCESS 0 // CONSTANT TRUE 1 // CONSTANT FALSE 0 int fgetc(FILE* f) { asm("lea_rdi,[rsp+DWORD] %8" "mov_rdi,[rdi]" "mov_rax, %0" "push_rax" "lea_rsi,[rsp+DWORD] %0" "mov_rdx, %1" "syscall" "mov_rbx, %0" "cmp_rbx,rax" "pop_rax" "jne %FUNCTION_fgetc_Done" "mov_rax, %-1" ":FUNCTION_fgetc_Done"); } void fputc(char s, FILE* f) { asm("mov_rax, %1" "lea_rdi,[rsp+DWORD] %8" "mov_rdi,[rdi]" "lea_rsi,[rsp+DWORD] %16" "mov_rdx, %1" "syscall"); } void fputs(char* s, FILE* f) { while(0 != s[0]) { fputc(s[0], f); s = s + 1; } } FILE* open(char* name, int flag, int mode) { asm("lea_rdi,[rsp+DWORD] %24" "mov_rdi,[rdi]" "lea_rsi,[rsp+DWORD] %16" "mov_rsi,[rsi]" "lea_rdx,[rsp+DWORD] %8" "mov_rdx,[rdx]" "mov_rax, %2" "syscall"); } FILE* fopen(char* filename, char* mode) { FILE* f; if('w' == mode[0]) { /* 577 is O_WRONLY|O_CREAT|O_TRUNC, 384 is 600 in octal */ f = open(filename, 577 , 384); } else { /* Everything else is a read */ f = open(filename, 0, 0); } /* Negative numbers are error codes */ if(0 > f) { return 0; } return f; } int close(int fd) { asm("lea_rdi,[rsp+DWORD] %8" "mov_rdi,[rdi]" "mov_rax, %3" "syscall"); } int fclose(FILE* stream) { 
int error = close(stream); return error; } int brk(void *addr) { asm("mov_rax,[rsp+DWORD] %8" "push_rax" "mov_rax, %12" "pop_rbx" "mov_rdi,rbx" "syscall"); } long _malloc_ptr; long _brk_ptr; void* malloc(int size) { if(NULL == _brk_ptr) { _brk_ptr = brk(0); _malloc_ptr = _brk_ptr; } if(_brk_ptr < _malloc_ptr + size) { _brk_ptr = brk(_malloc_ptr + size); if(-1 == _brk_ptr) return 0; } long old_malloc = _malloc_ptr; _malloc_ptr = _malloc_ptr + size; return old_malloc; } int strlen(char* str ) { int i = 0; while(0 != str[i]) i = i + 1; return i; } void* memset(void* ptr, int value, int num) { char* s; for(s = ptr; 0 < num; num = num - 1) { s[0] = value; s = s + 1; } } void* calloc(int count, int size) { void* ret = malloc(count * size); if(NULL == ret) return NULL; memset(ret, 0, (count * size)); return ret; } void free(void* l) { return; } void exit(int value) { asm("pop_rbx" "pop_rdi" "mov_rax, %0x3C" "syscall"); }
/* Copyright (C) 2016 Jeremiah Orians * Copyright (C) 2020 deesix <deesix@tuta.io> * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #include <stdlib.h> #include <stdio.h> #include <string.h> // CONSTANT FALSE 0 #define FALSE 0 // CONSTANT TRUE 1 #define TRUE 1 // CONSTANT KNIGHT_NATIVE 1 #define KNIGHT_NATIVE 1 // CONSTANT KNIGHT_POSIX 2 #define KNIGHT_POSIX 2 // CONSTANT X86 3 #define X86 3 // CONSTANT AMD64 4 #define AMD64 4 // CONSTANT ARMV7L 5 #define ARMV7L 5 // CONSTANT AARCH64 6 #define AARCH64 6 // CONSTANT RISCV32 7 #define RISCV32 7 // CONSTANT RISCV64 8 #define RISCV64 8 void copy_string(char* target, char* source, int max); int in_set(int c, char* s); int match(char* a, char* b); void require(int bool, char* error); void reset_hold_string(void); struct type { struct type* next; int size; int offset; int is_signed; struct type* indirect; struct type* members; struct type* type; char* name; }; struct token_list { struct token_list* next; struct token_list* locals; struct token_list* prev; char* s; struct type* type; char* filename; struct token_list* arguments; int depth; int linenumber; }; struct case_list { struct case_list* next; char* value; }; #include "cc_globals.h"
/* Copyright (C) 2016 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#include <stdio.h>
#include <stdlib.h>

#define TRUE 1
#define FALSE 0

/* Abort with the message error on stderr unless bool is non-zero. */
void require(int bool, char* error)
{
	if(!bool)
	{
		fputs(error, stderr);
		exit(EXIT_FAILURE);
	}
}

/* String equality that tolerates NULL.
 * Two NULL pointers match; NULL never matches a real string.
 * Returns TRUE when both strings hold the same bytes, FALSE otherwise. */
int match(char* a, char* b)
{
	if((NULL == a) && (NULL == b)) return TRUE;
	if(NULL == a) return FALSE;
	if(NULL == b) return FALSE;

	int i = -1;
	do
	{
		i = i + 1;
		if(a[i] != b[i])
		{
			return FALSE;
		}
	} while((0 != a[i]) && (0 !=b[i]));
	return TRUE;
}

/* Returns TRUE when the character c occurs in the string s.
 * The terminating NUL of s is never considered a member. */
int in_set(int c, char* s)
{
	/* NULL set is always false */
	if(NULL == s) return FALSE;

	while(0 != s[0])
	{
		if(c == s[0]) return TRUE;
		s = s + 1;
	}
	return FALSE;
}

/* INTERNAL ONLY */
/* Position of the first occurrence of c in s, or -1 when absent.
 * The terminator test runs before the index is advanced so that an empty
 * s is never read past its NUL byte. */
int __index_number(char* s, char c)
{
	int i = 0;
	while(s[i] != c)
	{
		if(0 == s[i]) return -1;
		i = i + 1;
	}
	return i;
}

/* INTERNAL ONLY */
/* Upper case c when it is an ASCII lower case letter. */
int __toupper(int c)
{
	if(in_set(c, "abcdefghijklmnopqrstuvwxyz")) return (c & 0xDF);
	return c;
}

/* INTERNAL ONLY */
/* Parse input as a number written with the digits listed in set.
 * The value of a digit is its position in set after upper casing, and
 * mult is the base. A leading '-' negates the result and '_' is skipped
 * as a digit separator. Returns 0 when input holds anything else. */
int __set_reader(char* set, int mult, char* input)
{
	int n = 0;
	int i = 0;
	int hold;
	int negative_p = FALSE;

	if(input[0] == '-')
	{
		negative_p = TRUE;
		i = i + 1;
	}

	while(in_set(input[i], set))
	{
		if('_' == input[i])
		{
			i = i + 1;
			continue;
		}

		n = n * mult;
		hold = __index_number(set, __toupper(input[i]));

		/* Input managed to change between in_set and index_number */
		if(-1 == hold) return 0;
		n = n + hold;
		i = i + 1;
	}

	/* loop exited before NULL and thus invalid input */
	if(0 != input[i]) return 0;

	if(negative_p)
	{
		n = 0 - n;
	}

	return n;
}

/* Convert a to an integer.
 * Accepts 0b binary, 0x hexadecimal, leading-zero octal and decimal
 * notation. Returns 0 for the empty string and for malformed input. */
int strtoint(char *a)
{
	int result = 0;
	/* If NULL string */
	if(0 == a[0])
	{
		result = 0;
	}
	/* Deal with binary */
	else if ('0' == a[0] && 'b' == a[1])
	{
		result = __set_reader("01_", 2, a+2);
	}
	/* Deal with hex */
	else if ('0' == a[0] && 'x' == a[1])
	{
		result = __set_reader("0123456789ABCDEFabcdef_", 16, a+2);
	}
	/* Deal with octal */
	else if('0' == a[0])
	{
		result = __set_reader("01234567_", 8, a+1);
	}
	/* Deal with decimal */
	else
	{
		result = __set_reader("0123456789_", 10, a);
	}

	/* Deal with sign extension for 64bit hosts */
	if(0 != (0x80000000 & result)) result = (0xFFFFFFFF << 31) | result;
	return result;
}

/* Render x as text in the given base (2 to 36).
 * A minus sign is produced only for base 10 with signed_p set; every
 * other combination prints the low 32 bits as an unsigned value.
 * Returns a pointer into a freshly allocated buffer (not its start), or
 * NULL when the allocation failed. */
char* int2str(int x, int base, int signed_p)
{
	require(1 < base, "int2str doesn't support a base less than 2\n");
	require(37 > base, "int2str doesn't support a base more than 36\n");
	/* Be overly conservative and save space for 32binary digits and padding null */
	char* p = calloc(34, sizeof(char));
	/* if calloc fails return null to let calling code deal with it */
	if(NULL == p) return p;

	/* Digits are written right to left starting at the buffer end */
	p = p + 32;
	unsigned i;
	int sign_p = FALSE;
	char* table = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

	if(signed_p && (10 == base) && (0 != (x & 0x80000000)))
	{
		/* Truncate to 31bits */
		i = -x & 0x7FFFFFFF;
		/* The most negative value has no positive counterpart */
		if(0 == i) return "-2147483648";
		sign_p = TRUE;
	} /* Truncate to 32bits */
	else i = x & (0x7FFFFFFF | (1 << 31));

	do
	{
		p[0] = table[i % base];
		p = p - 1;
		i = i / base;
	} while(0 < i);

	if(sign_p)
	{
		p[0] = '-';
		p = p - 1;
	}

	return p + 1;
}
/* Copyright (C) 2016 Jeremiah Orians * Copyright (C) 2020 deesix <deesix@tuta.io> * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ /* What types we have */ struct type* global_types; struct type* prim_types; /* What we are currently working on */ struct token_list* global_token; /* Output reorder collections*/ struct token_list* output_list; struct token_list* strings_list; struct token_list* globals_list; /* Make our string collection more efficient */ char* hold_string; int string_index; /* Our Target Architecture */ int Architecture; int register_size; int MAX_STRING; struct type* integer; /* enable bootstrap-mode */ int BOOTSTRAP_MODE; /* enable preprocessor-only mode */ int PREPROCESSOR_MODE;
/* Copyright (C) 2016 Jeremiah Orians
 * Copyright (C) 2021 Andrius Štikonas <andrius@stikonas.eu>
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#include "cc.h"
int strtoint(char *a);

/* Globals */
/* Stream being tokenized, newest token read so far, and the position
 * stamped onto every new token. */
FILE* input;
struct token_list* token;
int line;
char* file;

/* Fetch the next byte of input, counting newlines as they go by. */
int grab_byte(void)
{
	int c = fgetc(input);
	if(10 == c) line = line + 1;
	return c;
}

/* Skip spaces and tabs; returns the first byte that is neither. */
int clearWhiteSpace(int c)
{
	if((32 == c) || (9 == c)) return clearWhiteSpace(grab_byte());
	return c;
}

/* Append c to hold_string and return the byte that follows it.
 * Aborts once the token no longer fits in MAX_STRING. */
int consume_byte(int c)
{
	hold_string[string_index] = c;
	string_index = string_index + 1;
	require(MAX_STRING > string_index, "Token exceeded MAX_STRING char limit\nuse --max-string number to increase\n");
	return grab_byte();
}

/* Collect a quoted literal whose opening quote is c.
 * Backslash escapes are kept verbatim and an escaped quote does not end
 * the literal. The closing quote is read but not stored.
 * Returns the byte after the closing quote. */
int preserve_string(int c)
{
	int frequent = c;
	int escape = FALSE;
	do
	{
		if(!escape && '\\' == c ) escape = TRUE;
		else escape = FALSE;
		c = consume_byte(c);
		require(EOF != c, "Unterminated string\n");
	} while(escape || (c != frequent));
	return grab_byte();
}

/* Copy source into target, stopping at the NUL or after max bytes.
 * No terminator is written, so target must already be zero filled. */
void copy_string(char* target, char* source, int max)
{
	int i = 0;
	while(0 != source[i])
	{
		target[i] = source[i];
		i = i + 1;
		if(i == max) break;
	}
}

/* Shift hold_string right by one byte and place ':' in front of it.
 * string_index is left untouched. */
void fixup_label(void)
{
	int hold = ':';
	int prev;
	int i = 0;
	do
	{
		prev = hold;
		hold = hold_string[i];
		hold_string[i] = prev;
		i = i + 1;
	} while(0 != hold);
}

/* Collect bytes for as long as they are members of S.
 * Returns the first byte that is not. */
int preserve_keyword(int c, char* S)
{
	while(in_set(c, S))
	{
		c = consume_byte(c);
	}
	return c;
}

/* Zero hold_string[0..MAX_STRING] and rewind string_index. */
void reset_hold_string(void)
{
	int i = MAX_STRING;
	while(0 <= i)
	{
		hold_string[i] = 0;
		i = i - 1;
	}
	string_index = 0;
}

/* note if this is the first token in the list, head needs fixing up */
/* Unlink token from its list and return the node that followed it. */
struct token_list* eat_token(struct token_list* token)
{
	if(NULL != token->prev)
	{
		token->prev->next = token->next;
	}

	/* update backlinks */
	if(NULL != token->next)
	{
		token->next->prev = token->prev;
	}

	return token->next;
}

/* Unlink tokens starting at head until a newline token is reached.
 * Returns that newline token, or NULL when the list ran out first. */
struct token_list* eat_until_newline(struct token_list* head)
{
	while (NULL != head)
	{
		if('\n' == head->s[0])
		{
			return head;
		}
		else
		{
			head = eat_token(head);
		}
	}

	return NULL;
}

/* Drop every line comment together with the rest of its line.
 * Returns the first token that survived. */
struct token_list* remove_line_comments(struct token_list* head)
{
	struct token_list* first = NULL;

	while (NULL != head)
	{
		if(match("//", head->s))
		{
			head = eat_until_newline(head);
		}
		else
		{
			if(NULL == first)
			{
				first = head;
			}
			head = head->next;
		}
	}

	return first;
}

/* Drop only the line comment marker tokens themselves, keeping the
 * tokens that follow them. Returns the first token that survived. */
struct token_list* remove_line_comment_tokens(struct token_list* head)
{
	struct token_list* first = NULL;

	while (NULL != head)
	{
		if(match("//", head->s))
		{
			head = eat_token(head);
		}
		else
		{
			if(NULL == first)
			{
				first = head;
			}
			head = head->next;
		}
	}

	return first;
}

/* Drop every token that starts with '#' together with the rest of its
 * line. Returns the first token that survived. */
struct token_list* remove_preprocessor_directives(struct token_list* head)
{
	struct token_list* first = NULL;

	while (NULL != head)
	{
		if('#' == head->s[0])
		{
			head = eat_until_newline(head);
		}
		else
		{
			if(NULL == first)
			{
				first = head;
			}
			head = head->next;
		}
	}

	return first;
}

/* Push a token holding a copy of s onto the global token list.
 * size is the number of bytes to reserve for the copy. The current line
 * and file are recorded on the node. */
void new_token(char* s, int size)
{
	struct token_list* current = calloc(1, sizeof(struct token_list));
	require(NULL != current, "Exhausted memory while getting token\n");

	/* More efficiently allocate memory for string */
	current->s = calloc(size, sizeof(char));
	require(NULL != current->s, "Exhausted memory while trying to copy a token\n");
	copy_string(current->s, s, MAX_STRING);

	current->prev = token;
	current->next = token;
	current->linenumber = line;
	current->filename = file;
	token = current;
}

/* Read one token starting at byte c and push it with new_token.
 * Block comments are skipped entirely. Returns the first byte after the
 * token, or EOF when the input ended before a token started.
 *
 * Changes from the previous revision:
 * - no token_list node is allocated here any more; it was never used
 *   because new_token allocates the node that is actually linked in
 * - after "+=" or "-=" a following '+', '>' or '-' now starts the next
 *   token instead of being glued onto the operator */
int get_token(int c)
{
reset:
	reset_hold_string();
	string_index = 0;

	c = clearWhiteSpace(c);
	if(c == EOF)
	{
		return c;
	}
	else if('#' == c)
	{
		c = consume_byte(c);
		c = preserve_keyword(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_");
	}
	else if(in_set(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"))
	{
		c = preserve_keyword(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_");
		/* An identifier directly followed by ':' is a label */
		if(':' == c)
		{
			fixup_label();
			c = ' ';
		}
	}
	else if(in_set(c, "<=>|&!^%"))
	{
		c = preserve_keyword(c, "<=>|&!^%");
	}
	else if(in_set(c, "'\""))
	{
		c = preserve_string(c);
	}
	else if(c == '/')
	{
		c = consume_byte(c);
		if(c == '*')
		{
			/* Skip to the end of the block comment and start over */
			c = grab_byte();
			while(c != '/')
			{
				while(c != '*')
				{
					c = grab_byte();
					require(EOF != c, "Hit EOF inside of block comment\n");
				}
				c = grab_byte();
				require(EOF != c, "Hit EOF inside of block comment\n");
			}
			c = grab_byte();
			goto reset;
		}
		else if(c == '/')
		{
			c = consume_byte(c);
		}
		else if(c == '=')
		{
			c = consume_byte(c);
		}
	}
	else if (c == '\n')
	{
		c = consume_byte(c);
	}
	else if(c == '*')
	{
		c = consume_byte(c);
		if(c == '=')
		{
			c = consume_byte(c);
		}
	}
	else if(c == '+')
	{
		c = consume_byte(c);
		if(c == '=')
		{
			c = consume_byte(c);
		}
		else if(c == '+')
		{
			c = consume_byte(c);
		}
	}
	else if(c == '-')
	{
		c = consume_byte(c);
		if(c == '=')
		{
			c = consume_byte(c);
		}
		else if(c == '>')
		{
			c = consume_byte(c);
		}
		else if(c == '-')
		{
			c = consume_byte(c);
		}
	}
	else
	{
		c = consume_byte(c);
	}

	/* One spare byte for a label prefix plus one for the terminator */
	new_token(hold_string, string_index + 2);
	return c;
}

/* Read the whitespace delimited filename that follows #FILENAME and push
 * it as a token. Returns the byte that ended the filename. */
int consume_filename(int c)
{
	reset_hold_string();
	int done = FALSE;

	while(!done)
	{
		if(c == EOF)
		{
			fputs("we don't support EOF as a filename in #FILENAME statements\n", stderr);
			exit(EXIT_FAILURE);
		}
		else if((32 == c) || (9 == c) || (c == '\n'))
		{
			c = grab_byte();
		}
		else
		{
			do
			{
				c = consume_byte(c);
				require(EOF != c, "Unterminated filename in #FILENAME\n");
			} while((32 != c) && (9 != c) && ('\n' != c));
			done = TRUE;
		}
	}

	/* with just a little extra to put in the matching at the end */
	new_token(hold_string, string_index + 3);
	return c;
}

/* Handle a #FILENAME statement: read the new file name and line number,
 * adopt them as the current position and drop the three tokens involved
 * from the token list. Returns the next unread byte. */
int change_filename(int ch)
{
	require(EOF != ch, "#FILENAME failed to receive filename\n");
	/* Remove the #FILENAME */
	token = token->next;

	/* Get new filename */
	ch = consume_filename(ch);
	file = token->s;
	/* Remove it from the processing list */
	token = token->next;
	require(EOF != ch, "#FILENAME failed to receive filename\n");

	/* Get new line number */
	ch = get_token(ch);
	line = strtoint(token->s);
	if(0 == line)
	{
		/* strtoint also returns 0 for input that is not a number */
		if('0' != token->s[0])
		{
			fputs("non-line number: ", stderr);
			fputs(token->s, stderr);
			fputs(" provided to #FILENAME\n", stderr);
			exit(EXIT_FAILURE);
		}
	}
	/* Remove it from the processing list */
	token = token->next;

	return ch;
}

/* Reverse the next links of the list starting at head.
 * Returns the new first node. prev links are not touched. */
struct token_list* reverse_list(struct token_list* head)
{
	struct token_list* root = NULL;
	struct token_list* next;
	while(NULL != head)
	{
		next = head->next;
		head->next = root;
		root = head;
		head = next;
	}
	return root;
}

/* Tokenize the whole of stream a, pushing tokens in front of current.
 * filename is recorded as the origin of the tokens. Returns the newest
 * token, so the result is in reverse reading order. */
struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename)
{
	input = a;
	line = 1;
	file = filename;
	token = current;
	int ch = grab_byte();
	while(EOF != ch)
	{
		ch = get_token(ch);
		require(NULL != token, "Empty files don't need to be compiled\n");
		if(match("#FILENAME", token->s)) ch = change_filename(ch);
	}

	return token;
}
/* Copyright (C) 2016 Jeremiah Orians
 * Copyright (C) 2018 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#include "cc.h"
#include <stdint.h>

struct token_list* emit(char *s, struct token_list* head);
void require(int bool, char* error);

/* Upper case a when it is an ASCII lower case letter. */
char upcase(char a)
{
	if(in_set(a, "abcdefghijklmnopqrstuvwxyz"))
	{
		a = a - 32;
	}

	return a;
}

/* Value of the hex digit c (either case), or -1 when c is not one. */
int char2hex(int c)
{
	if (c >= '0' && c <= '9') return (c - 48);
	else if (c >= 'a' && c <= 'f') return (c - 87);
	else if (c >= 'A' && c <= 'F') return (c - 55);
	else return -1;
}

/* Value of the hex digit c, moved into the upper nibble when high is
 * set. Terminates the program when c is not a hex digit. */
int hexify(int c, int high)
{
	int i = char2hex(c);

	if(0 > i)
	{
		fputs("Tried to print non-hex number\n", stderr);
		exit(EXIT_FAILURE);
	}

	if(high)
	{
		i = i << 4;
	}
	return i;
}

int escape_lookup(char* c);
/* Decide whether string needs the hex encoded form.
 * The first byte of string is skipped without being looked at.
 * Returns TRUE when a byte (after resolving escapes) is outside the
 * accepted character list, or when whitespace is directly followed by
 * ':'; returns FALSE once the end of the string is reached. */
int weird(char* string)
{
	int c;
	string = string + 1;
weird_reset:
	c = string[0];
	if(0 == c) return FALSE;
	if('\\' == c)
	{
		c = escape_lookup(string);
		/* Step over the escape; the shared increment below consumes its
		 * last byte */
		if('x' == string[1]) string = string + 2;
		string = string + 1;
	}

	if(!in_set(c, "\t\n !#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~")) return TRUE;
	if(in_set(c, " \t\n\r") && (':' == string[1])) return TRUE;
	string = string + 1;
	goto weird_reset;
}

/* Lookup escape values */
/* Returns c[0] unchanged when it is not a backslash; otherwise the byte
 * denoted by the escape sequence starting at c. A \x escape takes
 * exactly two hex digits. Terminates the program on an unknown escape. */
int escape_lookup(char* c)
{
	if('\\' != c[0]) return c[0];

	if(c[1] == 'x')
	{
		int t1 = hexify(c[2], TRUE);
		int t2 = hexify(c[3], FALSE);
		return t1 + t2;
	}
	else if(c[1] == '0') return 0;
	else if(c[1] == 'a') return 7;
	else if(c[1] == 'b') return 8;
	else if(c[1] == 't') return 9;
	else if(c[1] == 'n') return 10;
	else if(c[1] == 'v') return 11;
	else if(c[1] == 'f') return 12;
	else if(c[1] == 'r') return 13;
	else if(c[1] == 'e') return 27;
	else if(c[1] == '"') return 34;
	else if(c[1] == '\'') return 39;
	else if(c[1] == '\\') return 92;

	fputs("Unknown escape received: ", stderr);
	fputs(c, stderr);
	fputs(" Unable to process\n", stderr);
	exit(EXIT_FAILURE);
}

/* Deal with human strings */
/* Copy string into hold_string with escapes resolved, append a double
 * quote and a newline, and return a freshly allocated copy of the
 * result. hold_string is cleared again before returning. */
char* collect_regular_string(char* string)
{
	string_index = 0;

collect_regular_string_reset:
	require((MAX_STRING - 3) > string_index, "Attempt at parsing regular string exceeds max length\n");
	if(string[0] == '\\')
	{
		hold_string[string_index] = escape_lookup(string);
		/* \xHH spans four bytes, every other escape spans two */
		if (string[1] == 'x') string = string + 2;
		string = string + 2;
	}
	else
	{
		hold_string[string_index] = string[0];
		string = string + 1;
	}

	string_index = string_index + 1;
	if(string[0] != 0) goto collect_regular_string_reset;

	hold_string[string_index] = '"';
	hold_string[string_index + 1] = '\n';
	char* message = calloc(string_index + 3, sizeof(char));
	require(NULL != message, "Exhausted memory while storing regular string\n");
	copy_string(message, hold_string, string_index + 2);
	reset_hold_string();
	return message;
}

/* Deal with non-human strings */
/* Build a single quoted list of two digit upper case hex values, one per
 * byte of string after its first byte, with escapes resolved. A final
 * 00 entry, the closing quote and a newline are appended. Returns a
 * freshly allocated copy; hold_string is cleared before returning. */
char* collect_weird_string(char* string)
{
	string_index = 1;
	int temp;
	char* table = "0123456789ABCDEF";

	hold_string[0] = '\'';
collect_weird_string_reset:
	require((MAX_STRING - 6) > string_index, "Attempt at parsing weird string exceeds max length\n");
	/* Advance to the next byte; on the first pass this skips string[0] */
	string = string + 1;
	hold_string[string_index] = ' ';
	temp = escape_lookup(string) & 0xFF;
	hold_string[string_index + 1] = table[(temp >> 4)];
	hold_string[string_index + 2] = table[(temp & 15)];

	if(string[0] == '\\')
	{
		/* Leave string on the last byte of the escape sequence */
		if(string[1] == 'x') string = string + 2;
		string = string + 1;
	}

	string_index = string_index + 3;
	if(string[1] != 0) goto collect_weird_string_reset;

	hold_string[string_index] = ' ';
	hold_string[string_index + 1] = '0';
	hold_string[string_index + 2] = '0';
	hold_string[string_index + 3] = '\'';
	hold_string[string_index + 4] = '\n';

	char* hold = calloc(string_index + 6, sizeof(char));
	require(NULL != hold, "Exhausted available memory while attempting to collect a weird string\n");
	copy_string(hold, hold_string, string_index + 5);
	reset_hold_string();
	return hold;
}

/* Parse string to deal with hex characters*/
/* Returns the hex encoded form when weird() flags the string and the
 * plain quoted form otherwise. */
char* parse_string(char* string)
{
	/* the string */
	if(weird(string)) return collect_weird_string(string);
	else return collect_regular_string(string);
}
/* Copyright (C) 2016 Jeremiah Orians
 * Copyright (C) 2020 deesix <deesix@tuta.io>
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#include "cc.h"

/* Imported functions */
int strtoint(char *a);
void line_error(void);
void require(int bool, char* error);

/* enable easy primitive extension */
/* Append a to the end of prim_types and return the list head. When the
 * list is still empty, a itself is returned as the new head. */
struct type* add_primitive(struct type* a)
{
	if(NULL == prim_types) return a;
	struct type* i = prim_types;
	while(NULL != i->next)
	{
		i = i->next;
	}
	i->next = a;

	return prim_types;
}

/* enable easy primitive creation */
/* Create a base type together with its pointer and pointer to pointer
 * companions, linked through their indirect and type fields.
 * name0, name1 and name2 name the three levels, size is the size of the
 * base type and sign is stored in is_signed on all three.
 * Returns the base type. */
struct type* new_primitive(char* name0, char* name1, char* name2, int size, int sign)
{
	/* Create type** */
	struct type* a = calloc(1, sizeof(struct type));
	require(NULL != a, "Exhausted memory while declaring new primitive**\n");
	a->name = name2;
	a->size = register_size;
	/* Deeper indirection keeps resolving to the same node */
	a->indirect = a;
	a->is_signed = sign;

	/* Create type* */
	struct type* b = calloc(1, sizeof(struct type));
	require(NULL != b, "Exhausted memory while declaring new primitive*\n");
	b->name = name1;
	b->size = register_size;
	b->is_signed = sign;
	b->indirect = a;
	a->type = b;

	struct type* r = calloc(1, sizeof(struct type));
	require(NULL != r, "Exhausted memory while declaring new primitive\n");
	r->name = name0;
	r->size = size;
	r->is_signed = sign;
	r->indirect = b;
	r->type = r;
	b->type = r;

	return r;
}

/* Initialize default types */
/* Picks register_size from Architecture, registers every built in
 * primitive and points global_types at the resulting list. */
void initialize_types(void)
{
	if(AMD64 == Architecture || AARCH64 == Architecture || RISCV64 == Architecture) register_size = 8;
	else register_size = 4;

	/* Define void */
	struct type* hold = new_primitive("void", "void*", "void**", register_size, FALSE);
	prim_types = add_primitive(hold);

	/* Define unsigned LONG */
	hold = new_primitive("SCM","SCM*", "SCM**", register_size, FALSE);
	prim_types = add_primitive(hold);

	/* Define LONG */
	hold = new_primitive("long", "long*", "long**", register_size, TRUE);
	prim_types = add_primitive(hold);

	/* Define UNSIGNED */
	hold = new_primitive("unsigned", "unsigned*", "unsigned**", register_size, FALSE);
	prim_types = add_primitive(hold);

	/* Define int */
	integer = new_primitive("int", "int*", "int**", register_size, TRUE);
	prim_types = add_primitive(integer);

	/* Define uint32_t */
	hold = new_primitive("uint32_t", "uint32_t*", "uint32_t**", 4, FALSE);
	prim_types = add_primitive(hold);

	/* Define int32_t */
	hold = new_primitive("int32_t", "int32_t*", "int32_t**", 4, TRUE);
	prim_types = add_primitive(hold);

	/* Define uint16_t */
	hold = new_primitive("uint16_t", "uint16_t*", "uint16_t**", 2, FALSE);
	prim_types = add_primitive(hold);

	/* Define int16_t */
	hold = new_primitive("int16_t", "int16_t*", "int16_t**", 2, TRUE);
	prim_types = add_primitive(hold);

	/* Define uint8_t */
	hold = new_primitive("uint8_t", "uint8_t*", "uint8_t**", 1, FALSE);
	prim_types = add_primitive(hold);

	/* Define int8_t */
	hold = new_primitive("int8_t", "int8_t*", "int8_t**", 1, TRUE);
	prim_types = add_primitive(hold);

	/* Define char */
	hold = new_primitive("char", "char*", "char**", 1, TRUE);
	prim_types = add_primitive(hold);

	/* Define FUNCTION */
	hold = new_primitive("FUNCTION", "FUNCTION*", "FUNCTION**", register_size, FALSE);
	prim_types = add_primitive(hold);

	if(BOOTSTRAP_MODE)
	{
		/* Define FILE */
		hold = new_primitive("FILE", "FILE*", "FILE**", register_size, TRUE);
		prim_types = add_primitive(hold);

		/* Primitives mes.c wanted */
		hold = new_primitive("size_t", "size_t*", "size_t**", register_size, FALSE);
		prim_types = add_primitive(hold);

		/* ssize_t is the signed counterpart of size_t; it used to be
		 * registered as unsigned */
		hold = new_primitive("ssize_t", "ssize_t*", "ssize_t**", register_size, TRUE);
		prim_types = add_primitive(hold);
	}

	global_types = prim_types;
}

/* Find the type named s in the list beginning at start.
 * Returns NULL when no entry carries that name. */
struct type* lookup_type(char* s, struct type* start)
{
	struct type* i;
	for(i = start; NULL != i; i = i->next)
	{
		if(match(i->name, s))
		{
			return i;
		}
	}
	return NULL;
}

/* Find the member called name inside the struct type parent.
 * Terminates the program with a diagnostic when there is none. */
struct type* lookup_member(struct type* parent, char* name)
{
	struct type* i;
	require(NULL != parent, "Not a valid struct type\n");
	for(i = parent->members; NULL != i; i = i->members)
	{
		if(match(i->name, name)) return i;
	}

	fputs("ERROR in lookup_member ", stderr);
	fputs(parent->name, stderr);
	fputs("->", stderr);
	fputs(global_token->s, stderr);
	fputs(" does not exist\n", stderr);
	line_error();
	fputs("\n", stderr);
	exit(EXIT_FAILURE);
}

struct type* type_name(void);
void require_match(char* message, char* required);

/* Size of the member (or whole union) parsed most recently */
int member_size;
/* Parse one member declaration at global_token.
 * The new member is chained in front of last and placed at offset.
 * Arrays are only accepted in the [num] form. The size of the member is
 * also left in member_size. Returns the new member. */
struct type* build_member(struct type* last, int offset)
{
	struct type* i = calloc(1, sizeof(struct type));
	require(NULL != i, "Exhausted memory while building a struct member\n");
	i->members = last;
	i->offset = offset;

	struct type* member_type = type_name();
	require(NULL != member_type, "struct member type can not be invalid\n");
	i->type = member_type;
	i->name = global_token->s;
	global_token = global_token->next;
	require(NULL != global_token, "struct member can not be EOF terminated\n");

	/* Check to see if array */
	if(match( "[", global_token->s))
	{
		global_token = global_token->next;
		require(NULL != global_token, "struct member arrays can not be EOF sized\n");
		i->size = member_type->type->size * strtoint(global_token->s);
		if(0 == i->size)
		{
			fputs("Struct only supports [num] form\n", stderr);
			exit(EXIT_FAILURE);
		}
		global_token = global_token->next;
		require_match("Struct only supports [num] form\n", "]");
	}
	else
	{
		i->size = member_type->size;
	}
	member_size = i->size;

	return i;
}

/* Parse an anonymous union inside a struct.
 * Every alternative is placed at the same offset, and member_size ends
 * up holding the size of the largest one. Returns the updated chain. */
struct type* build_union(struct type* last, int offset)
{
	int size = 0;
	global_token = global_token->next;
	require_match("ERROR in build_union\nMissing {\n", "{");
	/* The input may end right after the brace; check before the loop
	 * condition dereferences the token, as create_struct does */
	require(NULL != global_token, "Incomplete union definition at end of file\n");
	while('}' != global_token->s[0])
	{
		last = build_member(last, offset);
		if(member_size > size)
		{
			size = member_size;
		}
		require_match("ERROR in build_union\nMissing ;\n", ";");
		require(NULL != global_token, "Unterminated union\n");
	}
	member_size = size;
	global_token = global_token->next;
	return last;
}

/* Parse a struct definition whose name is at global_token.
 * The struct and its two pointer levels are created and pushed onto
 * global_types before the body is read, so members may refer to the
 * struct that is being defined. */
void create_struct(void)
{
	int offset = 0;
	member_size = 0;
	struct type* head = calloc(1, sizeof(struct type));
	require(NULL != head, "Exhausted memory while creating a struct\n");
	struct type* i = calloc(1, sizeof(struct type));
	require(NULL != i, "Exhausted memory while creating a struct indirection\n");
	struct type* ii = calloc(1, sizeof(struct type));
	require(NULL != ii, "Exhausted memory while creating a struct double indirection\n");
	head->name = global_token->s;
	head->type = head;
	head->indirect = i;
	head->next = global_types;
	i->name = global_token->s;
	i->type = head;
	i->indirect = ii;
	i->size = register_size;
	ii->name = global_token->s;
	ii->type = i;
	ii->indirect = ii;
	ii->size = register_size;
	global_types = head;
	global_token = global_token->next;
	require_match("ERROR in create_struct\n Missing {\n", "{");
	struct type* last = NULL;
	require(NULL != global_token, "Incomplete struct definition at end of file\n");
	while('}' != global_token->s[0])
	{
		if(match(global_token->s, "union"))
		{
			last = build_union(last, offset);
		}
		else
		{
			last = build_member(last, offset);
		}
		offset = offset + member_size;
		require_match("ERROR in create_struct\n Missing ;\n", ";");
		require(NULL != global_token, "Unterminated struct\n");
	}

	global_token = global_token->next;
	require_match("ERROR in create_struct\n Missing ;\n", ";");

	/* The struct is as large as the sum of its members */
	head->size = offset;
	head->members = last;
	i->members = last;
}

/* Parse a type at global_token: an optional "extern", a type name or
 * "struct" followed by a name, an optional "const" and any number of
 * '*'. Returns the resolved type with global_token left on the token
 * after it. A struct name that is not known yet is treated as a
 * definition; it is created and NULL is returned. An unknown plain type
 * name terminates the program. */
struct type* type_name(void)
{
	struct type* ret;

	require(NULL != global_token, "Received EOF instead of type name\n");

	if(match("extern", global_token->s))
	{
		global_token = global_token->next;
		require(NULL != global_token, "unfinished type definition in extern\n");
	}

	if(match("struct", global_token->s))
	{
		global_token = global_token->next;
		require(NULL != global_token, "structs can not have a EOF type name\n");
		ret = lookup_type(global_token->s, global_types);
		if(NULL == ret)
		{
			create_struct();
			return NULL;
		}
	}
	else
	{
		ret = lookup_type(global_token->s, global_types);
		if(NULL == ret)
		{
			fputs("Unknown type ", stderr);
			fputs(global_token->s, stderr);
			fputs("\n", stderr);
			line_error();
			fputs("\n", stderr);
			exit(EXIT_FAILURE);
		}
	}

	global_token = global_token->next;
	require(NULL != global_token, "unfinished type definition\n");

	if(match("const", global_token->s))
	{
		global_token = global_token->next;
		require(NULL != global_token, "unfinished type definition in const\n");
	}

	while(global_token->s[0] == '*')
	{
		ret = ret->indirect;
		global_token = global_token->next;
		require(NULL != global_token, "unfinished type definition in indirection\n");
	}

	return ret;
}

/* Create a copy of source, and of its pointer type, under the new name.
 * Size, offset, signedness and members are taken over from source; the
 * two new nodes refer to each other through indirect. Returns the copy
 * of the base type. */
struct type* mirror_type(struct type* source, char* name)
{
	struct type* head = calloc(1, sizeof(struct type));
	require(NULL != head, "Exhausted memory while creating a struct\n");
	struct type* i = calloc(1, sizeof(struct type));
	require(NULL != i, "Exhausted memory while creating a struct indirection\n");

	head->name = name;
	i->name = name;
	head->size = source->size;
	i->size = source->indirect->size;
	head->offset = source->offset;
	i->offset = source->indirect->offset;
	head->is_signed = source->is_signed;
	i->is_signed = source->indirect->is_signed;
	head->indirect = i;
	i->indirect = head;
	head->members = source->members;
	i->members = source->indirect->members;
	head->type = head;
	i->type = i;

	return head;
}
/* Copyright (C) 2016 Jeremiah Orians
 * Copyright (C) 2018 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
 * Copyright (C) 2020 deesix <deesix@tuta.io>
 * Copyright (C) 2021 Andrius Štikonas <andrius@stikonas.eu>
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#include "cc.h"
#include "gcc_req.h"
#include <stdint.h>

/* Global lists */
struct token_list* global_symbol_list;
struct token_list* global_function_list;
struct token_list* global_constant_list;

/* Core lists for this file */
struct token_list* function;

/* What we are currently working on */
struct type* current_target;
char* break_target_head;
char* break_target_func;
char* break_target_num;
char* continue_target_head;
struct token_list* break_frame;
int current_count;
int Address_of;

/* Imported functions */
char* int2str(int x, int base, int signed_p);
int strtoint(char *a);
char* parse_string(char* string);
int escape_lookup(char* c);
void require(int bool, char* error);
struct token_list* reverse_list(struct token_list* head);
struct type* mirror_type(struct type* source, char* name);
struct type* add_primitive(struct type* a);

/*
 * Allocate a token_list node holding s and link it in front of head.
 * Returns the new node, which is the new head of the list.
 */
struct token_list* emit(char *s, struct token_list* head)
{
	struct token_list* t = calloc(1, sizeof(struct token_list));
	require(NULL != t, "Exhausted memory while generating token to emit\n");
	t->next = head;
	t->s = s;
	return t;
}

/* Prepend s to the global output_list */
void emit_out(char* s)
{
	output_list = emit(s, output_list);
}

/*
 * Prepend the four pieces s, "_", num and "\n" (in that order) to l and
 * return the new head of the list.
 */
struct token_list* uniqueID(char* s, struct token_list* l, char* num)
{
	l = emit("\n", emit(num, emit("_", emit(s, l))));
	return l;
}

/* Same as uniqueID() but operating on the global output_list */
void uniqueID_out(char* s, char* num)
{
	output_list = uniqueID(s, output_list, num);
}

/*
 * Allocate a symbol node with name s and type t, link it in front of list
 * and return it.
 */
struct token_list* sym_declare(char *s, struct type* t, struct token_list* list)
{
	struct token_list* a = calloc(1, sizeof(struct token_list));
	require(NULL != a, "Exhausted memory while attempting to declare a symbol\n");
	a->next = list;
	a->s = s;
	a->type = t;
	return a;
}

/*
 * Return the first node of symbol_list whose ->s matches s, or NULL when the
 * list holds no such node.
 */
struct token_list* sym_lookup(char *s, struct token_list* symbol_list)
{
	struct token_list* i;
	for(i = symbol_list; NULL != i; i = i->next)
	{
		if(match(i->s, s)) return i;
	}
	return NULL;
}

/*
 * Write "filename:linenumber:" for token to stderr.
 * A NULL token produces an end-of-file notice instead.
 */
void line_error_token(struct token_list *token)
{
	if(NULL == token)
	{
		fputs("EOF reached inside of line_error\n", stderr);
		fputs("problem at end of file\n", stderr);
		return;
	}
	fputs(token->filename, stderr);
	fputs(":", stderr);
	fputs(int2str(token->linenumber, 10, TRUE), stderr);
	fputs(":", stderr);
}

/* Report the position of the current global_token on stderr */
void line_error(void)
{
	line_error_token(global_token);
}

/*
 * Demand that the current token equals required and step past it.
 * On EOF or on a mismatch, message is printed to stderr and the program
 * exits with EXIT_FAILURE. global_token may be NULL after a successful
 * return, because the token is advanced unconditionally.
 */
void require_match(char* message, char* required)
{
	if(NULL == global_token)
	{
		line_error();
		fputs("EOF reached inside of require match\n", stderr);
		fputs("problem at end of file\n", stderr);
		fputs(message, stderr);
		exit(EXIT_FAILURE);
	}

	if(!match(global_token->s, required))
	{
		line_error();
		fputs(message, stderr);
		exit(EXIT_FAILURE);
	}
	global_token = global_token->next;
}

/* When BOOTSTRAP_MODE is set, reject feature with an error and exit */
void maybe_bootstrap_error(char* feature)
{
	if (BOOTSTRAP_MODE)
	{
		line_error();
		fputs(feature, stderr);
		fputs(" is not supported in --bootstrap-mode\n", stderr);
		exit(EXIT_FAILURE);
	}
}

void expression(void);

/*
 * Emit the code for a call whose '(' is the current token.
 *
 * s    - when bool is TRUE: the text of a frame offset at which the function
 *        address is stored (variable_load passes the symbol's depth);
 *        otherwise the function name, emitted after a "FUNCTION_" prefix.
 * bool - TRUE for a call through a stored address, anything else for a
 *        direct call.
 *
 * Emission order: save registers and set up the new base, evaluate and push
 * each argument left to right, perform the call, pop one slot per pushed
 * argument, restore the saved registers.
 */
void function_call(char* s, int bool)
{
	require_match("ERROR in process_expression_list\nNo ( was found\n", "(");
	require(NULL != global_token, "Improper function call\n");
	int passed = 0;

	if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture))
	{
		emit_out("PUSHR R13 R15\t# Prevent overwriting in recursion\n");
		emit_out("PUSHR R14 R15\t# Protect the old base pointer\n");
		emit_out("COPY R13 R15\t# Copy new base pointer\n");
	}
	else if(X86 == Architecture)
	{
		emit_out("push_edi\t# Prevent overwriting in recursion\n");
		emit_out("push_ebp\t# Protect the old base pointer\n");
		emit_out("mov_edi,esp\t# Copy new base pointer\n");
	}
	else if(AMD64 == Architecture)
	{
		emit_out("push_rdi\t# Prevent overwriting in recursion\n");
		emit_out("push_rbp\t# Protect the old base pointer\n");
		emit_out("mov_rdi,rsp\t# Copy new base pointer\n");
	}
	else if(ARMV7L == Architecture)
	{
		emit_out("{R11} PUSH_ALWAYS\t# Prevent overwriting in recursion\n");
		emit_out("{BP} PUSH_ALWAYS\t# Protect the old base pointer\n");
		emit_out("'0' SP R11 NO_SHIFT MOVE_ALWAYS\t# Copy new base pointer\n");
	}
	else if(AARCH64 == Architecture)
	{
		emit_out("PUSH_X16\t# Protect a tmp register we're going to use\n");
		emit_out("PUSH_LR\t# Protect the old return pointer (link)\n");
		emit_out("PUSH_BP\t# Protect the old base pointer\n");
		emit_out("SET_X16_FROM_SP\t# The base pointer to-be\n");
	}
	else if(RISCV32 == Architecture)
	{
		emit_out("rd_sp rs1_sp !-12 addi\t# Allocate stack\n");
		emit_out("rs1_sp rs2_ra @4 sw\t# Protect the old return pointer\n");
		emit_out("rs1_sp rs2_fp sw\t# Protect the old frame pointer\n");
		emit_out("rs1_sp rs2_tp @8 sw\t# Protect temp register we are going to use\n");
		emit_out("rd_tp rs1_sp mv\t# The base pointer to-be\n");
	}
	else if(RISCV64 == Architecture)
	{
		emit_out("rd_sp rs1_sp !-24 addi\t# Allocate stack\n");
		emit_out("rs1_sp rs2_ra @8 sd\t# Protect the old return pointer\n");
		emit_out("rs1_sp rs2_fp sd\t# Protect the old frame pointer\n");
		emit_out("rs1_sp rs2_tp @16 sd\t# Protect temp register we are going to use\n");
		emit_out("rd_tp rs1_sp mv\t# The base pointer to-be\n");
	}

	/* Evaluate and push the arguments, counting them in passed */
	if(global_token->s[0] != ')')
	{
		expression();
		require(NULL != global_token, "incomplete function call, received EOF instead of )\n");
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("PUSHR R0 R15\t#_process_expression1\n");
		else if(X86 == Architecture) emit_out("push_eax\t#_process_expression1\n");
		else if(AMD64 == Architecture) emit_out("push_rax\t#_process_expression1\n");
		else if(ARMV7L == Architecture) emit_out("{R0} PUSH_ALWAYS\t#_process_expression1\n");
		else if(AARCH64 == Architecture) emit_out("PUSH_X0\t#_process_expression1\n");
		else if(RISCV32 == Architecture) emit_out("rd_sp rs1_sp !-4 addi\nrs1_sp rs2_a0 sw\t#_process_expression1\n");
		else if(RISCV64 == Architecture) emit_out("rd_sp rs1_sp !-8 addi\nrs1_sp rs2_a0 sd\t#_process_expression1\n");
		passed = 1;

		while(global_token->s[0] == ',')
		{
			global_token = global_token->next;
			require(NULL != global_token, "incomplete function call, received EOF instead of argument\n");
			expression();
			if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("PUSHR R0 R15\t#_process_expression2\n");
			else if(X86 == Architecture) emit_out("push_eax\t#_process_expression2\n");
			else if(AMD64 == Architecture) emit_out("push_rax\t#_process_expression2\n");
			else if(ARMV7L == Architecture) emit_out("{R0} PUSH_ALWAYS\t#_process_expression2\n");
			else if(AARCH64 == Architecture) emit_out("PUSH_X0\t#_process_expression2\n");
			else if(RISCV32 == Architecture) emit_out("rd_sp rs1_sp !-4 addi\nrs1_sp rs2_a0 sw\t#_process_expression2\n");
			else if(RISCV64 == Architecture) emit_out("rd_sp rs1_sp !-8 addi\nrs1_sp rs2_a0 sd\t#_process_expression2\n");
			passed = passed + 1;
		}
	}

	require_match("ERROR in process_expression_list\nNo ) was found\n", ")");

	if(TRUE == bool)
	{
		/* Call through an address stored in the current frame */
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture))
		{
			emit_out("LOAD R0 R14 ");
			emit_out(s);
			emit_out("\nMOVE R14 R13\n");
			emit_out("CALL R0 R15\n");
		}
		else if(X86 == Architecture)
		{
			emit_out("lea_eax,[ebp+DWORD] %");
			emit_out(s);
			emit_out("\nmov_eax,[eax]\n");
			emit_out("mov_ebp,edi\n");
			emit_out("call_eax\n");
		}
		else if(AMD64 == Architecture)
		{
			emit_out("lea_rax,[rbp+DWORD] %");
			emit_out(s);
			emit_out("\nmov_rax,[rax]\n");
			emit_out("mov_rbp,rdi\n");
			emit_out("call_rax\n");
		}
		else if(ARMV7L == Architecture)
		{
			emit_out("!");
			emit_out(s);
			emit_out(" R0 SUB BP ARITH_ALWAYS\n");
			emit_out("!0 R0 LOAD32 R0 MEMORY\n");
			emit_out("{LR} PUSH_ALWAYS\t# Protect the old link register\n");
			emit_out("'0' R11 BP NO_SHIFT MOVE_ALWAYS\n");
			emit_out("'3' R0 CALL_REG_ALWAYS\n");
			emit_out("{LR} POP_ALWAYS\t# Prevent overwrite\n");
		}
		else if(AARCH64 == Architecture)
		{
			emit_out("SET_X0_FROM_BP\n");
			emit_out("LOAD_W1_AHEAD\nSKIP_32_DATA\n%");
			emit_out(s);
			emit_out("\nSUB_X0_X0_X1\n");
			emit_out("DEREF_X0\n");
			emit_out("SET_BP_FROM_X16\n");
			emit_out("SET_X16_FROM_X0\n");
			emit_out("BLR_X16\n");
		}
		else if(RISCV32 == Architecture)
		{
			emit_out("rd_a0 rs1_fp !");
			emit_out(s);
			emit_out(" addi\n");
			emit_out("rd_a0 rs1_a0 lw\n");
			emit_out("rd_fp rs1_tp mv\n");
			emit_out("rd_ra rs1_a0 jalr\n");
		}
		else if(RISCV64 == Architecture)
		{
			emit_out("rd_a0 rs1_fp !");
			emit_out(s);
			emit_out(" addi\n");
			emit_out("rd_a0 rs1_a0 ld\n");
			emit_out("rd_fp rs1_tp mv\n");
			emit_out("rd_ra rs1_a0 jalr\n");
		}
	}
	else
	{
		/* Direct call to the FUNCTION_<s> label */
		if((KNIGHT_NATIVE == Architecture) || (KNIGHT_POSIX == Architecture))
		{
			emit_out("MOVE R14 R13\n");
			emit_out("LOADR R0 4\nJUMP 4\n&FUNCTION_");
			emit_out(s);
			emit_out("\nCALL R0 R15\n");
		}
		else if(X86 == Architecture)
		{
			emit_out("mov_ebp,edi\n");
			emit_out("call %FUNCTION_");
			emit_out(s);
			emit_out("\n");
		}
		else if(AMD64 == Architecture)
		{
			emit_out("mov_rbp,rdi\n");
			emit_out("call %FUNCTION_");
			emit_out(s);
			emit_out("\n");
		}
		else if(ARMV7L == Architecture)
		{
			emit_out("{LR} PUSH_ALWAYS\t# Protect the old link register\n");
			emit_out("'0' R11 BP NO_SHIFT MOVE_ALWAYS\n");
			emit_out("^~FUNCTION_");
			emit_out(s);
			emit_out(" CALL_ALWAYS\n");
			emit_out("{LR} POP_ALWAYS\t# Restore the old link register\n");
		}
		else if(AARCH64 == Architecture)
		{
			emit_out("SET_BP_FROM_X16\n");
			emit_out("LOAD_W16_AHEAD\nSKIP_32_DATA\n&FUNCTION_");
			emit_out(s);
			emit_out("\n");
			emit_out("BLR_X16\n");
		}
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture))
		{
			emit_out("rd_fp rs1_tp mv\n");
			emit_out("rd_ra $FUNCTION_");
			emit_out(s);
			emit_out(" jal\n");
		}
	}

	/* Discard one stack slot per argument that was pushed */
	for(; passed > 0; passed = passed - 1)
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("POPR R1 R15\t# _process_expression_locals\n");
		else if(X86 == Architecture) emit_out("pop_ebx\t# _process_expression_locals\n");
		else if(AMD64 == Architecture) emit_out("pop_rbx\t# _process_expression_locals\n");
		else if(ARMV7L == Architecture) emit_out("{R1} POP_ALWAYS\t# _process_expression_locals\n");
		else if(AARCH64 == Architecture) emit_out("POP_X1\t# _process_expression_locals\n");
		else if(RISCV32 == Architecture) emit_out("rd_a1 rs1_sp lw\t# _process_expression_locals\nrd_sp rs1_sp !4 addi\n");
		else if(RISCV64 == Architecture) emit_out("rd_a1 rs1_sp ld\t# _process_expression_locals\nrd_sp rs1_sp !8 addi\n");
	}

	if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture))
	{
		emit_out("POPR R14 R15\t# Restore old base pointer\n");
		emit_out("POPR R13 R15\t# Prevent overwrite\n");
	}
	else if(X86 == Architecture)
	{
		emit_out("pop_ebp\t# Restore old base pointer\n");
		emit_out("pop_edi\t# Prevent overwrite\n");
	}
	else if(AMD64 == Architecture)
	{
		emit_out("pop_rbp\t# Restore old base pointer\n");
		emit_out("pop_rdi\t# Prevent overwrite\n");
	}
	else if(ARMV7L == Architecture)
	{
		emit_out("{BP} POP_ALWAYS\t# Restore old base pointer\n");
		emit_out("{R11} POP_ALWAYS\t# Prevent overwrite\n");
	}
	else if(AARCH64 == Architecture)
	{
		emit_out("POP_BP\t# Restore the old base pointer\n");
		emit_out("POP_LR\t# Restore the old return pointer (link)\n");
		emit_out("POP_X16\t# Restore a register we used as tmp\n");
	}
	else if(RISCV32 == Architecture)
	{
		emit_out("rd_fp rs1_sp lw\t# Restore old frame pointer\n");
		emit_out("rd_tp rs1_sp !8 lw\t# Restore temp register\n");
		emit_out("rd_ra rs1_sp !4 lw\t# Restore return address\n");
		emit_out("rd_sp rs1_sp !12 addi\t# Deallocate stack\n");
	}
	else if(RISCV64 == Architecture)
	{
		emit_out("rd_fp rs1_sp ld\t# Restore old frame pointer\n");
		emit_out("rd_tp rs1_sp !16 ld\t# Restore temp register\n");
		emit_out("rd_ra rs1_sp !8 ld\t# Restore return address\n");
		emit_out("rd_sp rs1_sp !24 addi\t# Deallocate stack\n");
	}
}

/*
 * Emit the instruction(s) that load the constant whose text is s.
 * On RISC-V the text is emitted twice, once for lui and once for addi/addiw.
 */
void constant_load(char* s)
{
	if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("LOADI R0 ");
	else if(X86 == Architecture) emit_out("mov_eax, %");
	else if(AMD64 == Architecture) emit_out("mov_rax, %");
	else if(ARMV7L == Architecture) emit_out("!0 R0 LOAD32 R15 MEMORY\n~0 JUMP_ALWAYS\n%");
	else if(AARCH64 == Architecture) emit_out("LOAD_W0_AHEAD\nSKIP_32_DATA\n%");
	else if((RISCV32 == Architecture) || (RISCV64 == Architecture))
	{
		emit_out("rd_a0 ~");
		emit_out(s);
		emit_out(" lui\nrd_a0 rs1_a0 !");
	}
	emit_out(s);
	if(RISCV32 == Architecture) emit_out(" addi\n");
	else if(RISCV64 == Architecture) emit_out(" addiw\n");
	emit_out("\n");
}

/*
 * Return the instruction text for a sign-extending load of size bytes on the
 * selected Architecture. An unsupported size/architecture pair is reported on
 * stderr and the program exits with EXIT_FAILURE.
 */
char* load_value_signed(unsigned size)
{
	if(size == 1)
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) return "LOAD8 R0 R0 0\n";
		else if(X86 == Architecture) return "movsx_eax,BYTE_PTR_[eax]\n";
		else if(AMD64 == Architecture) return "movsx_rax,BYTE_PTR_[rax]\n";
		else if(ARMV7L == Architecture) return "LOADS8 R0 LOAD R0 HALF_MEMORY\n";
		else if(AARCH64 == Architecture) return "LDRSB_X0_[X0]\n";
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) return "rd_a0 rs1_a0 lb\n";
	}
	else if(size == 2)
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) return "LOAD16 R0 R0 0\n";
		else if(X86 == Architecture) return "movsx_eax,WORD_PTR_[eax]\n";
		else if(AMD64 == Architecture) return "movsx_rax,WORD_PTR_[rax]\n";
		else if(ARMV7L == Architecture) return "LOADS16 R0 LOAD R0 HALF_MEMORY\n";
		else if(AARCH64 == Architecture) return "LDRSH_X0_[X0]\n";
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) return "rd_a0 rs1_a0 lh\n";
	}
	else if(size == 4)
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) return "LOAD R0 R0 0\n";
		else if(X86 == Architecture) return "mov_eax,[eax]\n";
		else if(AMD64 == Architecture) return "movsx_rax,DWORD_PTR_[rax]\n";
		else if(ARMV7L == Architecture) return "!0 R0 LOAD32 R0 MEMORY\n";
		else if(AARCH64 == Architecture) return "LDR_W0_[X0]\n";
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) return "rd_a0 rs1_a0 lw\n";
	}
	else if(size == 8)
	{
		if(AMD64 == Architecture) return "mov_rax,[rax]\n";
		else if(AARCH64 == Architecture) return "DEREF_X0\n";
		else if(RISCV64 == Architecture) return "rd_a0 rs1_a0 ld\n";
	}
	line_error();
	fputs(" Got unsupported size ", stderr);
	fputs(int2str(size, 10, TRUE), stderr);
	fputs(" when trying to load value.\n", stderr);
	exit(EXIT_FAILURE);
}

/*
 * Return the instruction text for a zero-extending load of size bytes on the
 * selected Architecture. An unsupported size/architecture pair is reported on
 * stderr and the program exits with EXIT_FAILURE.
 */
char* load_value_unsigned(unsigned size)
{
	if(size == 1)
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) return "LOADU8 R0 R0 0\n";
		else if(X86 == Architecture) return "movzx_eax,BYTE_PTR_[eax]\n";
		else if(AMD64 == Architecture) return "movzx_rax,BYTE_PTR_[rax]\n";
		else if(ARMV7L == Architecture) return "!0 R0 LOAD R0 MEMORY\n";
		else if(AARCH64 == Architecture) return "DEREF_X0_BYTE\n";
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) return "rd_a0 rs1_a0 lbu\n";
	}
	else if(size == 2)
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) return "LOADU16 R0 R0 0\n";
		else if(X86 == Architecture) return "movzx_eax,WORD_PTR_[eax]\n";
		else if(AMD64 == Architecture) return "movzx_rax,WORD_PTR_[rax]\n";
		else if(ARMV7L == Architecture) return "NO_OFFSET R0 LOAD R0 HALF_MEMORY\n";
		else if(AARCH64 == Architecture) return "LDRH_W0_[X0]\n";
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) return "rd_a0 rs1_a0 lhu\n";
	}
	else if(size == 4)
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) return "LOAD R0 R0 0\n";
		else if(X86 == Architecture) return "mov_eax,[eax]\n";
		else if(AMD64 == Architecture) return "mov_eax,[rax]\n";
		else if(ARMV7L == Architecture) return "!0 R0 LOAD32 R0 MEMORY\n";
		else if(AARCH64 == Architecture) return "LDR_W0_[X0]\n";
		else if(RISCV32 == Architecture) return "rd_a0 rs1_a0 lw\n";
		else if(RISCV64 == Architecture) return "rd_a0 rs1_a0 lwu\n";
	}
	else if(size == 8)
	{
		if(AMD64 == Architecture) return "mov_rax,[rax]\n";
		else if(AARCH64 == Architecture) return "DEREF_X0\n";
		else if(RISCV64 == Architecture) return "rd_a0 rs1_a0 ld\n";
	}
	line_error();
	fputs(" Got unsupported size ", stderr);
	fputs(int2str(size, 10, TRUE), stderr);
	fputs(" when trying to load value.\n", stderr);
	exit(EXIT_FAILURE);
}

/* Pick the signed or unsigned load text for size depending on is_signed */
char* load_value(unsigned size, int is_signed)
{
	if(is_signed) return load_value_signed(size);
	return load_value_unsigned(size);
}

/*
 * Return the instruction text that stores size bytes on the selected
 * Architecture. An unsupported size/architecture pair is reported on stderr
 * and the program exits with EXIT_FAILURE.
 */
char* store_value(unsigned size)
{
	if(size == 1)
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) return "STORE8 R0 R1 0\n";
		else if(X86 == Architecture) return "mov_[ebx],al\n";
		else if(AMD64 == Architecture) return "mov_[rbx],al\n";
		else if(ARMV7L == Architecture) return "!0 R0 STORE8 R1 MEMORY\n";
		else if(AARCH64 == Architecture) return "STR_BYTE_W0_[X1]\n";
		else if(RISCV32 == Architecture) return "rs1_a1 rs2_a0 sb\n";
		else if(RISCV64 == Architecture) return "rs1_a1 rs2_a0 sb\n";
	}
	else if(size == 2)
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) return "STORE16 R0 R1 0\n";
		else if(X86 == Architecture) return "mov_[ebx],ax\n";
		else if(AMD64 == Architecture) return "mov_[rbx],ax\n";
		else if(ARMV7L == Architecture) return "NO_OFFSET R0 STORE16 R1 HALF_MEMORY\n";
		else if(AARCH64 == Architecture) return "STRH_W0_[X1]\n";
		else if(RISCV32 == Architecture || RISCV64 == Architecture) return "rs1_a1 rs2_a0 sh\n";
	}
	else if(size == 4)
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) return "STORE R0 R1 0\n";
		else if(X86 == Architecture) return "mov_[ebx],eax\n";
		else if(AMD64 == Architecture) return "mov_[rbx],eax\n";
		else if(ARMV7L == Architecture) return "!0 R0 STORE32 R1 MEMORY\n";
		else if(AARCH64 == Architecture) return "STR_W0_[X1]\n";
		else if(RISCV32 == Architecture || RISCV64 == Architecture) return "rs1_a1 rs2_a0 sw\n";
	}
	else if(size == 8)
	{
		if(AMD64 == Architecture) return "mov_[rbx],rax\n";
		else if(AARCH64 == Architecture) return "STR_X0_[X1]\n";
		else if(RISCV64 == Architecture) return "rs1_a1 rs2_a0 sd\n";
	}
	/* Should not happen but print error message. */
	fputs("Got unsupported size ", stderr);
	fputs(int2str(size, 10, TRUE), stderr);
	fputs(" when storing number in register.\n", stderr);
	line_error();
	exit(EXIT_FAILURE);
}

/* TRUE when token is one of the ten compound assignment operators */
int is_compound_assignment(char* token)
{
	if(match("+=", token)) return TRUE;
	else if(match("-=", token)) return TRUE;
	else if(match("*=", token)) return TRUE;
	else if(match("/=", token)) return TRUE;
	else if(match("%=", token)) return TRUE;
	else if(match("<<=", token)) return TRUE;
	else if(match(">>=", token)) return TRUE;
	else if(match("&=", token)) return TRUE;
	else if(match("^=", token)) return TRUE;
	else if(match("|=", token)) return TRUE;
	return FALSE;
}

void postfix_expr_stub(void);

/*
 * Emit code for a reference to the local or argument a.
 *
 * A symbol of type FUNCTION or FUNCTION* followed by '(' becomes an indirect
 * call. Otherwise the address (frame base combined with a->depth) is
 * computed; nothing more is emitted when Address_of is TRUE, and a '.' hands
 * over to postfix_expr_stub(). The value is loaded unless the next token is
 * an assignment operator, followed by one extra load per num_dereference.
 */
void variable_load(struct token_list* a, int num_dereference)
{
	require(NULL != global_token, "incomplete variable load received\n");
	if((match("FUNCTION", a->type->name) || match("FUNCTION*", a->type->name)) && match("(", global_token->s))
	{
		function_call(int2str(a->depth, 10, TRUE), TRUE);
		return;
	}
	current_target = a->type;

	if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("ADDI R0 R14 ");
	else if(X86 == Architecture) emit_out("lea_eax,[ebp+DWORD] %");
	else if(AMD64 == Architecture) emit_out("lea_rax,[rbp+DWORD] %");
	else if(ARMV7L == Architecture) emit_out("!");
	else if(AARCH64 == Architecture) emit_out("SET_X0_FROM_BP\nLOAD_W1_AHEAD\nSKIP_32_DATA\n%");
	else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rd_a0 rs1_fp !");

	emit_out(int2str(a->depth, 10, TRUE));
	if(ARMV7L == Architecture) emit_out(" R0 SUB BP ARITH_ALWAYS");
	else if(AARCH64 == Architecture) emit_out("\nSUB_X0_X0_X1\n");
	else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out(" addi");
	emit_out("\n");

	if(TRUE == Address_of) return;
	if(match(".", global_token->s))
	{
		postfix_expr_stub();
		return;
	}
	if(!match("=", global_token->s) && !is_compound_assignment(global_token->s))
	{
		emit_out(load_value(current_target->size, current_target->is_signed));
	}

	while (num_dereference > 0)
	{
		current_target = current_target->type;
		emit_out(load_value(current_target->size, current_target->is_signed));
		num_dereference = num_dereference - 1;
	}
}

/*
 * Emit code for a reference to the function a: a direct call when the next
 * token is '(', otherwise code that loads the address of FUNCTION_<a->s>.
 */
void function_load(struct token_list* a)
{
	require(NULL != global_token, "incomplete function load\n");
	if(match("(", global_token->s))
	{
		function_call(a->s, FALSE);
		return;
	}

	if((KNIGHT_NATIVE == Architecture) || (KNIGHT_POSIX == Architecture)) emit_out("LOADR R0 4\nJUMP 4\n&FUNCTION_");
	else if(X86 == Architecture) emit_out("mov_eax, &FUNCTION_");
	else if(AMD64 == Architecture) emit_out("lea_rax,[rip+DWORD] %FUNCTION_");
	else if(ARMV7L == Architecture) emit_out("!0 R0 LOAD32 R15 MEMORY\n~0 JUMP_ALWAYS\n&FUNCTION_");
	else if(AARCH64 == Architecture) emit_out("LOAD_W0_AHEAD\nSKIP_32_DATA\n&FUNCTION_");
	else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rd_a0 ~FUNCTION_");
	emit_out(a->s);
	if(RISCV32 == Architecture)
	{
		emit_out(" auipc\n");
		emit_out("rd_a0 rs1_a0 !FUNCTION_");
		emit_out(a->s);
		emit_out(" addi");
	}
	else if(RISCV64 == Architecture)
	{
		emit_out(" auipc\n");
		emit_out("rd_a0 rs1_a0 !FUNCTION_");
		emit_out(a->s);
		emit_out(" addiw");
	}
	emit_out("\n");
}

/*
 * Emit code for a reference to the global a: the address of GLOBAL_<a->s> is
 * loaded, then (unless Address_of is TRUE, a '.' follows, or an assignment
 * operator follows) a register_size wide load of its value.
 */
void global_load(struct token_list* a)
{
	current_target = a->type;
	if((KNIGHT_NATIVE == Architecture) || (KNIGHT_POSIX == Architecture)) emit_out("LOADR R0 4\nJUMP 4\n&GLOBAL_");
	else if(X86 == Architecture) emit_out("mov_eax, &GLOBAL_");
	else if(AMD64 == Architecture) emit_out("lea_rax,[rip+DWORD] %GLOBAL_");
	else if(ARMV7L == Architecture) emit_out("!0 R0 LOAD32 R15 MEMORY\n~0 JUMP_ALWAYS\n&GLOBAL_");
	else if(AARCH64 == Architecture) emit_out("LOAD_W0_AHEAD\nSKIP_32_DATA\n&GLOBAL_");
	else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rd_a0 ~GLOBAL_");
	emit_out(a->s);
	if((RISCV32 == Architecture) || (RISCV64 == Architecture))
	{
		emit_out(" auipc\n");
		emit_out("rd_a0 rs1_a0 !GLOBAL_");
		emit_out(a->s);
		emit_out(" addi");
	}
	emit_out("\n");

	require(NULL != global_token, "unterminated global load\n");
	if(TRUE == Address_of) return;
	if(match(".", global_token->s))
	{
		postfix_expr_stub();
		return;
	}
	if(match("=", global_token->s) || is_compound_assignment(global_token->s)) return;

	emit_out(load_value(register_size, current_target->is_signed));
}

/*
 * primary-expr:
 * FAILURE
 * "String"
 * 'Char'
 * [0-9]*
 * [a-z,A-Z]*
 * ( expression )
 */

/* Report the unexpected current token on stderr and exit with EXIT_FAILURE */
void primary_expr_failure(void)
{
	require(NULL != global_token, "hit EOF when expecting primary expression\n");
	line_error();
	fputs("Received ", stderr);
	fputs(global_token->s, stderr);
	fputs(" in primary_expr\n", stderr);
	exit(EXIT_FAILURE);
}

/*
 * Emit code for a string literal at global_token.
 *
 * The code loads the address of the label STRING_<function>_<n>, where n is
 * current_count (incremented here). The label and the parsed string contents
 * are added to strings_list. Adjacent string tokens are concatenated into a
 * single MAX_STRING sized buffer before being parsed.
 */
void primary_expr_string(void)
{
	char* number_string = int2str(current_count, 10, TRUE);
	current_count = current_count + 1;
	if((KNIGHT_NATIVE == Architecture) || (KNIGHT_POSIX == Architecture)) emit_out("LOADR R0 4\nJUMP 4\n&STRING_");
	else if(X86 == Architecture) emit_out("mov_eax, &STRING_");
	else if(AMD64 == Architecture) emit_out("lea_rax,[rip+DWORD] %STRING_");
	else if(ARMV7L == Architecture) emit_out("!0 R0 LOAD32 R15 MEMORY\n~0 JUMP_ALWAYS\n&STRING_");
	else if(AARCH64 == Architecture) emit_out("LOAD_W0_AHEAD\nSKIP_32_DATA\n&STRING_");
	else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rd_a0 ~STRING_");
	uniqueID_out(function->s, number_string);
	if((RISCV32 == Architecture) || (RISCV64 == Architecture))
	{
		emit_out("auipc\n");
		emit_out("rd_a0 rs1_a0 !STRING_");
		uniqueID_out(function->s, number_string);
		emit_out("addi\n");
	}

	/* The target */
	strings_list = emit(":STRING_", strings_list);
	strings_list = uniqueID(function->s, strings_list, number_string);

	/* catch case of just "foo" from segfaulting */
	require(NULL != global_token->next, "a string by itself is not valid C\n");

	/* Parse the string */
	if('"' != global_token->next->s[0])
	{
		strings_list = emit(parse_string(global_token->s), strings_list);
		global_token = global_token->next;
	}
	else
	{
		char* s = calloc(MAX_STRING, sizeof(char));
		require(NULL != s, "Exhausted memory while concatenating strings\n");

		/* prefix leading string */
		s[0] = '"';
		int i = 1;

		int j;
		while('"' == global_token->s[0])
		{
			/* Step past the leading '"' */
			j = 1;

			/* Copy the rest of the string as is */
			while(0 != global_token->s[j])
			{
				/* Keep the final byte zero so the result stays terminated */
				require(i < (MAX_STRING - 1), "concat string exceeded max string length\n");
				s[i] = global_token->s[j];
				i = i + 1;
				j = j + 1;
			}

			/* Move on to the next token */
			global_token = global_token->next;
			require(NULL != global_token, "multi-string null is not valid C\n");
		}

		/* Now use it */
		strings_list = emit(parse_string(s), strings_list);
	}
}

/*
 * Emit code that loads the value of the character literal at global_token
 * (decoded by escape_lookup on the text after the opening quote) and step
 * past the token.
 */
void primary_expr_char(void)
{
	if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("LOADI R0 ");
	else if(X86 == Architecture) emit_out("mov_eax, %");
	else if(AMD64 == Architecture) emit_out("mov_rax, %");
	else if(ARMV7L == Architecture) emit_out("!");
	else if(AARCH64 == Architecture) emit_out("LOAD_W0_AHEAD\nSKIP_32_DATA\n%");
	else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rd_a0 !");
	emit_out(int2str(escape_lookup(global_token->s + 1), 10, TRUE));
	if(ARMV7L == Architecture) emit_out(" R0 LOADI8_ALWAYS");
	else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out(" addi");
	emit_out("\n");
	global_token = global_token->next;
}

/*
 * Map 0..15 to the character '0'..'9' or 'A'..'F'.
 * Returns -1 for any other input.
 */
int hex2char(int c)
{
	if((c >= 0) && (c <= 9)) return (c + 48);
	else if((c >= 10) && (c <= 15)) return (c + 55);
	else return -1;
}

/*
 * Return a newly allocated string of 2 * bytes upper-case hex digits for a,
 * most significant nibble first. bytes must be greater than zero.
 */
char* number_to_hex(int a, int bytes)
{
	require(bytes > 0, "number to hex must have a positive number of bytes greater than zero\n");
	char* result = calloc(1 + (bytes << 1), sizeof(char));
	if(NULL == result)
	{
		fputs("calloc failed in number_to_hex\n", stderr);
		exit(EXIT_FAILURE);
	}
	int i = 0;

	int divisor = (bytes << 3);
	require(divisor > 0, "unexpected wrap around in number_to_hex\n");

	/* Simply collect numbers until divisor is gone */
	while(0 != divisor)
	{
		divisor = divisor - 4;
		result[i] = hex2char((a >> divisor) & 0xF);
		i = i + 1;
	}

	return result;
}

/*
 * Emit code that loads the numeric literal whose text is s.
 * Knight and RISC-V choose between a short immediate form and a longer
 * sequence based on the value; the other targets emit s directly.
 */
void primary_expr_number(char* s)
{
	if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture))
	{
		int size = strtoint(s);
		if((32767 > size) && (size > -32768))
		{
			emit_out("LOADI R0 ");
			emit_out(s);
		}
		else
		{
			emit_out("LOADR R0 4\nJUMP 4\n'");
			emit_out(number_to_hex(size, register_size));
			emit_out("'");
		}
	}
	else if(X86 == Architecture)
	{
		emit_out("mov_eax, %");
		emit_out(s);
	}
	else if(AMD64 == Architecture)
	{
		emit_out("mov_rax, %");
		emit_out(s);
	}
	else if(ARMV7L == Architecture)
	{
		emit_out("!0 R0 LOAD32 R15 MEMORY\n~0 JUMP_ALWAYS\n%");
		emit_out(s);
	}
	else if(AARCH64 == Architecture)
	{
		emit_out("LOAD_W0_AHEAD\nSKIP_32_DATA\n%");
		emit_out(s);
	}
	else if((RISCV32 == Architecture) || (RISCV64 == Architecture))
	{
		int size = strtoint(s);
		if((2047 > size) && (size > -2048))
		{
			emit_out("rd_a0 !");
			emit_out(s);
			emit_out(" addi");
		}
		else if (0 == (size >> 30))
		{
			emit_out("rd_a0 ~");
			emit_out(s);
			emit_out(" lui\n");
			emit_out("rd_a0 rs1_a0 !");
			emit_out(s);
			emit_out(" addi");
		}
		else
		{
			/* Split into the bits above bit 30 and the low 30 bits */
			int high = size >> 30;
			int low = ((size >> 30) << 30) ^ size;
			emit_out("rd_a0 ~");
			emit_out(int2str(high, 10, TRUE));
			emit_out(" lui\n");
			emit_out("rd_a0 rs1_a0 !");
			emit_out(int2str(high, 10, TRUE));
			emit_out(" addi\n");
			emit_out("rd_a0 rs1_a0 rs2_x30 slli\n");
			emit_out("rd_t1 ~");
			emit_out(int2str(low, 10, TRUE));
			emit_out(" lui\n");
			emit_out("rd_t1 rs1_t1 !");
			emit_out(int2str(low, 10, TRUE));
			emit_out(" addi\n");
			emit_out("rd_a0 rs1_a0 rs2_t1 or\n");
		}
	}
	emit_out("\n");
}

/*
 * Resolve the identifier at global_token (after counting any leading '*'
 * tokens) and emit the matching load. Lookup order: constants, the current
 * function's locals, its arguments, functions, globals. An unknown name is
 * reported on stderr and the program exits with EXIT_FAILURE.
 */
void primary_expr_variable(void)
{
	int num_dereference = 0;
	while(global_token->s[0] == '*')
	{
		global_token = global_token->next;
		require(NULL != global_token, "Walked off the end of a variable dereference\n");
		num_dereference = num_dereference + 1;
	}
	char* s = global_token->s;
	global_token = global_token->next;
	struct token_list* a = sym_lookup(s, global_constant_list);
	if(NULL != a)
	{
		constant_load(a->arguments->s);
		return;
	}

	a = sym_lookup(s, function->locals);
	if(NULL != a)
	{
		variable_load(a, num_dereference);
		return;
	}

	a = sym_lookup(s, function->arguments);
	if(NULL != a)
	{
		variable_load(a, num_dereference);
		return;
	}

	a = sym_lookup(s, global_function_list);
	if(NULL != a)
	{
		function_load(a);
		return;
	}

	a = sym_lookup(s, global_symbol_list);
	if(NULL != a)
	{
		global_load(a);
		return;
	}

	line_error();
	fputs(s ,stderr);
	fputs(" is not a defined symbol\n", stderr);
	exit(EXIT_FAILURE);
}

void primary_expr(void);

/*
 * Pick the result type for an operation on a and b.
 * Identical types are returned as is. Otherwise global_types is scanned and
 * the first entry whose name, ->indirect name or ->indirect->indirect name is
 * the same pointer as a->name or b->name is returned. Names are compared by
 * address, not by content.
 */
struct type* promote_type(struct type* a, struct type* b)
{
	require(NULL != b, "impossible case 1 in promote_type\n");
	require(NULL != a, "impossible case 2 in promote_type\n");

	if(a == b) return a;

	struct type* i;
	for(i = global_types; NULL != i; i = i->next)
	{
		if(a->name == i->name) break;
		if(b->name == i->name) break;
		if(a->name == i->indirect->name) break;
		if(b->name == i->indirect->name) break;
		if(a->name == i->indirect->indirect->name) break;
		if(b->name == i->indirect->indirect->name) break;
	}
	require(NULL != i, "impossible case 3 in promote_type\n");
	return i;
}

/*
 * Shared step of every binary operator: push the left operand, skip the
 * operator token, let f parse and emit the right operand, promote
 * current_target against the left operand's type, then pop the left operand
 * into the second working register.
 */
void common_recursion(FUNCTION f)
{
	if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("PUSHR R0 R15\t#_common_recursion\n");
	else if(X86 == Architecture) emit_out("push_eax\t#_common_recursion\n");
	else if(AMD64 == Architecture) emit_out("push_rax\t#_common_recursion\n");
	else if(ARMV7L == Architecture) emit_out("{R0} PUSH_ALWAYS\t#_common_recursion\n");
	else if(AARCH64 == Architecture) emit_out("PUSH_X0\t#_common_recursion\n");
	else if(RISCV32 == Architecture) emit_out("rd_sp rs1_sp !-4 addi\t# _common_recursion\nrs1_sp rs2_a0 sw\n");
	else if(RISCV64 == Architecture) emit_out("rd_sp rs1_sp !-8 addi\t# _common_recursion\nrs1_sp rs2_a0 sd\n");

	struct type* last_type = current_target;
	global_token = global_token->next;
	require(NULL != global_token, "Received EOF in common_recursion\n");
	f();
	current_target = promote_type(current_target, last_type);

	if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("POPR R1 R15\t# _common_recursion\n");
	else if(X86 == Architecture) emit_out("pop_ebx\t# _common_recursion\n");
	else if(AMD64 == Architecture) emit_out("pop_rbx\t# _common_recursion\n");
	else if(ARMV7L == Architecture) emit_out("{R1} POP_ALWAYS\t# _common_recursion\n");
	else if(AARCH64 == Architecture) emit_out("POP_X1\t# _common_recursion\n");
	else if(RISCV32 == Architecture) emit_out("rd_a1 rs1_sp lw\nrd_sp rs1_sp !4 addi\t# _common_recursion\n");
	else if(RISCV64 == Architecture) emit_out("rd_a1 rs1_sp ld\nrd_sp rs1_sp !8 addi\t# _common_recursion\n");
}

/*
 * When the current token is the operator name: evaluate the right operand
 * through common_recursion(f), emit s, then call iterate.
 */
void general_recursion(FUNCTION f, char* s, char* name, FUNCTION iterate)
{
	require(NULL != global_token, "Received EOF in general_recursion\n");
	if(match(name, global_token->s))
	{
		common_recursion(f);
		emit_out(s);
		iterate();
	}
}

/*
 * Like general_recursion() but with two instruction texts: s1 is emitted
 * when current_target is NULL or signed, s2 when it is unsigned.
 */
void arithmetic_recursion(FUNCTION f, char* s1, char* s2, char* name, FUNCTION iterate)
{
	require(NULL != global_token, "Received EOF in arithmetic_recursion\n");
	if(match(name, global_token->s))
	{
		common_recursion(f);
		if(NULL == current_target)
		{
			emit_out(s1);
		}
		else if(current_target->is_signed)
		{
			emit_out(s1);
		}
		else
		{
			emit_out(s2);
		}
		iterate();
	}
}

/*
 * postfix-expr:
 *         primary-expr
 *         postfix-expr [ expression ]
 *         postfix-expr ( expression-list-opt )
 *         postfix-expr -> member
 *         postfix-expr .
member */ struct type* lookup_member(struct type* parent, char* name); void postfix_expr_arrow(void) { emit_out("# looking up offset\n"); global_token = global_token->next; require(NULL != global_token, "naked -> not allowed\n"); struct type* i = lookup_member(current_target, global_token->s); current_target = i->type; global_token = global_token->next; require(NULL != global_token, "Unterminated -> expression not allowed\n"); if(0 != i->offset) { emit_out("# -> offset calculation\n"); if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) { emit_out("ADDUI R0 R0 "); emit_out(int2str(i->offset, 10, TRUE)); emit_out("\n"); } else if(X86 == Architecture) { emit_out("mov_ebx, %"); emit_out(int2str(i->offset, 10, TRUE)); emit_out("\nadd_eax,ebx\n"); } else if(AMD64 == Architecture) { emit_out("mov_rbx, %"); emit_out(int2str(i->offset, 10, TRUE)); emit_out("\nadd_rax,rbx\n"); } else if(ARMV7L == Architecture) { emit_out("!0 R1 LOAD32 R15 MEMORY\n~0 JUMP_ALWAYS\n%"); emit_out(int2str(i->offset, 10, TRUE)); emit_out("\n'0' R0 R0 ADD R1 ARITH2_ALWAYS\n"); } else if(AARCH64 == Architecture) { emit_out("LOAD_W1_AHEAD\nSKIP_32_DATA\n%"); emit_out(int2str(i->offset, 10, TRUE)); emit_out("\nADD_X0_X1_X0\n"); } else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) { emit_out("rd_a1 !"); emit_out(int2str(i->offset, 10, TRUE)); emit_out(" addi\n"); emit_out("rd_a0 rs1_a1 rs2_a0 add\n"); } } /* We don't yet support assigning structs to structs */ if((!match("=", global_token->s) && !is_compound_assignment(global_token->s) && (register_size >= i->size))) { emit_out(load_value(i->size, i->is_signed)); } } void postfix_expr_dot(void) { maybe_bootstrap_error("Member access using ."); emit_out("# looking up offset\n"); global_token = global_token->next; require(NULL != global_token, "naked . 
not allowed\n"); struct type* i = lookup_member(current_target, global_token->s); current_target = i->type; global_token = global_token->next; require(NULL != global_token, "Unterminated . expression not allowed\n"); if(0 != i->offset) { emit_out("# . offset calculation\n"); if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) { emit_out("ADDUI R0 R0 "); emit_out(int2str(i->offset, 10, TRUE)); emit_out("\n"); } else if(X86 == Architecture) { emit_out("mov_ebx, %"); emit_out(int2str(i->offset, 10, TRUE)); emit_out("\nadd_eax,ebx\n"); } else if(AMD64 == Architecture) { emit_out("mov_rbx, %"); emit_out(int2str(i->offset, 10, TRUE)); emit_out("\nadd_rax,rbx\n"); } else if(ARMV7L == Architecture) { emit_out("!0 R1 LOAD32 R15 MEMORY\n~0 JUMP_ALWAYS\n%"); emit_out(int2str(i->offset, 10, TRUE)); emit_out("\n'0' R0 R0 ADD R1 ARITH2_ALWAYS\n"); } else if(AARCH64 == Architecture) { emit_out("LOAD_W1_AHEAD\nSKIP_32_DATA\n%"); emit_out(int2str(i->offset, 10, TRUE)); emit_out("\nADD_X0_X1_X0\n"); } else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) { emit_out("rd_a1 !"); emit_out(int2str(i->offset, 10, TRUE)); emit_out(" addi\n"); emit_out("rd_a0 rs1_a1 rs2_a0 add\n"); } } if(match("=", global_token->s) || is_compound_assignment(global_token->s)) return; if(match("[", global_token->s)) return; emit_out(load_value(current_target->size, current_target->is_signed)); } void postfix_expr_array(void) { struct type* array = current_target; common_recursion(expression); current_target = array; require(NULL != current_target, "Arrays only apply to variables\n"); char* assign = load_value(register_size, current_target->is_signed); /* Add support for Ints */ if(match("char*", current_target->name)) { assign = load_value(1, TRUE); } else { if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("PUSHR R1 R15\nLOADI R1 "); else if(X86 == Architecture) emit_out("push_ebx\nmov_ebx, %"); else if(AMD64 == Architecture) 
emit_out("push_rbx\nmov_rbx, %"); else if(ARMV7L == Architecture) emit_out("{R1} PUSH_ALWAYS\n!0 R1 LOAD32 R15 MEMORY\n~0 JUMP_ALWAYS\n%"); else if(AARCH64 == Architecture) emit_out("PUSH_X1\nLOAD_W1_AHEAD\nSKIP_32_DATA\n%"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rd_a2 rs1_a1 addi\nrd_a1 !"); emit_out(int2str(current_target->type->size, 10, TRUE)); if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out(" addi"); if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("\nMULU R0 R1 R0\nPOPR R1 R15\n"); else if(X86 == Architecture) emit_out("\nmul_ebx\npop_ebx\n"); else if(AMD64 == Architecture) emit_out("\nmul_rbx\npop_rbx\n"); else if(ARMV7L == Architecture) emit_out("\n'9' R0 '0' R1 MUL R0 ARITH2_ALWAYS\n{R1} POP_ALWAYS\n"); else if(AARCH64 == Architecture) emit_out("\nMUL_X0_X1_X0\nPOP_X1\n"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("\nrd_a0 rs1_a1 rs2_a0 mul\nrd_a1 rs1_a2 addi\n"); } if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("ADD R0 R0 R1\n"); else if(X86 == Architecture) emit_out("add_eax,ebx\n"); else if(AMD64 == Architecture) emit_out("add_rax,rbx\n"); else if(ARMV7L == Architecture) emit_out("'0' R0 R0 ADD R1 ARITH2_ALWAYS\n"); else if(AARCH64 == Architecture) emit_out("ADD_X0_X1_X0\n"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rd_a0 rs1_a1 rs2_a0 add\n"); require_match("ERROR in postfix_expr\nMissing ]\n", "]"); require(NULL != global_token, "truncated array expression\n"); if(match("=", global_token->s) || is_compound_assignment(global_token->s) || match(".", global_token->s)) { assign = ""; } if(match("[", global_token->s)) { current_target = current_target->type; } emit_out(assign); } /* * unary-expr: * &postfix-expr * - postfix-expr * !postfix-expr * sizeof ( type ) */ struct type* type_name(void); void unary_expr_sizeof(void) { global_token = global_token->next; require(NULL != 
global_token, "Received EOF when starting sizeof\n"); require_match("ERROR in unary_expr\nMissing (\n", "("); struct type* a = type_name(); require_match("ERROR in unary_expr\nMissing )\n", ")"); if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("LOADUI R0 "); else if(X86 == Architecture) emit_out("mov_eax, %"); else if(AMD64 == Architecture) emit_out("mov_rax, %"); else if(ARMV7L == Architecture) emit_out("!0 R0 LOAD32 R15 MEMORY\n~0 JUMP_ALWAYS\n%"); else if(AARCH64 == Architecture) emit_out("LOAD_W0_AHEAD\nSKIP_32_DATA\n%"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rd_a0 !"); emit_out(int2str(a->size, 10, TRUE)); if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out(" addi"); emit_out("\n"); } void postfix_expr_stub(void) { require(NULL != global_token, "Unexpected EOF, improperly terminated primary expression\n"); if(match("[", global_token->s)) { postfix_expr_array(); postfix_expr_stub(); } if(match("->", global_token->s)) { postfix_expr_arrow(); postfix_expr_stub(); } if(match(".", global_token->s)) { postfix_expr_dot(); postfix_expr_stub(); } } void postfix_expr(void) { primary_expr(); postfix_expr_stub(); } /* * additive-expr: * postfix-expr * additive-expr * postfix-expr * additive-expr / postfix-expr * additive-expr % postfix-expr * additive-expr + postfix-expr * additive-expr - postfix-expr * additive-expr << postfix-expr * additive-expr >> postfix-expr */ void additive_expr_stub_a(void) { if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) { arithmetic_recursion(postfix_expr, "MUL R0 R1 R0\n", "MULU R0 R1 R0\n", "*", additive_expr_stub_a); arithmetic_recursion(postfix_expr, "DIV R0 R1 R0\n", "DIVU R0 R1 R0\n", "/", additive_expr_stub_a); arithmetic_recursion(postfix_expr, "MOD R0 R1 R0\n", "MODU R0 R1 R0\n", "%", additive_expr_stub_a); } else if(X86 == Architecture) { arithmetic_recursion(postfix_expr, "imul_ebx\n", "mul_ebx\n", "*", additive_expr_stub_a); 
/* additive_expr_stub_a, x86 branch continued: "/" and "%" (signed text first, unsigned second) */
		arithmetic_recursion(postfix_expr, "xchg_ebx,eax\ncdq\nidiv_ebx\n", "xchg_ebx,eax\nmov_edx, %0\ndiv_ebx\n", "/", additive_expr_stub_a);
		arithmetic_recursion(postfix_expr, "xchg_ebx,eax\ncdq\nidiv_ebx\nmov_eax,edx\n", "xchg_ebx,eax\nmov_edx, %0\ndiv_ebx\nmov_eax,edx\n", "%", additive_expr_stub_a);
	}
	else if(AMD64 == Architecture)
	{
		arithmetic_recursion(postfix_expr, "imul_rbx\n", "mul_rbx\n", "*", additive_expr_stub_a);
		arithmetic_recursion(postfix_expr, "xchg_rbx,rax\ncqo\nidiv_rbx\n", "xchg_rbx,rax\nmov_rdx, %0\ndiv_rbx\n", "/", additive_expr_stub_a);
		arithmetic_recursion(postfix_expr, "xchg_rbx,rax\ncqo\nidiv_rbx\nmov_rax,rdx\n", "xchg_rbx,rax\nmov_rdx, %0\ndiv_rbx\nmov_rax,rdx\n", "%", additive_expr_stub_a);
	}
	else if(ARMV7L == Architecture)
	{
		arithmetic_recursion(postfix_expr, "'9' R0 '0' R1 MULS R0 ARITH2_ALWAYS\n", "'9' R0 '0' R1 MUL R0 ARITH2_ALWAYS\n", "*", additive_expr_stub_a);
		/* Division and modulo are emitted as calls to the labels divides/divide and moduluss/modulus */
		arithmetic_recursion(postfix_expr, "{LR} PUSH_ALWAYS\n^~divides CALL_ALWAYS\n{LR} POP_ALWAYS\n", "{LR} PUSH_ALWAYS\n^~divide CALL_ALWAYS\n{LR} POP_ALWAYS\n", "/", additive_expr_stub_a);
		arithmetic_recursion(postfix_expr, "{LR} PUSH_ALWAYS\n^~moduluss CALL_ALWAYS\n{LR} POP_ALWAYS\n", "{LR} PUSH_ALWAYS\n^~modulus CALL_ALWAYS\n{LR} POP_ALWAYS\n", "%", additive_expr_stub_a);
	}
	else if(AARCH64 == Architecture)
	{
		/* "*" uses one instruction text regardless of signedness */
		general_recursion(postfix_expr, "MUL_X0_X1_X0\n", "*", additive_expr_stub_a);
		arithmetic_recursion(postfix_expr, "SDIV_X0_X1_X0\n", "UDIV_X0_X1_X0\n", "/", additive_expr_stub_a);
		arithmetic_recursion(postfix_expr, "SDIV_X2_X1_X0\nMSUB_X0_X0_X2_X1\n", "UDIV_X2_X1_X0\nMSUB_X0_X0_X2_X1\n", "%", additive_expr_stub_a);
	}
	else if((RISCV32 == Architecture) || (RISCV64 == Architecture))
	{
		/* "*" uses one instruction text regardless of signedness */
		general_recursion(postfix_expr, "rd_a0 rs1_a1 rs2_a0 mul\n", "*", additive_expr_stub_a);
		arithmetic_recursion(postfix_expr, "rd_a0 rs1_a1 rs2_a0 div\n", "rd_a0 rs1_a1 rs2_a0 divu\n", "/", additive_expr_stub_a);
		arithmetic_recursion(postfix_expr, "rd_a0 rs1_a1 rs2_a0 rem\n", "rd_a0 rs1_a1 rs2_a0 remu\n", "%", additive_expr_stub_a);
	}
}

/* A postfix expression followed by any "*", "/" or "%" operators. */
void additive_expr_a(void)
{
	postfix_expr();
	additive_expr_stub_a();
}

/*
 * Level b: "+" and "-".  Operands are parsed with additive_expr_a, so the
 * level-a operators are consumed before these are considered.  On several
 * targets the signed and unsigned texts are the same string.
 */
void additive_expr_stub_b(void)
{
	if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture))
	{
		arithmetic_recursion(additive_expr_a, "ADD R0 R1 R0\n", "ADDU R0 R1 R0\n", "+", additive_expr_stub_b);
		arithmetic_recursion(additive_expr_a, "SUB R0 R1 R0\n", "SUBU R0 R1 R0\n", "-", additive_expr_stub_b);
	}
	else if(X86 == Architecture)
	{
		arithmetic_recursion(additive_expr_a, "add_eax,ebx\n", "add_eax,ebx\n", "+", additive_expr_stub_b);
		arithmetic_recursion(additive_expr_a, "sub_ebx,eax\nmov_eax,ebx\n", "sub_ebx,eax\nmov_eax,ebx\n", "-", additive_expr_stub_b);
	}
	else if(AMD64 == Architecture)
	{
		arithmetic_recursion(additive_expr_a, "add_rax,rbx\n", "add_rax,rbx\n", "+", additive_expr_stub_b);
		arithmetic_recursion(additive_expr_a, "sub_rbx,rax\nmov_rax,rbx\n", "sub_rbx,rax\nmov_rax,rbx\n", "-", additive_expr_stub_b);
	}
	else if(ARMV7L == Architecture)
	{
		arithmetic_recursion(additive_expr_a, "'0' R0 R0 ADD R1 ARITH2_ALWAYS\n", "'0' R0 R0 ADD R1 ARITH2_ALWAYS\n", "+", additive_expr_stub_b);
		arithmetic_recursion(additive_expr_a, "'0' R0 R0 SUB R1 ARITH2_ALWAYS\n", "'0' R0 R0 SUB R1 ARITH2_ALWAYS\n", "-", additive_expr_stub_b);
	}
	else if(AARCH64 == Architecture)
	{
		general_recursion(additive_expr_a, "ADD_X0_X1_X0\n", "+", additive_expr_stub_b);
		general_recursion(additive_expr_a, "SUB_X0_X1_X0\n", "-", additive_expr_stub_b);
	}
	else if((RISCV32 == Architecture) || (RISCV64 == Architecture))
	{
		general_recursion(additive_expr_a, "rd_a0 rs1_a1 rs2_a0 add\n", "+", additive_expr_stub_b);
		general_recursion(additive_expr_a, "rd_a0 rs1_a1 rs2_a0 sub\n", "-", additive_expr_stub_b);
	}
}

/* A level-a expression followed by any "+" or "-" operators. */
void additive_expr_b(void)
{
	additive_expr_a();
	additive_expr_stub_b();
}

/*
 * Level c: "<<" and ">>".  Operands are parsed with additive_expr_b.
 * Signed text first (arithmetic right shift), unsigned second (logical).
 */
void additive_expr_stub_c(void)
{
	if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture))
	{
		arithmetic_recursion(additive_expr_b, "SAL R0 R1 R0\n", "SL0 R0 R1 R0\n", "<<", additive_expr_stub_c);
		arithmetic_recursion(additive_expr_b, "SAR R0 R1 R0\n", "SR0 R0 R1 R0\n", ">>", additive_expr_stub_c);
	}
	else if(X86 == Architecture)
	{
		/* The shift count is moved into ecx and the value to shift into eax before shifting by cl */
		arithmetic_recursion(additive_expr_b, "mov_ecx,eax\nmov_eax,ebx\nsal_eax,cl\n", "mov_ecx,eax\nmov_eax,ebx\nshl_eax,cl\n", "<<", additive_expr_stub_c);
		arithmetic_recursion(additive_expr_b, "mov_ecx,eax\nmov_eax,ebx\nsar_eax,cl\n", "mov_ecx,eax\nmov_eax,ebx\nshr_eax,cl\n", ">>", additive_expr_stub_c);
	}
	else if(AMD64 == Architecture)
	{
		arithmetic_recursion(additive_expr_b, "mov_rcx,rax\nmov_rax,rbx\nsal_rax,cl\n", "mov_rcx,rax\nmov_rax,rbx\nshl_rax,cl\n", "<<", additive_expr_stub_c);
		arithmetic_recursion(additive_expr_b, "mov_rcx,rax\nmov_rax,rbx\nsar_rax,cl\n", "mov_rcx,rax\nmov_rax,rbx\nshr_rax,cl\n", ">>", additive_expr_stub_c);
	}
	else if(ARMV7L == Architecture)
	{
		arithmetic_recursion(additive_expr_b, "LEFT R1 R0 R0 SHIFT AUX_ALWAYS\n", "LEFT R1 R0 R0 SHIFT AUX_ALWAYS\n", "<<", additive_expr_stub_c);
		arithmetic_recursion(additive_expr_b, "ARITH_RIGHT R1 R0 R0 SHIFT AUX_ALWAYS\n", "RIGHT R1 R0 R0 SHIFT AUX_ALWAYS\n", ">>", additive_expr_stub_c);
	}
	else if(AARCH64 == Architecture)
	{
		general_recursion(additive_expr_b, "LSHIFT_X0_X1_X0\n", "<<", additive_expr_stub_c);
		arithmetic_recursion(additive_expr_b, "ARITH_RSHIFT_X0_X1_X0\n", "LOGICAL_RSHIFT_X0_X1_X0\n", ">>", additive_expr_stub_c);
	}
	else if((RISCV32 == Architecture) || (RISCV64 == Architecture))
	{
		general_recursion(additive_expr_b, "rd_a0 rs1_a1 rs2_a0 sll\n", "<<", additive_expr_stub_c);
		arithmetic_recursion(additive_expr_b, "rd_a0 rs1_a1 rs2_a0 sra\n", "rd_a0 rs1_a1 rs2_a0 srl\n", ">>", additive_expr_stub_c);
	}
}

/* A level-b expression followed by any shift operators. */
void additive_expr_c(void)
{
	additive_expr_b();
	additive_expr_stub_c();
}

/*
 * relational-expr:
 *         additive_expr
 *         relational-expr < additive_expr
 *         relational-expr <= additive_expr
 *         relational-expr >= additive_expr
 *         relational-expr > additive_expr
 *
 * The stub below also handles "==" and "!=" at this same level.
 */
void relational_expr_stub(void)
{
	if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture))
	{
/* relational_expr_stub, Knight branch: signed compare (CMP) first, unsigned compare (CMPU) second */
		arithmetic_recursion(additive_expr_c, "CMP R0 R1 R0\nSET.L R0 R0 1\n", "CMPU R0 R1 R0\nSET.L R0 R0 1\n", "<", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "CMP R0 R1 R0\nSET.LE R0 R0 1\n", "CMPU R0 R1 R0\nSET.LE R0 R0 1\n", "<=", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "CMP R0 R1 R0\nSET.GE R0 R0 1\n", "CMPU R0 R1 R0\nSET.GE R0 R0 1\n", ">=", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "CMP R0 R1 R0\nSET.G R0 R0 1\n", "CMPU R0 R1 R0\nSET.G R0 R0 1\n", ">", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "CMP R0 R1 R0\nSET.E R0 R0 1\n", "CMPU R0 R1 R0\nSET.E R0 R0 1\n", "==", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "CMP R0 R1 R0\nSET.NE R0 R0 1\n", "CMPU R0 R1 R0\nSET.NE R0 R0 1\n", "!=", relational_expr_stub);
	}
	else if(X86 == Architecture)
	{
		/* Ordered comparisons differ by signedness (setl/setb, ...); equality tests do not, hence general_recursion */
		arithmetic_recursion(additive_expr_c, "cmp\nsetl_al\nmovzx_eax,al\n", "cmp\nsetb_al\nmovzx_eax,al\n", "<", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "cmp\nsetle_al\nmovzx_eax,al\n", "cmp\nsetbe_al\nmovzx_eax,al\n", "<=", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "cmp\nsetge_al\nmovzx_eax,al\n", "cmp\nsetae_al\nmovzx_eax,al\n", ">=", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "cmp\nsetg_al\nmovzx_eax,al\n", "cmp\nseta_al\nmovzx_eax,al\n", ">", relational_expr_stub);
		general_recursion(additive_expr_c, "cmp\nsete_al\nmovzx_eax,al\n", "==", relational_expr_stub);
		general_recursion(additive_expr_c, "cmp\nsetne_al\nmovzx_eax,al\n", "!=", relational_expr_stub);
	}
	else if(AMD64 == Architecture)
	{
		arithmetic_recursion(additive_expr_c, "cmp_rbx,rax\nsetl_al\nmovzx_rax,al\n", "cmp_rbx,rax\nsetb_al\nmovzx_rax,al\n", "<", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "cmp_rbx,rax\nsetle_al\nmovzx_rax,al\n", "cmp_rbx,rax\nsetbe_al\nmovzx_rax,al\n", "<=", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "cmp_rbx,rax\nsetge_al\nmovzx_rax,al\n", "cmp_rbx,rax\nsetae_al\nmovzx_rax,al\n", ">=", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "cmp_rbx,rax\nsetg_al\nmovzx_rax,al\n", "cmp_rbx,rax\nseta_al\nmovzx_rax,al\n", ">", relational_expr_stub);
		general_recursion(additive_expr_c, "cmp_rbx,rax\nsete_al\nmovzx_rax,al\n", "==", relational_expr_stub);
		general_recursion(additive_expr_c, "cmp_rbx,rax\nsetne_al\nmovzx_rax,al\n", "!=", relational_expr_stub);
	}
	else if(ARMV7L == Architecture)
	{
		/* Compare, clear R0, then conditionally load 1 into R0 */
		arithmetic_recursion(additive_expr_c, "'0' R0 CMP R1 AUX_ALWAYS\n!0 R0 LOADI8_ALWAYS\n!1 R0 LOADI8_L\n", "'0' R0 CMP R1 AUX_ALWAYS\n!0 R0 LOADI8_ALWAYS\n!1 R0 LOADI8_LO\n", "<", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "'0' R0 CMP R1 AUX_ALWAYS\n!0 R0 LOADI8_ALWAYS\n!1 R0 LOADI8_LE\n", "'0' R0 CMP R1 AUX_ALWAYS\n!0 R0 LOADI8_ALWAYS\n!1 R0 LOADI8_LS\n", "<=", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "'0' R0 CMP R1 AUX_ALWAYS\n!0 R0 LOADI8_ALWAYS\n!1 R0 LOADI8_GE\n", "'0' R0 CMP R1 AUX_ALWAYS\n!0 R0 LOADI8_ALWAYS\n!1 R0 LOADI8_HS\n", ">=", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "'0' R0 CMP R1 AUX_ALWAYS\n!0 R0 LOADI8_ALWAYS\n!1 R0 LOADI8_G\n", "'0' R0 CMP R1 AUX_ALWAYS\n!0 R0 LOADI8_ALWAYS\n!1 R0 LOADI8_HI\n", ">", relational_expr_stub);
		general_recursion(additive_expr_c, "'0' R0 CMP R1 AUX_ALWAYS\n!0 R0 LOADI8_ALWAYS\n!1 R0 LOADI8_EQUAL\n", "==", relational_expr_stub);
		general_recursion(additive_expr_c, "'0' R0 CMP R1 AUX_ALWAYS\n!0 R0 LOADI8_ALWAYS\n!1 R0 LOADI8_NE\n", "!=", relational_expr_stub);
	}
	else if(AARCH64 == Architecture)
	{
		/* Compare, set X0 to 1, then conditionally skip the instruction that resets it to 0 */
		arithmetic_recursion(additive_expr_c, "CMP_X1_X0\nSET_X0_TO_1\nSKIP_INST_LT\nSET_X0_TO_0\n", "CMP_X1_X0\nSET_X0_TO_1\nSKIP_INST_LO\nSET_X0_TO_0\n", "<", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "CMP_X1_X0\nSET_X0_TO_1\nSKIP_INST_LE\nSET_X0_TO_0\n", "CMP_X1_X0\nSET_X0_TO_1\nSKIP_INST_LS\nSET_X0_TO_0\n", "<=", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "CMP_X1_X0\nSET_X0_TO_1\nSKIP_INST_GE\nSET_X0_TO_0\n", "CMP_X1_X0\nSET_X0_TO_1\nSKIP_INST_HS\nSET_X0_TO_0\n", ">=", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "CMP_X1_X0\nSET_X0_TO_1\nSKIP_INST_GT\nSET_X0_TO_0\n", "CMP_X1_X0\nSET_X0_TO_1\nSKIP_INST_HI\nSET_X0_TO_0\n", ">", relational_expr_stub);
		general_recursion(additive_expr_c, "CMP_X1_X0\nSET_X0_TO_1\nSKIP_INST_EQ\nSET_X0_TO_0\n", "==", relational_expr_stub);
		general_recursion(additive_expr_c, "CMP_X1_X0\nSET_X0_TO_1\nSKIP_INST_NE\nSET_X0_TO_0\n", "!=", relational_expr_stub);
	}
	else if((RISCV32 == Architecture) || (RISCV64 == Architecture))
	{
		/* Built from slt/sltu: "<=" and ">=" are the opposite strict comparison followed by xori 1 */
		arithmetic_recursion(additive_expr_c, "rd_a0 rs1_a1 rs2_a0 slt\n", "rd_a0 rs1_a1 rs2_a0 sltu\n", "<", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "rd_a0 rs1_a0 rs2_a1 slt\nrd_a0 rs1_a0 !1 xori\n", "rd_a0 rs1_a0 rs2_a1 sltu\nrd_a0 rs1_a0 !1 xori\n", "<=", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "rd_a0 rs1_a1 rs2_a0 slt\nrd_a0 rs1_a0 !1 xori\n", "rd_a0 rs1_a1 rs2_a0 sltu\nrd_a0 rs1_a0 !1 xori\n", ">=", relational_expr_stub);
		arithmetic_recursion(additive_expr_c, "rd_a0 rs1_a0 rs2_a1 slt\n", "rd_a0 rs1_a0 rs2_a1 sltu\n", ">", relational_expr_stub);
		/* Equality: subtract, then test the difference against zero */
		general_recursion(additive_expr_c, "rd_a0 rs1_a0 rs2_a1 sub\nrd_a0 rs1_a0 !1 sltiu\n", "==", relational_expr_stub);
		general_recursion(additive_expr_c, "rd_a0 rs1_a0 rs2_a1 sub\nrd_a0 rs2_a0 sltu\n", "!=", relational_expr_stub);
	}
}

/* A shift-level expression followed by any comparison operators. */
void relational_expr(void)
{
	additive_expr_c();
	relational_expr_stub();
}

/*
 * bitwise-expr:
 *         relational-expr
 *         bitwise-expr & bitwise-expr
 *         bitwise-expr && bitwise-expr
 *         bitwise-expr | bitwise-expr
 *         bitwise-expr || bitwise-expr
 *         bitwise-expr ^ bitwise-expr
 *
 * Note that "&&" emits the same instruction text as "&", and "||" the same
 * as "|": both operands are always evaluated (no short-circuit branch is
 * generated here).
 */
void bitwise_expr_stub(void)
{
	if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture))
	{
		general_recursion(relational_expr, "AND R0 R0 R1\n", "&", bitwise_expr_stub);
		general_recursion(relational_expr, "AND R0 R0 R1\n", "&&", bitwise_expr_stub);
/* bitwise_expr_stub, Knight branch continued: "|", "||" and "^" */
		general_recursion(relational_expr, "OR R0 R0 R1\n", "|", bitwise_expr_stub);
		general_recursion(relational_expr, "OR R0 R0 R1\n", "||", bitwise_expr_stub);
		general_recursion(relational_expr, "XOR R0 R0 R1\n", "^", bitwise_expr_stub);
	}
	else if(X86 == Architecture)
	{
		general_recursion(relational_expr, "and_eax,ebx\n", "&", bitwise_expr_stub);
		general_recursion(relational_expr, "and_eax,ebx\n", "&&", bitwise_expr_stub);
		general_recursion(relational_expr, "or_eax,ebx\n", "|", bitwise_expr_stub);
		general_recursion(relational_expr, "or_eax,ebx\n", "||", bitwise_expr_stub);
		general_recursion(relational_expr, "xor_eax,ebx\n", "^", bitwise_expr_stub);
	}
	else if(AMD64 == Architecture)
	{
		general_recursion(relational_expr, "and_rax,rbx\n", "&", bitwise_expr_stub);
		general_recursion(relational_expr, "and_rax,rbx\n", "&&", bitwise_expr_stub);
		general_recursion(relational_expr, "or_rax,rbx\n", "|", bitwise_expr_stub);
		general_recursion(relational_expr, "or_rax,rbx\n", "||", bitwise_expr_stub);
		general_recursion(relational_expr, "xor_rax,rbx\n", "^", bitwise_expr_stub);
	}
	else if(ARMV7L == Architecture)
	{
		general_recursion(relational_expr, "NO_SHIFT R0 R0 AND R1 ARITH2_ALWAYS\n", "&", bitwise_expr_stub);
		general_recursion(relational_expr, "NO_SHIFT R0 R0 AND R1 ARITH2_ALWAYS\n", "&&", bitwise_expr_stub);
		general_recursion(relational_expr, "NO_SHIFT R0 R0 OR R1 AUX_ALWAYS\n", "|", bitwise_expr_stub);
		general_recursion(relational_expr, "NO_SHIFT R0 R0 OR R1 AUX_ALWAYS\n", "||", bitwise_expr_stub);
		general_recursion(relational_expr, "'0' R0 R0 XOR R1 ARITH2_ALWAYS\n", "^", bitwise_expr_stub);
	}
	else if(AARCH64 == Architecture)
	{
		general_recursion(relational_expr, "AND_X0_X1_X0\n", "&", bitwise_expr_stub);
		general_recursion(relational_expr, "AND_X0_X1_X0\n", "&&", bitwise_expr_stub);
		general_recursion(relational_expr, "OR_X0_X1_X0\n", "|", bitwise_expr_stub);
		general_recursion(relational_expr, "OR_X0_X1_X0\n", "||", bitwise_expr_stub);
		general_recursion(relational_expr, "XOR_X0_X1_X0\n", "^", bitwise_expr_stub);
	}
	else if((RISCV32 == Architecture) || (RISCV64 == Architecture))
	{
		general_recursion(relational_expr, "rd_a0 rs1_a1 rs2_a0 and\n", "&", bitwise_expr_stub);
		general_recursion(relational_expr, "rd_a0 rs1_a1 rs2_a0 and\n", "&&", bitwise_expr_stub);
		general_recursion(relational_expr, "rd_a0 rs1_a1 rs2_a0 or\n", "|", bitwise_expr_stub);
		general_recursion(relational_expr, "rd_a0 rs1_a1 rs2_a0 or\n", "||", bitwise_expr_stub);
		general_recursion(relational_expr, "rd_a0 rs1_a1 rs2_a0 xor\n", "^", bitwise_expr_stub);
	}
}

/* A relational expression followed by any "&", "&&", "|", "||" or "^" operators. */
void bitwise_expr(void)
{
	relational_expr();
	bitwise_expr_stub();
}

/*
 * expression:
 *         bitwise-or-expr
 *         bitwise-or-expr = expression
 */

/*
 * Parse one primary expression and emit code leaving its value in the
 * primary register.
 *
 * A leading "&" sets the global Address_of flag (cleared otherwise) and is
 * skipped.  The remaining token is dispatched on, mostly by first character:
 *   sizeof            -> unary_expr_sizeof
 *   '-'               -> negation (0 - operand; Knight uses NEG instead)
 *   '!'               -> logical not, computed as the unsigned test 1 > operand
 *   '~'               -> bitwise complement
 *   '('               -> parenthesised expression
 *   '\''  and  '"'    -> character and string literals
 *   letter, '_', '*'  -> primary_expr_variable
 *   digit             -> numeric literal
 * Anything else goes to primary_expr_failure.
 */
void primary_expr(void)
{
	require(NULL != global_token, "Received EOF where primary expression expected\n");

	if(match("&", global_token->s))
	{
		Address_of = TRUE;
		global_token = global_token->next;
		require(NULL != global_token, "Received EOF after & where primary expression expected\n");
	}
	else
	{
		Address_of = FALSE;
	}

	if(match("sizeof", global_token->s)) unary_expr_sizeof();
	else if('-' == global_token->s[0])
	{
		/* Load 0 first so the subtraction below yields 0 - operand (no preload is emitted for Knight, which negates in place) */
		if(X86 == Architecture) emit_out("mov_eax, %0\n");
		else if(AMD64 == Architecture) emit_out("mov_rax, %0\n");
		else if(ARMV7L == Architecture) emit_out("!0 R0 LOADI8_ALWAYS\n");
		else if(AARCH64 == Architecture) emit_out("SET_X0_TO_0\n");
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rd_a0 mv\n");

		common_recursion(primary_expr);

		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("NEG R0 R0\n");
		else if(X86 == Architecture) emit_out("sub_ebx,eax\nmov_eax,ebx\n");
		else if(AMD64 == Architecture) emit_out("sub_rbx,rax\nmov_rax,rbx\n");
		else if(ARMV7L == Architecture) emit_out("'0' R0 R0 SUB R1 ARITH2_ALWAYS\n");
		else if(AARCH64 == Architecture) emit_out("SUB_X0_X1_X0\n");
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rd_a0 rs1_a1 rs2_a0 sub\n");
	}
	else if('!' == global_token->s[0])
	{
		/* Load 1 first; the comparison below then tests 1 > operand (no preload for RISC-V, which uses sltiu directly) */
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("LOADI R0 1\n");
		else if(X86 == Architecture) emit_out("mov_eax, %1\n");
		else if(AMD64 == Architecture) emit_out("mov_rax, %1\n");
		else if(ARMV7L == Architecture) emit_out("!1 R0 LOADI8_ALWAYS\n");
		else if(AARCH64 == Architecture) emit_out("SET_X0_TO_1\n");

		common_recursion(postfix_expr);

		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("CMPU R0 R1 R0\nSET.G R0 R0 1\n");
		else if(X86 == Architecture) emit_out("cmp\nseta_al\nmovzx_eax,al\n");
		else if(AMD64 == Architecture) emit_out("cmp_rbx,rax\nseta_al\nmovzx_rax,al\n");
		else if(ARMV7L == Architecture) emit_out("'0' R0 CMP R1 AUX_ALWAYS\n!0 R0 LOADI8_ALWAYS\n!1 R0 LOADI8_HI\n");
		else if(AARCH64 == Architecture) emit_out("CMP_X1_X0\nSET_X0_TO_1\nSKIP_INST_HI\nSET_X0_TO_0\n");
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rd_a0 rs1_a0 !1 sltiu\n");
	}
	else if('~' == global_token->s[0])
	{
		common_recursion(postfix_expr);

		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("NOT R0 R0\n");
		else if(X86 == Architecture) emit_out("not_eax\n");
		else if(AMD64 == Architecture) emit_out("not_rax\n");
		else if(ARMV7L == Architecture) emit_out("'0' R0 R0 MVN_ALWAYS\n");
		else if(AARCH64 == Architecture) emit_out("MVN_X0\n");
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rd_a0 rs1_a0 not\n");
	}
	else if(global_token->s[0] == '(')
	{
		global_token = global_token->next;
		expression();
		require_match("Error in Primary expression\nDidn't get )\n", ")");
	}
	else if(global_token->s[0] == '\'') primary_expr_char();
	else if(global_token->s[0] == '"') primary_expr_string();
	else if(in_set(global_token->s[0], "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_")) primary_expr_variable();
	else if(global_token->s[0] == '*') primary_expr_variable();
	else if(in_set(global_token->s[0], "0123456789"))
	{
		primary_expr_number(global_token->s);
/* primary_expr, numeric-literal branch continued: step past the token just handed to primary_expr_number */
		global_token = global_token->next;
	}
	else primary_expr_failure();
}

/*
 * Return the instruction text that applies a compound-assignment operator
 * ("+=", "-=", "*=", "/=", "%=", "<<=", ">>=", "&=", "^=", "|=") on the
 * current Architecture.  is_signed selects between the signed and unsigned
 * variants where the target has both.
 *
 * An unrecognised operator prints a message to stderr and exits with
 * EXIT_FAILURE.  If the operator is recognised but no Architecture branch
 * matches, the initial empty string is returned.
 */
char* compound_operation(char* operator, int is_signed)
{
	char* operation = "";
	if(match("+=", operator))
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture))
		{
			if(is_signed) operation = "ADD R0 R1 R0\n";
			else operation = "ADDU R0 R1 R0\n";
		}
		else if(X86 == Architecture) operation = "add_eax,ebx\n";
		else if(AMD64 == Architecture) operation = "add_rax,rbx\n";
		else if(ARMV7L == Architecture) operation = "'0' R0 R0 ADD R1 ARITH2_ALWAYS\n";
		else if(AARCH64 == Architecture) operation = "ADD_X0_X1_X0\n";
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) operation = "rd_a0 rs1_a1 rs2_a0 add\n";
	}
	else if(match("-=", operator))
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture))
		{
			if(is_signed) operation = "SUB R0 R1 R0\n";
			else operation = "SUBU R0 R1 R0\n";
		}
		else if(X86 == Architecture) operation = "sub_ebx,eax\nmov_eax,ebx\n";
		else if(AMD64 == Architecture) operation = "sub_rbx,rax\nmov_rax,rbx\n";
		else if(ARMV7L == Architecture) operation = "'0' R0 R0 SUB R1 ARITH2_ALWAYS\n";
		else if(AARCH64 == Architecture) operation = "SUB_X0_X1_X0\n";
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) operation = "rd_a0 rs1_a1 rs2_a0 sub\n";
	}
	else if(match("*=", operator))
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture))
		{
			if(is_signed) operation = "MUL R0 R1 R0\n";
			else operation = "MULU R0 R1 R0\n";
		}
		else if(X86 == Architecture)
		{
			if(is_signed) operation = "imul_ebx\n";
			else operation = "mul_ebx\n";
		}
		else if(AMD64 == Architecture)
		{
			if(is_signed) operation = "imul_rbx\n";
			else operation = "mul_rbx\n";
		}
		/* NOTE(review): the "*" entry in additive_expr_stub_a uses MULS for signed and MUL for unsigned on ARMV7L; here MULS is used for both -- confirm this is intended */
		else if(ARMV7L == Architecture) operation = "'9' R0 '0' R1 MULS R0 ARITH2_ALWAYS\n";
		else if(AARCH64 == Architecture) operation = "MUL_X0_X1_X0\n";
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) operation = "rd_a0 rs1_a1 rs2_a0 mul\n";
	}
	else if(match("/=", operator))
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture))
		{
			if(is_signed) operation = "DIV R0 R1 R0\n";
			else operation = "DIVU R0 R1 R0\n";
		}
		else if(X86 == Architecture)
		{
			if (is_signed) operation = "xchg_ebx,eax\ncdq\nidiv_ebx\n";
			else operation = "xchg_ebx,eax\nmov_edx, %0\ndiv_ebx\n";
		}
		else if(AMD64 == Architecture)
		{
			if(is_signed) operation = "xchg_rbx,rax\ncqo\nidiv_rbx\n";
			else operation = "xchg_rbx,rax\nmov_rdx, %0\ndiv_rbx\n";
		}
		else if(ARMV7L == Architecture)
		{
			if(is_signed) operation = "{LR} PUSH_ALWAYS\n^~divides CALL_ALWAYS\n{LR} POP_ALWAYS\n";
			else operation = "{LR} PUSH_ALWAYS\n^~divide CALL_ALWAYS\n{LR} POP_ALWAYS\n";
		}
		else if(AARCH64 == Architecture)
		{
			if(is_signed) operation = "SDIV_X0_X1_X0\n";
			else operation = "UDIV_X0_X1_X0\n";
		}
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture))
		{
			if(is_signed) operation = "rd_a0 rs1_a1 rs2_a0 div\n";
			else operation = "rd_a0 rs1_a1 rs2_a0 divu\n";
		}
	}
	else if(match("%=", operator))
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture))
		{
			if(is_signed) operation = "MOD R0 R1 R0\n";
			else operation = "MODU R0 R1 R0\n";
		}
		else if(X86 == Architecture)
		{
			if(is_signed) operation = "xchg_ebx,eax\ncdq\nidiv_ebx\nmov_eax,edx\n";
			else operation = "xchg_ebx,eax\nmov_edx, %0\ndiv_ebx\nmov_eax,edx\n";
		}
		else if(AMD64 == Architecture)
		{
			if(is_signed) operation = "xchg_rbx,rax\ncqo\nidiv_rbx\nmov_rax,rdx\n";
			else operation = "xchg_rbx,rax\nmov_rdx, %0\ndiv_rbx\nmov_rax,rdx\n";
		}
		else if(ARMV7L == Architecture)
		{
			if(is_signed) operation = "{LR} PUSH_ALWAYS\n^~moduluss CALL_ALWAYS\n{LR} POP_ALWAYS\n";
			else operation = "{LR} PUSH_ALWAYS\n^~modulus CALL_ALWAYS\n{LR} POP_ALWAYS\n";
		}
		else if(AARCH64 == Architecture)
		{
			if(is_signed) operation = "SDIV_X2_X1_X0\nMSUB_X0_X0_X2_X1\n";
			else operation = "UDIV_X2_X1_X0\nMSUB_X0_X0_X2_X1\n";
		}
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture))
		{
			if(is_signed) operation = "rd_a0 rs1_a1 rs2_a0 rem\n";
			else operation = "rd_a0 rs1_a1 rs2_a0 remu\n";
		}
	}
	else if(match("<<=", operator))
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture))
		{
			if(is_signed) operation = "SAL R0 R1 R0\n";
			else operation = "SL0 R0 R1 R0\n";
		}
		else if(X86 == Architecture)
		{
			if(is_signed) operation = "mov_ecx,eax\nmov_eax,ebx\nsal_eax,cl\n";
			else operation = "mov_ecx,eax\nmov_eax,ebx\nshl_eax,cl\n";
		}
		else if(AMD64 == Architecture)
		{
			if(is_signed) operation = "mov_rcx,rax\nmov_rax,rbx\nsal_rax,cl\n";
			else operation = "mov_rcx,rax\nmov_rax,rbx\nshl_rax,cl\n";
		}
		else if(ARMV7L == Architecture) operation = "LEFT R1 R0 R0 SHIFT AUX_ALWAYS\n";
		else if(AARCH64 == Architecture) operation = "LSHIFT_X0_X1_X0\n";
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) operation = "rd_a0 rs1_a1 rs2_a0 sll\n";
	}
	else if(match(">>=", operator))
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture))
		{
			if(is_signed) operation = "SAR R0 R1 R0\n";
			else operation = "SR0 R0 R1 R0\n";
		}
		else if(X86 == Architecture)
		{
			if(is_signed) operation = "mov_ecx,eax\nmov_eax,ebx\nsar_eax,cl\n";
			else operation = "mov_ecx,eax\nmov_eax,ebx\nshr_eax,cl\n";
		}
		else if(AMD64 == Architecture)
		{
			if(is_signed) operation = "mov_rcx,rax\nmov_rax,rbx\nsar_rax,cl\n";
			else operation = "mov_rcx,rax\nmov_rax,rbx\nshr_rax,cl\n";
		}
		else if(ARMV7L == Architecture)
		{
			if(is_signed) operation = "ARITH_RIGHT R1 R0 R0 SHIFT AUX_ALWAYS\n";
			else operation = "RIGHT R1 R0 R0 SHIFT AUX_ALWAYS\n";
		}
		else if(AARCH64 == Architecture)
		{
			if(is_signed) operation = "ARITH_RSHIFT_X0_X1_X0\n";
			else operation = "LOGICAL_RSHIFT_X0_X1_X0\n";
		}
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture))
		{
			if(is_signed) operation = "rd_a0 rs1_a1 rs2_a0 sra\n";
			else operation = "rd_a0 rs1_a1 rs2_a0 srl\n";
		}
	}
	else if(match("&=", operator))
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) operation = "AND R0 R0 R1\n";
		else if(X86 == Architecture) operation = "and_eax,ebx\n";
		else if(AMD64 == Architecture) operation = "and_rax,rbx\n";
		else if(ARMV7L == Architecture) operation = "NO_SHIFT R0 R0 AND R1 ARITH2_ALWAYS\n";
		else if(AARCH64 == Architecture) operation = "AND_X0_X1_X0\n";
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) operation = "rd_a0 rs1_a1 rs2_a0 and\n";
	}
	else if(match("^=", operator))
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) operation = "XOR R0 R0 R1\n";
		else if(X86 == Architecture) operation = "xor_eax,ebx\n";
		else if(AMD64 == Architecture) operation = "xor_rax,rbx\n";
		else if(ARMV7L == Architecture) operation = "'0' R0 R0 XOR R1 ARITH2_ALWAYS\n";
		else if(AARCH64 == Architecture) operation = "XOR_X0_X1_X0\n";
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) operation = "rd_a0 rs1_a1 rs2_a0 xor\n";
	}
	else if(match("|=", operator))
	{
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) operation = "OR R0 R0 R1\n";
		else if(X86 == Architecture) operation = "or_eax,ebx\n";
		else if(AMD64 == Architecture) operation = "or_rax,rbx\n";
		else if(ARMV7L == Architecture) operation = "NO_SHIFT R0 R0 OR R1 AUX_ALWAYS\n";
		else if(AARCH64 == Architecture) operation = "OR_X0_X1_X0\n";
		else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) operation = "rd_a0 rs1_a1 rs2_a0 or\n";
	}
	else
	{
		fputs("Found illegal compound assignment operator: ", stderr);
		fputs(operator, stderr);
		fputc('\n', stderr);
		exit(EXIT_FAILURE);
	}

	return operation;
}

/*
 * Parse a full expression: a bitwise-level expression optionally followed
 * by "=" or a compound-assignment operator and a further expression.
 *
 * For "=", the store instruction is chosen before the right-hand side is
 * parsed: from the element type when the previous token was "]", otherwise
 * from current_target itself.  After either kind of assignment
 * current_target is reset to integer.
 */
void expression(void)
{
	bitwise_expr();
	if(match("=", global_token->s))
	{
		char* store = "";
		if(match("]", global_token->prev->s))
		{
			store = store_value(current_target->type->size);
		}
		else
		{
			store = store_value(current_target->size);
		}

		/* Recursing into expression makes chained assignments group to the right */
		common_recursion(expression);
		emit_out(store);
		current_target = integer;
	}
	else if(is_compound_assignment(global_token->s))
	{
		maybe_bootstrap_error("compound operator");
		/* All pieces are picked up front and emitted once the right-hand side has been parsed */
		char* push = "";
		char* load = "";
		char* operation = "";
		char* pop = "";
		char* store = "";
		struct type* last_type = current_target;

		/* push: save the secondary register (it is popped again just before the store) */
		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) push = "PUSHR R1 R15\n";
		else if(X86 == Architecture) push = "push_ebx\n";
		else if(AMD64 == Architecture) push = "push_rbx\n";
		else if(ARMV7L == Architecture) push = "{R1} PUSH_ALWAYS\n";
		else if(AARCH64 == Architecture) push = "PUSH_X1\n";
		else if(RISCV32 == Architecture) push = "rs1_sp rs2_a1 @-4 sw\n";
		else if(RISCV64 == Architecture) push = "rs1_sp rs2_a1 @-8 sd\n";

		/* load: dereference the secondary register; a byte-sized load is used only for an element of a "char*" */
		if(!match("]", global_token->prev->s) || !match("char*", current_target->name))
		{
			if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) load = "LOAD R1 R1 0\n";
			else if(X86 == Architecture) load = "mov_ebx,[ebx]\n";
			else if(AMD64 == Architecture) load = "mov_rbx,[rbx]\n";
			else if(ARMV7L == Architecture) load = "!0 R1 LOAD32 R1 MEMORY\n";
			else if(AARCH64 == Architecture) load = "DEREF_X1\n";
			else if(RISCV32 == Architecture) load = "rd_a1 rs1_a1 lw\n";
			else if(RISCV64 == Architecture) load = "rd_a1 rs1_a1 ld\n";
		}
		else
		{
			if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) load = "LOAD8 R1 R1 0\n";
			else if(X86 == Architecture) load = "movsx_ebx,BYTE_PTR_[ebx]\n";
			else if(AMD64 == Architecture) load = "movsx_rbx,BYTE_PTR_[rbx]\n";
			else if(ARMV7L == Architecture) load = "LOADU8 R1 LOAD R1 MEMORY\n";
			else if(AARCH64 == Architecture) load = "DEREF_X1_BYTE\n";
			else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) load = "rd_a1 rs1_a1 lbu\n";
		}

		/* Remember the operator token now; common_recursion below advances global_token */
		char *operator = global_token->s;

		if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) pop = "POPR R1 R15\n";
		else if(X86 == Architecture) pop = "pop_ebx\n";
		else if(AMD64 == Architecture) pop = "pop_rbx\n";
		else if(ARMV7L == Architecture) pop = "{R1} POP_ALWAYS\n";
		else if(AARCH64 == Architecture) pop = "POP_X1\n";
		else if(RISCV32 == Architecture) pop = "rd_a1 rs1_sp !-4 lw\n";
		else if(RISCV64 == Architecture) pop = "rd_a1 rs1_sp !-8 ld\n";

		if(match("]", global_token->prev->s))
		{
			store = store_value(current_target->type->size);
		}
		else
		{
			store = store_value(current_target->size);
		}

		common_recursion(expression);
current_target = promote_type(current_target, last_type); emit_out(push); emit_out(load); operation = compound_operation(operator, current_target->is_signed); emit_out(operation); emit_out(pop); emit_out(store); current_target = integer; } } int iskeywordp(char* s) { if(match("auto", s)) return TRUE; if(match("break", s)) return TRUE; if(match("case", s)) return TRUE; if(match("char", s)) return TRUE; if(match("const", s)) return TRUE; if(match("continue", s)) return TRUE; if(match("default", s)) return TRUE; if(match("do", s)) return TRUE; if(match("double", s)) return TRUE; if(match("else", s)) return TRUE; if(match("enum", s)) return TRUE; if(match("extern", s)) return TRUE; if(match("float", s)) return TRUE; if(match("for", s)) return TRUE; if(match("goto", s)) return TRUE; if(match("if", s)) return TRUE; if(match("int", s)) return TRUE; if(match("long", s)) return TRUE; if(match("register", s)) return TRUE; if(match("return", s)) return TRUE; if(match("short", s)) return TRUE; if(match("signed", s)) return TRUE; if(match("sizeof", s)) return TRUE; if(match("static", s)) return TRUE; if(match("struct", s)) return TRUE; if(match("switch", s)) return TRUE; if(match("typedef", s)) return TRUE; if(match("union", s)) return TRUE; if(match("unsigned", s)) return TRUE; if(match("void", s)) return TRUE; if(match("volatile", s)) return TRUE; if(match("while", s)) return TRUE; return FALSE; } /* Similar to integer division a / b but rounds up */ unsigned ceil_div(unsigned a, unsigned b) { return (a + b - 1) / b; } /* Process local variable */ void collect_local(void) { if(NULL != break_target_func) { fputs("Local variable initialized inside of loop in file: ", stderr); line_error(); fputs("\nMove the variable outside of the loop to resolve\n", stderr); fputs("Otherwise the binary will segfault while running\n", stderr); exit(EXIT_FAILURE); } struct type* type_size = type_name(); require(NULL != global_token, "Received EOF while collecting locals\n"); 
require(!in_set(global_token->s[0], "[{(<=>)}]|&!^%;:'\""), "forbidden character in local variable name\n"); require(!iskeywordp(global_token->s), "You are not allowed to use a keyword as a local variable name\n"); require(NULL != type_size, "Must have non-null type\n"); struct token_list* a = sym_declare(global_token->s, type_size, function->locals); if(match("main", function->s) && (NULL == function->locals)) { if(KNIGHT_NATIVE == Architecture) a->depth = register_size; else if(KNIGHT_POSIX == Architecture) a->depth = 20; else if(X86 == Architecture) a->depth = -20; else if(AMD64 == Architecture) a->depth = -40; else if(ARMV7L == Architecture) a->depth = 16; else if(AARCH64 == Architecture) a->depth = 32; /* argc, argv, envp and the local (8 bytes each) */ else if(RISCV32 == Architecture) a->depth = -16; else if(RISCV64 == Architecture) a->depth = -32; } else if((NULL == function->arguments) && (NULL == function->locals)) { if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) a->depth = register_size; else if(X86 == Architecture) a->depth = -8; else if(AMD64 == Architecture) a->depth = -16; else if(ARMV7L == Architecture) a->depth = 8; else if(AARCH64 == Architecture) a->depth = register_size; else if(RISCV32 == Architecture) a->depth = -4; else if(RISCV64 == Architecture) a->depth = -8; } else if(NULL == function->locals) { if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) a->depth = function->arguments->depth + 8; else if(X86 == Architecture) a->depth = function->arguments->depth - 8; else if(AMD64 == Architecture) a->depth = function->arguments->depth - 16; else if(ARMV7L == Architecture) a->depth = function->arguments->depth + 8; else if(AARCH64 == Architecture) a->depth = function->arguments->depth + register_size; else if(RISCV32 == Architecture) a->depth = function->arguments->depth - 4; else if(RISCV64 == Architecture) a->depth = function->arguments->depth - 8; } else { if((KNIGHT_POSIX == Architecture) || 
(KNIGHT_NATIVE == Architecture)) a->depth = function->locals->depth + register_size; else if(X86 == Architecture) a->depth = function->locals->depth - register_size; else if(AMD64 == Architecture) a->depth = function->locals->depth - register_size; else if(ARMV7L == Architecture) a->depth = function->locals->depth + register_size; else if(AARCH64 == Architecture) a->depth = function->locals->depth + register_size; else if(RISCV32 == Architecture) a->depth = function->locals->depth - register_size; else if(RISCV64 == Architecture) a->depth = function->locals->depth - register_size; } /* Adjust the depth of local structs. When stack grows downwards, we want them to start at the bottom of allocated space. */ unsigned struct_depth_adjustment = (ceil_div(a->type->size, register_size) - 1) * register_size; if(KNIGHT_POSIX == Architecture) a->depth = a->depth + struct_depth_adjustment; else if(KNIGHT_NATIVE == Architecture) a->depth = a->depth + struct_depth_adjustment; else if(X86 == Architecture) a->depth = a->depth - struct_depth_adjustment; else if(AMD64 == Architecture) a->depth = a->depth - struct_depth_adjustment; else if(ARMV7L == Architecture) a->depth = a->depth + struct_depth_adjustment; else if(AARCH64 == Architecture) a->depth = a->depth + struct_depth_adjustment; else if(RISCV32 == Architecture) a->depth = a->depth - struct_depth_adjustment; else if(RISCV64 == Architecture) a->depth = a->depth - struct_depth_adjustment; function->locals = a; emit_out("# Defining local "); emit_out(global_token->s); emit_out("\n"); global_token = global_token->next; require(NULL != global_token, "incomplete local missing name\n"); if(match("=", global_token->s)) { global_token = global_token->next; require(NULL != global_token, "incomplete local assignment\n"); expression(); } require_match("ERROR in collect_local\nMissing ;\n", ";"); unsigned i = (a->type->size + register_size - 1) / register_size; while(i != 0) { if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == 
Architecture)) emit_out("PUSHR R0 R15\t#"); else if(X86 == Architecture) emit_out("push_eax\t#"); else if(AMD64 == Architecture) emit_out("push_rax\t#"); else if(ARMV7L == Architecture) emit_out("{R0} PUSH_ALWAYS\t#"); else if(AARCH64 == Architecture) emit_out("PUSH_X0\t#"); else if(RISCV32 == Architecture) emit_out("rd_sp rs1_sp !-4 addi\nrs1_sp rs2_a0 sw\t#"); else if(RISCV64 == Architecture) emit_out("rd_sp rs1_sp !-8 addi\nrs1_sp rs2_a0 sd\t#"); emit_out(a->s); emit_out("\n"); i = i - 1; } } void statement(void); /* Evaluate if statements */ void process_if(void) { char* number_string = int2str(current_count, 10, TRUE); current_count = current_count + 1; emit_out("# IF_"); uniqueID_out(function->s, number_string); global_token = global_token->next; require_match("ERROR in process_if\nMISSING (\n", "("); expression(); if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("JUMP.Z R0 @ELSE_"); else if(X86 == Architecture) emit_out("test_eax,eax\nje %ELSE_"); else if(AMD64 == Architecture) emit_out("test_rax,rax\nje %ELSE_"); else if(ARMV7L == Architecture) emit_out("!0 CMPI8 R0 IMM_ALWAYS\n^~ELSE_"); else if(AARCH64 == Architecture) emit_out("CBNZ_X0_PAST_BR\nLOAD_W16_AHEAD\nSKIP_32_DATA\n&ELSE_"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rs1_a0 @8 bnez\n$ELSE_"); uniqueID_out(function->s, number_string); if(ARMV7L == Architecture) emit_out(" JUMP_EQUAL\n"); else if(AARCH64 == Architecture) emit_out("\nBR_X16\n"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("jal\n"); require_match("ERROR in process_if\nMISSING )\n", ")"); statement(); require(NULL != global_token, "Reached EOF inside of function\n"); if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("JUMP @_END_IF_"); else if(X86 == Architecture) emit_out("jmp %_END_IF_"); else if(AMD64 == Architecture) emit_out("jmp %_END_IF_"); else if(ARMV7L == Architecture) emit_out("^~_END_IF_"); else if(AARCH64 
== Architecture) emit_out("LOAD_W16_AHEAD\nSKIP_32_DATA\n&_END_IF_"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("$_END_IF_"); uniqueID_out(function->s, number_string); if(ARMV7L == Architecture) emit_out(" JUMP_ALWAYS\n"); else if(AARCH64 == Architecture) emit_out("\nBR_X16\n"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("jal\n"); emit_out(":ELSE_"); uniqueID_out(function->s, number_string); if(match("else", global_token->s)) { global_token = global_token->next; require(NULL != global_token, "Received EOF where an else statement expected\n"); statement(); require(NULL != global_token, "Reached EOF inside of function\n"); } emit_out(":_END_IF_"); uniqueID_out(function->s, number_string); } void process_case(void) { process_case_iter: if(match("case", global_token->s)) return; if(match(":default", global_token->s)) return; if(match("break", global_token->s)) { statement(); } else { statement(); goto process_case_iter; } } void process_switch(void) { maybe_bootstrap_error("switch/case statements"); struct token_list* nested_locals = break_frame; char* nested_break_head = break_target_head; char* nested_break_func = break_target_func; char* nested_break_num = break_target_num; char* nested_continue_head = continue_target_head; char* number_string = int2str(current_count, 10, TRUE); current_count = current_count + 1; break_target_head = "_SWITCH_END_"; continue_target_head = NULL; /* don't allow continue in switch statements */ break_target_num = number_string; break_frame = function->locals; break_target_func = function->s; emit_out("# switch_"); uniqueID_out(function->s, number_string); /* get what we are casing on */ global_token = global_token->next; require_match("ERROR in process_switch\nMISSING (\n", "("); expression(); require_match("ERROR in process_switch\nMISSING )\n", ")"); /* Put the value in R1 as it is currently in R0 */ if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) 
emit_out("MOVE R1 R0\n"); else if(X86 == Architecture) emit_out("mov_ebx,eax\n"); else if(AMD64 == Architecture) emit_out("push_rax\npop_rbx\n"); else if(ARMV7L == Architecture) emit_out("'0' R1 R0 NO_SHIFT MOVE_ALWAYS\n"); else if(AARCH64 == Architecture) emit_out("SET_X1_FROM_X0\n"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rd_a1 rs1_a0 mv\n"); /* Jump to the switch table */ if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("JUMP @_SWITCH_TABLE_"); else if(X86 == Architecture) emit_out("jmp %_SWITCH_TABLE_"); else if(AMD64 == Architecture) emit_out("jmp %_SWITCH_TABLE_"); else if(ARMV7L == Architecture) emit_out("^~_SWITCH_TABLE_"); else if(AARCH64 == Architecture) emit_out("LOAD_W16_AHEAD\nSKIP_32_DATA\n&_SWITCH_TABLE_"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("$_SWITCH_TABLE_"); uniqueID_out(function->s, number_string); if(ARMV7L == Architecture) emit_out(" JUMP_ALWAYS\n"); else if(AARCH64 == Architecture) emit_out("\nBR_X16\n"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("jal\n"); /* must be switch (exp) {$STATEMENTS}; form */ require_match("ERROR in process_switch\nMISSING {\n", "{"); struct case_list* backtrack = NULL; process_switch_iter: if(match("case", global_token->s)) { global_token = global_token->next; if(':' == global_token->s[0]) { struct case_list* c = calloc(1, sizeof(struct case_list)); c->next = backtrack; c->value = global_token->s + 1; backtrack = c; emit_out(":_SWITCH_CASE_"); emit_out(c->value); emit_out("_"); uniqueID_out(function->s, number_string); global_token = global_token->next; process_case(); } else line_error(); goto process_switch_iter; } else if(match(":default", global_token->s)) { /* because of how M2-Planet treats labels */ global_token = global_token->next; emit_out(":_SWITCH_DEFAULT_"); uniqueID_out(function->s, number_string); /* collect statements until } */ while(!match("}", global_token->s)) { 
statement(); } /* jump over the switch table */ if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("JUMP @_SWITCH_END_"); else if(X86 == Architecture) emit_out("jmp %_SWITCH_END_"); else if(AMD64 == Architecture) emit_out("jmp %_SWITCH_END_"); else if(ARMV7L == Architecture) emit_out("^~_SWITCH_END_"); else if(AARCH64 == Architecture) emit_out("LOAD_W16_AHEAD\nSKIP_32_DATA\n&_SWITCH_END_"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("$_SWITCH_END_"); uniqueID_out(function->s, number_string); if(ARMV7L == Architecture) emit_out(" JUMP_ALWAYS\n"); else if(AARCH64 == Architecture) emit_out("\nBR_X16\n"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("jal\n"); } /* Switch statements must end with } */ require_match("ERROR in process_switch\nMISSING }\n", "}"); /* create the table */ emit_out(":_SWITCH_TABLE_"); uniqueID_out(function->s, number_string); struct case_list* hold; while(NULL != backtrack) { /* put case value in R0 as the switch (value) is in R1 */ primary_expr_number(backtrack->value); hold = backtrack->next; /* compare R0 and R1 and jump to case if equal */ if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("CMPU R0 R0 R1\nJUMP.E R0 @_SWITCH_CASE_"); else if(X86 == Architecture) emit_out("cmp\nje %_SWITCH_CASE_"); else if(AMD64 == Architecture) emit_out("cmp_rbx,rax\nje %_SWITCH_CASE_"); else if(ARMV7L == Architecture) emit_out("'0' R0 CMP R1 AUX_ALWAYS\n^~_SWITCH_CASE_"); else if(AARCH64 == Architecture) emit_out("CMP_X1_X0\nSKIP_32_DATA\n&_SWITCH_CASE_"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rd_a0 rs1_a0 rs2_a1 sub\nrs1_a0 @8 bnez\n$_SWITCH_CASE_"); emit_out(backtrack->value); emit_out("_"); uniqueID_out(function->s, number_string); if(ARMV7L == Architecture) emit_out(" JUMP_EQUAL\n"); else if(AARCH64 == Architecture) emit_out("\nSKIP_INST_NE\nBR_X16\n"); else if((RISCV32 == Architecture) || (RISCV64 == 
Architecture)) emit_out("jal\n"); free(backtrack); backtrack = hold; } /* Default to :default */ if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("JUMP @_SWITCH_DEFAULT_"); else if(X86 == Architecture) emit_out("jmp %_SWITCH_DEFAULT_"); else if(AMD64 == Architecture) emit_out("jmp %_SWITCH_DEFAULT_"); else if(ARMV7L == Architecture) emit_out("^~_SWITCH_DEFAULT_"); else if(AARCH64 == Architecture) emit_out("SKIP_32_DATA\n&_SWITCH_DEFAULT_"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("$_SWITCH_DEFAULT_"); uniqueID_out(function->s, number_string); if(ARMV7L == Architecture) emit_out(" JUMP_ALWAYS\n"); else if(AARCH64 == Architecture) emit_out("\nBR_X16\n"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("jal\n"); /* put the exit of the switch */ emit_out(":_SWITCH_END_"); uniqueID_out(function->s, number_string); break_target_head = nested_break_head; break_target_func = nested_break_func; break_target_num = nested_break_num; continue_target_head = nested_continue_head; break_frame = nested_locals; } void process_for(void) { struct token_list* nested_locals = break_frame; char* nested_break_head = break_target_head; char* nested_break_func = break_target_func; char* nested_break_num = break_target_num; char* nested_continue_head = continue_target_head; char* number_string = int2str(current_count, 10, TRUE); current_count = current_count + 1; break_target_head = "FOR_END_"; continue_target_head = "FOR_ITER_"; break_target_num = number_string; break_frame = function->locals; break_target_func = function->s; emit_out("# FOR_initialization_"); uniqueID_out(function->s, number_string); global_token = global_token->next; require_match("ERROR in process_for\nMISSING (\n", "("); if(!match(";",global_token->s)) { expression(); } emit_out(":FOR_"); uniqueID_out(function->s, number_string); require_match("ERROR in process_for\nMISSING ;1\n", ";"); expression(); if((KNIGHT_POSIX == 
Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("JUMP.Z R0 @FOR_END_"); else if(X86 == Architecture) emit_out("test_eax,eax\nje %FOR_END_"); else if(AMD64 == Architecture) emit_out("test_rax,rax\nje %FOR_END_"); else if(ARMV7L == Architecture) emit_out("!0 CMPI8 R0 IMM_ALWAYS\n^~FOR_END_"); else if(AARCH64 == Architecture) emit_out("CBNZ_X0_PAST_BR\nLOAD_W16_AHEAD\nSKIP_32_DATA\n&FOR_END_"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rs1_a0 @8 bnez\n$FOR_END_"); uniqueID_out(function->s, number_string); if(ARMV7L == Architecture) emit_out(" JUMP_EQUAL\n"); else if(AARCH64 == Architecture) emit_out("\nBR_X16\n"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("jal\n"); if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("JUMP @FOR_THEN_"); else if(X86 == Architecture) emit_out("jmp %FOR_THEN_"); else if(AMD64 == Architecture) emit_out("jmp %FOR_THEN_"); else if(ARMV7L == Architecture) emit_out("^~FOR_THEN_"); else if(AARCH64 == Architecture) emit_out("LOAD_W16_AHEAD\nSKIP_32_DATA\n&FOR_THEN_"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("$FOR_THEN_"); uniqueID_out(function->s, number_string); if(ARMV7L == Architecture) emit_out(" JUMP_ALWAYS\n"); else if(AARCH64 == Architecture) emit_out("\nBR_X16\n"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("jal\n"); emit_out(":FOR_ITER_"); uniqueID_out(function->s, number_string); require_match("ERROR in process_for\nMISSING ;2\n", ";"); expression(); if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("JUMP @FOR_"); else if(X86 == Architecture) emit_out("jmp %FOR_"); else if(AMD64 == Architecture) emit_out("jmp %FOR_"); else if(ARMV7L == Architecture) emit_out("^~FOR_"); else if(AARCH64 == Architecture) emit_out("LOAD_W16_AHEAD\nSKIP_32_DATA\n&FOR_"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("$FOR_"); 
uniqueID_out(function->s, number_string); if(ARMV7L == Architecture) emit_out(" JUMP_ALWAYS\n"); else if(AARCH64 == Architecture) emit_out("\nBR_X16\n"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("jal\n"); emit_out(":FOR_THEN_"); uniqueID_out(function->s, number_string); require_match("ERROR in process_for\nMISSING )\n", ")"); statement(); require(NULL != global_token, "Reached EOF inside of function\n"); if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("JUMP @FOR_ITER_"); else if(X86 == Architecture) emit_out("jmp %FOR_ITER_"); else if(AMD64 == Architecture) emit_out("jmp %FOR_ITER_"); else if(ARMV7L == Architecture) emit_out("^~FOR_ITER_"); else if(AARCH64 == Architecture) emit_out("LOAD_W16_AHEAD\nSKIP_32_DATA\n&FOR_ITER_"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("$FOR_ITER_"); uniqueID_out(function->s, number_string); if(ARMV7L == Architecture) emit_out(" JUMP_ALWAYS\n"); else if(AARCH64 == Architecture) emit_out("\nBR_X16\n"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("jal\n"); emit_out(":FOR_END_"); uniqueID_out(function->s, number_string); break_target_head = nested_break_head; break_target_func = nested_break_func; break_target_num = nested_break_num; continue_target_head = nested_continue_head; break_frame = nested_locals; } /* Process Assembly statements */ void process_asm(void) { global_token = global_token->next; require_match("ERROR in process_asm\nMISSING (\n", "("); while('"' == global_token->s[0]) { emit_out((global_token->s + 1)); emit_out("\n"); global_token = global_token->next; require(NULL != global_token, "Received EOF inside asm statement\n"); } require_match("ERROR in process_asm\nMISSING )\n", ")"); require_match("ERROR in process_asm\nMISSING ;\n", ";"); } /* Process do while loops */ void process_do(void) { struct token_list* nested_locals = break_frame; char* nested_break_head = break_target_head; char* 
nested_break_func = break_target_func; char* nested_break_num = break_target_num; char* nested_continue_head = continue_target_head; char* number_string = int2str(current_count, 10, TRUE); current_count = current_count + 1; break_target_head = "DO_END_"; continue_target_head = "DO_TEST_"; break_target_num = number_string; break_frame = function->locals; break_target_func = function->s; emit_out(":DO_"); uniqueID_out(function->s, number_string); global_token = global_token->next; require(NULL != global_token, "Received EOF where do statement is expected\n"); statement(); require(NULL != global_token, "Reached EOF inside of function\n"); emit_out(":DO_TEST_"); uniqueID_out(function->s, number_string); require_match("ERROR in process_do\nMISSING while\n", "while"); require_match("ERROR in process_do\nMISSING (\n", "("); expression(); require_match("ERROR in process_do\nMISSING )\n", ")"); require_match("ERROR in process_do\nMISSING ;\n", ";"); if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("JUMP.NZ R0 @DO_"); else if(X86 == Architecture) emit_out("test_eax,eax\njne %DO_"); else if(AMD64 == Architecture) emit_out("test_rax,rax\njne %DO_"); else if(ARMV7L == Architecture) emit_out("!0 CMPI8 R0 IMM_ALWAYS\n^~DO_"); else if(AARCH64 == Architecture) emit_out("CBZ_X0_PAST_BR\nLOAD_W16_AHEAD\nSKIP_32_DATA\n&DO_"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rs1_a0 @DO_END_"); uniqueID_out(function->s, number_string); if(ARMV7L == Architecture) emit_out(" JUMP_NE\n"); else if(AARCH64 == Architecture) emit_out("\nBR_X16\n"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) { emit_out("beqz\n$DO_"); uniqueID_out(function->s, number_string); emit_out("jal\n"); } emit_out(":DO_END_"); uniqueID_out(function->s, number_string); break_frame = nested_locals; break_target_head = nested_break_head; break_target_func = nested_break_func; break_target_num = nested_break_num; continue_target_head = 
nested_continue_head; } /* Process while loops */ void process_while(void) { struct token_list* nested_locals = break_frame; char* nested_break_head = break_target_head; char* nested_break_func = break_target_func; char* nested_break_num = break_target_num; char* nested_continue_head = continue_target_head; char* number_string = int2str(current_count, 10, TRUE); current_count = current_count + 1; break_target_head = "END_WHILE_"; continue_target_head = "WHILE_"; break_target_num = number_string; break_frame = function->locals; break_target_func = function->s; emit_out(":WHILE_"); uniqueID_out(function->s, number_string); global_token = global_token->next; require_match("ERROR in process_while\nMISSING (\n", "("); expression(); if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("JUMP.Z R0 @END_WHILE_"); else if(X86 == Architecture) emit_out("test_eax,eax\nje %END_WHILE_"); else if(AMD64 == Architecture) emit_out("test_rax,rax\nje %END_WHILE_"); else if(ARMV7L == Architecture) emit_out("!0 CMPI8 R0 IMM_ALWAYS\n^~END_WHILE_"); else if(AARCH64 == Architecture) emit_out("CBNZ_X0_PAST_BR\nLOAD_W16_AHEAD\nSKIP_32_DATA\n&END_WHILE_"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("rs1_a0 @8 bnez\n$END_WHILE_"); uniqueID_out(function->s, number_string); if(ARMV7L == Architecture) emit_out(" JUMP_EQUAL\t"); else if(AARCH64 == Architecture) emit_out("\nBR_X16\n"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("jal\n"); emit_out("# THEN_while_"); uniqueID_out(function->s, number_string); require_match("ERROR in process_while\nMISSING )\n", ")"); statement(); require(NULL != global_token, "Reached EOF inside of function\n"); if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("JUMP @WHILE_"); else if(X86 == Architecture) emit_out("jmp %WHILE_"); else if(AMD64 == Architecture) emit_out("jmp %WHILE_"); else if(ARMV7L == Architecture) emit_out("^~WHILE_"); else if(AARCH64 
== Architecture) emit_out("LOAD_W16_AHEAD\nSKIP_32_DATA\n&WHILE_"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("$WHILE_"); uniqueID_out(function->s, number_string); if(ARMV7L == Architecture) emit_out(" JUMP_ALWAYS\n"); else if(AARCH64 == Architecture) emit_out("\nBR_X16\n"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("jal\n"); emit_out(":END_WHILE_"); uniqueID_out(function->s, number_string); break_target_head = nested_break_head; break_target_func = nested_break_func; break_target_num = nested_break_num; continue_target_head = nested_continue_head; break_frame = nested_locals; } /* Ensure that functions return */ void return_result(void) { global_token = global_token->next; require(NULL != global_token, "Incomplete return statement received\n"); if(global_token->s[0] != ';') expression(); require_match("ERROR in return_result\nMISSING ;\n", ";"); struct token_list* i; unsigned size_local_var; for(i = function->locals; NULL != i; i = i->next) { size_local_var = ceil_div(i->type->size, register_size); while(size_local_var != 0) { if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("POPR R1 R15\t# _return_result_locals\n"); else if(X86 == Architecture) emit_out("pop_ebx\t# _return_result_locals\n"); else if(AMD64 == Architecture) emit_out("pop_rbx\t# _return_result_locals\n"); else if(ARMV7L == Architecture) emit_out("{R1} POP_ALWAYS\t# _return_result_locals\n"); else if(AARCH64 == Architecture) emit_out("POP_X1\t# _return_result_locals\n"); else if(RISCV32 == Architecture) emit_out("rd_a1 rs1_sp lw # _return_result_locals\nrd_sp rs1_sp !4 addi\n"); else if(RISCV64 == Architecture) emit_out("rd_a1 rs1_sp ld # _return_result_locals\nrd_sp rs1_sp !8 addi\n"); size_local_var = size_local_var - 1; } } if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("RET R15\n"); else if(X86 == Architecture) emit_out("ret\n"); else if(AMD64 == Architecture) 
emit_out("ret\n"); else if(ARMV7L == Architecture) emit_out("'1' LR RETURN\n"); else if(AARCH64 == Architecture) emit_out("RETURN\n"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("ret\n"); } void process_break(void) { if(NULL == break_target_head) { line_error(); fputs("Not inside of a loop or case statement\n", stderr); exit(EXIT_FAILURE); } struct token_list* i = function->locals; while(i != break_frame) { if(NULL == i) break; if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("POPR R1 R15\t# break_cleanup_locals\n"); else if(X86 == Architecture) emit_out("pop_ebx\t# break_cleanup_locals\n"); else if(AMD64 == Architecture) emit_out("pop_rbx\t# break_cleanup_locals\n"); else if(ARMV7L == Architecture) emit_out("{R1} POP_ALWAYS\t# break_cleanup_locals\n"); else if(AARCH64 == Architecture) emit_out("POP_X1\t# break_cleanup_locals\n"); else if(RISCV32 == Architecture) emit_out("rd_a1 rs1_sp lw\t# break_cleanup_locals\nrd_sp rs1_sp !4 addi\n"); else if(RISCV64 == Architecture) emit_out("rd_a1 rs1_sp ld\t# break_cleanup_locals\nrd_sp rs1_sp !8 addi\n"); i = i->next; } global_token = global_token->next; if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("JUMP @"); else if(X86 == Architecture) emit_out("jmp %"); else if(AMD64 == Architecture) emit_out("jmp %"); else if(ARMV7L == Architecture) emit_out("^~"); else if(AARCH64 == Architecture) emit_out("LOAD_W16_AHEAD\nSKIP_32_DATA\n&"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("$"); emit_out(break_target_head); emit_out(break_target_func); emit_out("_"); emit_out(break_target_num); if(ARMV7L == Architecture) emit_out(" JUMP_ALWAYS"); else if(AARCH64 == Architecture) emit_out("\nBR_X16"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out(" jal"); emit_out("\n"); require_match("ERROR in break statement\nMissing ;\n", ";"); } void process_continue(void) { if(NULL == continue_target_head) 
{ line_error(); fputs("Not inside of a loop\n", stderr); exit(EXIT_FAILURE); } global_token = global_token->next; if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("JUMP @"); else if(X86 == Architecture) emit_out("jmp %"); else if(AMD64 == Architecture) emit_out("jmp %"); else if(ARMV7L == Architecture) emit_out("^~"); else if(AARCH64 == Architecture) emit_out("LOAD_W16_AHEAD\nSKIP_32_DATA\n&"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("$"); emit_out(continue_target_head); emit_out(break_target_func); emit_out("_"); emit_out(break_target_num); if(ARMV7L == Architecture) emit_out(" JUMP_ALWAYS"); else if(AARCH64 == Architecture) emit_out("\nBR_X16"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out(" jal"); emit_out("\n"); require_match("ERROR in continue statement\nMissing ;\n", ";"); } void recursive_statement(void) { global_token = global_token->next; require(NULL != global_token, "Received EOF in recursive statement\n"); struct token_list* frame = function->locals; while(!match("}", global_token->s)) { statement(); require(NULL != global_token, "Received EOF in recursive statement prior to }\n"); } global_token = global_token->next; /* Clean up any locals added */ if(((X86 == Architecture) && !match("ret\n", output_list->s)) || ((AMD64 == Architecture) && !match("ret\n", output_list->s)) || (((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) && !match("RET R15\n", output_list->s)) || ((ARMV7L == Architecture) && !match("'1' LR RETURN\n", output_list->s)) || ((AARCH64 == Architecture) && !match("RETURN\n", output_list->s)) || (((RISCV32 == Architecture) || (RISCV64 == Architecture)) && !match("ret\n", output_list->s))) { struct token_list* i; for(i = function->locals; frame != i; i = i->next) { if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("POPR R1 R15\t# _recursive_statement_locals\n"); else if(X86 == Architecture) emit_out( 
"pop_ebx\t# _recursive_statement_locals\n"); else if(AMD64 == Architecture) emit_out("pop_rbx\t# _recursive_statement_locals\n"); else if(ARMV7L == Architecture) emit_out("{R1} POP_ALWAYS\t# _recursive_statement_locals\n"); else if(AARCH64 == Architecture) emit_out("POP_X1\t# _recursive_statement_locals\n"); else if(RISCV32 == Architecture) emit_out("rd_a1 rs1_sp lw\t# _recursive_statement_locals\nrd_sp rs1_sp !4 addi\n"); else if(RISCV64 == Architecture) emit_out("rd_a1 rs1_sp ld\t# _recursive_statement_locals\nrd_sp rs1_sp !8 addi\n"); } } function->locals = frame; } /* * statement: * { statement-list-opt } * type-name identifier ; * type-name identifier = expression; * if ( expression ) statement * if ( expression ) statement else statement * do statement while ( expression ) ; * while ( expression ) statement * for ( expression ; expression ; expression ) statement * asm ( "assembly" ... "assembly" ) ; * goto label ; * label: * return ; * break ; * expr ; */ struct type* lookup_type(char* s, struct type* start); void statement(void) { require(NULL != global_token, "expected a C statement but received EOF\n"); /* Always an integer until told otherwise */ current_target = integer; if(global_token->s[0] == '{') { recursive_statement(); } else if(':' == global_token->s[0]) { emit_out(global_token->s); emit_out("\t#C goto label\n"); global_token = global_token->next; } else if((NULL != lookup_type(global_token->s, prim_types)) || match("struct", global_token->s)) { collect_local(); } else if(match("if", global_token->s)) { process_if(); } else if(match("switch", global_token->s)) { process_switch(); } else if(match("do", global_token->s)) { process_do(); } else if(match("while", global_token->s)) { process_while(); } else if(match("for", global_token->s)) { process_for(); } else if(match("asm", global_token->s)) { process_asm(); } else if(match("goto", global_token->s)) { global_token = global_token->next; require(NULL != global_token, "naked goto is not 
supported\n"); if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) emit_out("JUMP @"); else if(X86 == Architecture) emit_out("jmp %"); else if(AMD64 == Architecture) emit_out("jmp %"); else if(ARMV7L == Architecture) emit_out("^~"); else if(AARCH64 == Architecture) emit_out("LOAD_W16_AHEAD\nSKIP_32_DATA\n&"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out("$"); emit_out(global_token->s); if(ARMV7L == Architecture) emit_out(" JUMP_ALWAYS"); else if(AARCH64 == Architecture) emit_out("\nBR_X16"); else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) emit_out(" jal"); emit_out("\n"); global_token = global_token->next; require_match("ERROR in statement\nMissing ;\n", ";"); } else if(match("return", global_token->s)) { return_result(); } else if(match("break", global_token->s)) { process_break(); } else if(match("continue", global_token->s)) { process_continue(); } else { expression(); require_match("ERROR in statement\nMISSING ;\n", ";"); } } /* Collect function arguments */ void collect_arguments(void) { global_token = global_token->next; require(NULL != global_token, "Received EOF when attempting to collect arguments\n"); struct type* type_size; struct token_list* a; while(!match(")", global_token->s)) { type_size = type_name(); require(NULL != global_token, "Received EOF when attempting to collect arguments\n"); require(NULL != type_size, "Must have non-null type\n"); if(global_token->s[0] == ')') { /* foo(int,char,void) doesn't need anything done */ continue; } else if(global_token->s[0] != ',') { /* deal with foo(int a, char b) */ require(!in_set(global_token->s[0], "[{(<=>)}]|&!^%;:'\""), "forbidden character in argument variable name\n"); require(!iskeywordp(global_token->s), "You are not allowed to use a keyword as a argument variable name\n"); a = sym_declare(global_token->s, type_size, function->arguments); if(NULL == function->arguments) { if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == 
Architecture)) a->depth = 0; else if(X86 == Architecture) a->depth = -4; else if(AMD64 == Architecture) a->depth = -8; else if(ARMV7L == Architecture) a->depth = 4; else if(AARCH64 == Architecture) a->depth = register_size; else if(RISCV32 == Architecture) a->depth = -4; else if(RISCV64 == Architecture) a->depth = -8; } else { if((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) a->depth = function->arguments->depth + register_size; else if(X86 == Architecture) a->depth = function->arguments->depth - register_size; else if(AMD64 == Architecture) a->depth = function->arguments->depth - register_size; else if(ARMV7L == Architecture) a->depth = function->arguments->depth + register_size; else if(AARCH64 == Architecture) a->depth = function->arguments->depth + register_size; else if(RISCV32 == Architecture) a->depth = function->arguments->depth - register_size; else if(RISCV64 == Architecture) a->depth = function->arguments->depth - register_size; } global_token = global_token->next; require(NULL != global_token, "Incomplete argument list\n"); function->arguments = a; } /* ignore trailing comma (needed for foo(bar(), 1); expressions*/ if(global_token->s[0] == ',') { global_token = global_token->next; require(NULL != global_token, "naked comma in collect arguments\n"); } require(NULL != global_token, "Argument list never completed\n"); } global_token = global_token->next; } void declare_function(void) { current_count = 0; function = sym_declare(global_token->prev->s, NULL, global_function_list); /* allow previously defined functions to be looked up */ global_function_list = function; if((KNIGHT_NATIVE == Architecture) && match("main", function->s)) { require_match("Impossible error ( vanished\n", "("); require_match("Reality ERROR (USING KNIGHT-NATIVE)\nHardware does not support arguments\nthus neither can main on this architecture\ntry tape_01 and tape_02 instead\n", ")"); } else collect_arguments(); require(NULL != global_token, "Function definitions 
either need to be prototypes or full\n"); /* If just a prototype don't waste time */ if(global_token->s[0] == ';') global_token = global_token->next; else { emit_out("# Defining function "); emit_out(function->s); emit_out("\n"); emit_out(":FUNCTION_"); emit_out(function->s); emit_out("\n"); statement(); /* Prevent duplicate RETURNS */ if(((KNIGHT_POSIX == Architecture) || (KNIGHT_NATIVE == Architecture)) && !match("RET R15\n", output_list->s)) emit_out("RET R15\n"); else if((X86 == Architecture) && !match("ret\n", output_list->s)) emit_out("ret\n"); else if((AMD64 == Architecture) && !match("ret\n", output_list->s)) emit_out("ret\n"); else if((ARMV7L == Architecture) && !match("'1' LR RETURN\n", output_list->s)) emit_out("'1' LR RETURN\n"); else if((AARCH64 == Architecture) && !match("RETURN\n", output_list->s)) emit_out("RETURN\n"); else if((RISCV32 == Architecture) && !match("ret\n", output_list->s)) emit_out("ret\n"); else if((RISCV64 == Architecture) && !match("ret\n", output_list->s)) emit_out("ret\n"); } } void global_constant(void) { global_token = global_token->next; require(NULL != global_token, "CONSTANT lacks a name\n"); global_constant_list = sym_declare(global_token->s, NULL, global_constant_list); require(NULL != global_token->next, "CONSTANT lacks a value\n"); if(match("sizeof", global_token->next->s)) { global_token = global_token->next->next; require_match("ERROR in CONSTANT with sizeof\nMissing (\n", "("); struct type* a = type_name(); require_match("ERROR in CONSTANT with sizeof\nMissing )\n", ")"); global_token->prev->s = int2str(a->size, 10, TRUE); global_constant_list->arguments = global_token->prev; } else { global_constant_list->arguments = global_token->next; global_token = global_token->next->next; } } struct type* global_typedef(void) { struct type* type_size; /* typedef $TYPE $NAME; */ global_token = global_token->next; type_size = type_name(); require(NULL != global_token, "Received EOF while reading typedef\n"); type_size = 
mirror_type(type_size, global_token->s); add_primitive(type_size); global_token = global_token->next; require_match("ERROR in typedef statement\nMissing ;\n", ";"); return type_size; } void global_static_array(struct type* type_size, struct token_list* name) { int size; maybe_bootstrap_error("global array definitions"); globals_list = emit(":GLOBAL_", globals_list); globals_list = emit(name->s, globals_list); globals_list = emit("\n&GLOBAL_STORAGE_", globals_list); globals_list = emit(name->s, globals_list); if (AARCH64 == Architecture || AMD64 == Architecture || RISCV64 == Architecture) { globals_list = emit(" %0", globals_list); } globals_list = emit("\n:GLOBAL_STORAGE_", globals_list); globals_list = emit(name->s, globals_list); require(NULL != global_token->next, "Unterminated global\n"); global_token = global_token->next; /* Make sure not negative */ if(match("-", global_token->s)) { line_error(); fputs("Negative values are not supported for allocated arrays\n", stderr); exit(EXIT_FAILURE); } /* length */ size = strtoint(global_token->s) * type_size->size; /* Stop bad states */ if((size < 0) || (size > 0x100000)) { line_error(); fputs("M2-Planet is very inefficient so you probably don't want to allocate over 1MB into your binary for NULLs\n", stderr); exit(EXIT_FAILURE); } /* Ensure properly closed */ global_token = global_token->next; require_match("missing close bracket\n", "]"); require_match("missing ;\n", ";"); globals_list = emit("\n'", globals_list); while (0 != size) { globals_list = emit(" 00", globals_list); size = size - 1; } globals_list = emit("'\n", globals_list); } void global_assignment(void) { /* Store the global's value*/ globals_list = emit(":GLOBAL_", globals_list); globals_list = emit(global_token->prev->s, globals_list); globals_list = emit("\n", globals_list); global_token = global_token->next; require(NULL != global_token, "Global locals value in assignment\n"); unsigned padding_zeroes; if(in_set(global_token->s[0], "0123456789")) { /* 
Assume Int */ globals_list = emit("%", globals_list); globals_list = emit(global_token->s, globals_list); /* broken for big endian architectures */ padding_zeroes = (register_size / 4) - 1; while(padding_zeroes > 0) { /* Assume positive Int */ globals_list = emit(" %0", globals_list); padding_zeroes = padding_zeroes - 1; } globals_list = emit("\n", globals_list); } else if(('"' == global_token->s[0])) { /* Assume a string*/ globals_list = emit("&GLOBAL_", globals_list); globals_list = emit(global_token->prev->prev->s, globals_list); globals_list = emit("_contents\n", globals_list); globals_list = emit(":GLOBAL_", globals_list); globals_list = emit(global_token->prev->prev->s, globals_list); globals_list = emit("_contents\n", globals_list); globals_list = emit(parse_string(global_token->s), globals_list); } else { line_error(); fputs("Received ", stderr); fputs(global_token->s, stderr); fputs(" in program\n", stderr); exit(EXIT_FAILURE); } global_token = global_token->next; require_match("ERROR in Program\nMissing ;\n", ";"); } /* * program: * declaration * declaration program * * declaration: * CONSTANT identifer value * typedef identifer type; * type-name identifier ; * type-name identifier = value ; * type-name identifier [ value ]; * type-name identifier ( parameter-list ) ; * type-name identifier ( parameter-list ) statement * * parameter-list: * parameter-declaration * parameter-list, parameter-declaration * * parameter-declaration: * type-name identifier-opt */ void program(void) { unsigned i; function = NULL; Address_of = FALSE; struct type* type_size; new_type: /* Deal with garbage input */ if (NULL == global_token) return; require('#' != global_token->s[0], "unhandled macro directive\n"); require(!match("\n", global_token->s), "unexpected newline token\n"); /* Handle cc_* CONSTANT statements */ if(match("CONSTANT", global_token->s)) { global_constant(); goto new_type; } /* Handle c typedef statements */ if(match("typedef", global_token->s)) { type_size = 
global_typedef(); goto new_type; } type_size = type_name(); /* Deal with case of struct definitions */ if(NULL == type_size) goto new_type; require(NULL != global_token->next, "Unterminated global\n"); /* Add to global symbol table */ global_symbol_list = sym_declare(global_token->s, type_size, global_symbol_list); global_token = global_token->next; /* Deal with global variables */ if(match(";", global_token->s)) { /* Ensure enough bytes are allocated to store global variable. In some cases it allocates too much but that is harmless. */ globals_list = emit(":GLOBAL_", globals_list); globals_list = emit(global_token->prev->s, globals_list); /* round up division */ i = ceil_div(type_size->size, register_size); globals_list = emit("\n", globals_list); while(i != 0) { globals_list = emit("NULL\n", globals_list); i = i - 1; } global_token = global_token->next; goto new_type; } /* Deal with global functions */ if(match("(", global_token->s)) { declare_function(); goto new_type; } /* Deal with assignment to a global variable */ if(match("=", global_token->s)) { global_assignment(); goto new_type; } /* Deal with global static arrays */ if(match("[", global_token->s)) { global_static_array(type_size, global_token->prev); goto new_type; } /* Everything else is just an error */ line_error(); fputs("Received ", stderr); fputs(global_token->s, stderr); fputs(" in program\n", stderr); exit(EXIT_FAILURE); } void recursive_output(struct token_list* head, FILE* out) { struct token_list* i = reverse_list(head); while(NULL != i) { fputs(i->s, out); i = i->next; } } void output_tokens(struct token_list *i, FILE* out) { while(NULL != i) { fputs(i->s, out); fputs(" ", out); i = i->next; } }
/* Copyright (C) 2021 Sanne Wouda
 * Copyright (C) 2021 Andrius Štikonas <andrius@stikonas.eu>
 * Copyright (C) 2022 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "cc.h"
#include "gcc_req.h"

void require(int bool, char* error);
int strtoint(char* a);
void line_error_token(struct token_list* list);
struct token_list* eat_token(struct token_list* head);

/* One entry per open #if/#ifdef/#ifndef; entries form a stack through prev */
struct conditional_inclusion
{
	struct conditional_inclusion* prev;
	int include; /* 1 == include, 0 == skip */
	int previous_condition_matched; /* 1 == all subsequent conditions treated as FALSE */
};

/* One entry per defined macro; the newest definition is at the head */
struct macro_list
{
	struct macro_list* next;
	char* symbol;
	struct token_list* expansion;
};

struct macro_list* macro_env;
struct conditional_inclusion* conditional_inclusion_top;

/* point where we are currently modifying the global_token list */
struct token_list* macro_token;

/*
 * Define the macro sym with the single-token expansion value.
 * source and num are stored as the filename and linenumber of that token.
 * The new macro is pushed on the front of macro_env.
 */
void init_macro_env(char* sym, char* value, char* source, int num)
{
	struct macro_list* hold = macro_env;
	macro_env = calloc(1, sizeof(struct macro_list));
	require(NULL != macro_env, "Impossible Exhaustion has occurred\n");
	macro_env->symbol = sym;
	macro_env->next = hold;
	macro_env->expansion = calloc(1, sizeof(struct token_list));
	require(NULL != macro_env->expansion, "Impossible Exhaustion has occurred\n");
	macro_env->expansion->s = value;
	macro_env->expansion->filename = source;
	macro_env->expansion->linenumber = num;
}

/*
 * Remove macro_token from the token stream via eat_token and continue with
 * whatever eat_token returns.  When the removed token was also global_token,
 * global_token is moved along with it so it never refers to a removed token.
 */
void eat_current_token(void)
{
	int update_global_token = FALSE;
	if (macro_token == global_token)
		update_global_token = TRUE;

	macro_token = eat_token(macro_token);

	if(update_global_token)
		global_token = macro_token;
}

/* Walk the whole stream from global_token and remove every "\n" token */
void eat_newline_tokens(void)
{
	macro_token = global_token;

	while(TRUE)
	{
		if(NULL == macro_token) return;

		if(match("\n", macro_token->s))
		{
			eat_current_token();
		}
		else
		{
			macro_token = macro_token->next;
		}
	}
}

/*
 * Copy every token of the list token (s, filename and linenumber) and link
 * the copies, in order, immediately in front of point.
 * returns the first token inserted; inserts *before* point
 * Returns NULL when token is NULL.
 */
struct token_list* insert_tokens(struct token_list* point, struct token_list* token)
{
	struct token_list* copy;
	struct token_list* first = NULL;

	while (NULL != token)
	{
		copy = calloc(1, sizeof(struct token_list));
		require(NULL != copy, "Impossible Exhaustion has occurred\n");
		copy->s = token->s;
		copy->filename = token->filename;
		copy->linenumber = token->linenumber;

		if(NULL == first)
		{
			first = copy;
		}

		/* splice the copy between point->prev and point */
		copy->next = point;

		if (NULL != point)
		{
			copy->prev = point->prev;

			if(NULL != point->prev)
			{
				point->prev->next = copy;
			}

			point->prev = copy;
		}

		token = token->next;
	}

	return first;
}

/*
 * Find the macro whose symbol matches token->s.
 * Returns the macro_list entry, or NULL when no such macro is defined.
 * A NULL token is a fatal error.
 */
struct macro_list* lookup_macro(struct token_list* token)
{
	if(NULL == token)
	{
		line_error_token(macro_token);
		fputs("null token received in lookup_macro\n", stderr);
		exit(EXIT_FAILURE);
	}

	struct macro_list* hold = macro_env;

	while (NULL != hold)
	{
		if (match(token->s, hold->symbol))
		{
			/* found! */
			return hold;
		}

		hold = hold->next;
	}

	/* not found! */
	return NULL;
}

/*
 * Unlink and free the first macro whose symbol matches token->s.
 * Doing nothing is fine when no such macro exists.
 * A NULL token is a fatal error.
 */
void remove_macro(struct token_list* token)
{
	if(NULL == token)
	{
		line_error_token(macro_token);
		fputs("received a null in remove_macro\n", stderr);
		exit(EXIT_FAILURE);
	}

	struct macro_list* hold = macro_env;
	struct macro_list* temp;

	/* An empty environment has nothing to undefine */
	if(NULL == hold) return;

	/* Deal with the first element */
	if (match(token->s, hold->symbol))
	{
		macro_env = hold->next;
		free(hold);
		return;
	}

	/* Remove element from the middle of linked list */
	while (NULL != hold->next)
	{
		if (match(token->s, hold->next->symbol))
		{
			temp = hold->next;
			hold->next = hold->next->next;
			free(temp);
			return;
		}

		hold = hold->next;
	}

	/* nothing to undefine */
	return;
}

int macro_expression(void);

/*
 * Evaluate an identifier inside a macro expression and consume it.
 * A defined macro yields strtoint() of the first token of its expansion;
 * an undefined identifier yields 0.
 * A defined macro without any expansion is a fatal error.
 */
int macro_variable(void)
{
	int value = 0;
	struct macro_list* hold = lookup_macro(macro_token);
	if (NULL != hold)
	{
		if(NULL == hold->expansion)
		{
			line_error_token(macro_token);
			fputs("hold->expansion is a null\n", stderr);
			exit(EXIT_FAILURE);
		}
		value = strtoint(hold->expansion->s);
	}
	eat_current_token();
	return value;
}

/* Evaluate a numeric token with strtoint() and consume it */
int macro_number(void)
{
	int result = strtoint(macro_token->s);
	eat_current_token();
	return result;
}

/*
 * primary-expr:
 *     - primary-expr
 *     ! primary-expr
 *     ( expression )
 *     defined identifier
 *     defined ( identifier )
 *     identifier
 *     number
 *
 * Anything else evaluates to 0 without consuming the token.
 */
int macro_primary_expr(void)
{
	int defined_has_paren = FALSE;
	int hold;
	require(NULL != macro_token, "got an EOF terminated macro primary expression\n");

	if('-' == macro_token->s[0])
	{
		eat_current_token();
		return -macro_primary_expr();
	}
	else if('!' == macro_token->s[0])
	{
		eat_current_token();
		return !macro_primary_expr();
	}
	else if('(' == macro_token->s[0])
	{
		eat_current_token();
		hold = macro_expression();
		require(')' == macro_token->s[0], "missing ) in macro expression\n");
		eat_current_token();
		return hold;
	}
	else if(match("defined", macro_token->s))
	{
		eat_current_token();

		require(NULL != macro_token, "got an EOF terminated macro defined expression\n");

		/* the parentheses around the identifier are optional */
		if('(' == macro_token->s[0])
		{
			defined_has_paren = TRUE;
			eat_current_token();
		}

		if (NULL != lookup_macro(macro_token))
		{
			hold = TRUE;
		}
		else
		{
			hold = FALSE;
		}
		eat_current_token();

		if(TRUE == defined_has_paren)
		{
			if(NULL == macro_token)
			{
				line_error_token(macro_token);
				fputs("unterminated define ( statement\n", stderr);
				exit(EXIT_FAILURE);
			}
			require(')' == macro_token->s[0], "missing close parenthesis for defined()\n");
			eat_current_token();
		}

		return hold;
	}
	else if(in_set(macro_token->s[0], "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"))
	{
		return macro_variable();
	}
	else if(in_set(macro_token->s[0], "0123456789"))
	{
		return macro_number();
	}
	else
	{
		return 0;    /* FIXME: error handling */
	}
}

/*
 * Handles + - * / % >> << on top of primary expressions.
 * NOTE: all of these operators share this single level and the right hand
 * side is evaluated by recursion, so there is no precedence between them and
 * they group to the right (a - b - c is computed as a - (b - c)).
 * On return macro_token is guaranteed to be non-NULL.
 */
int macro_additive_expr(void)
{
	int lhs = macro_primary_expr();
	int hold;

	require(NULL != macro_token, "got an EOF terminated macro additive expression\n");
	if(match("+", macro_token->s))
	{
		eat_current_token();
		return lhs + macro_additive_expr();
	}
	else if(match("-", macro_token->s))
	{
		eat_current_token();
		return lhs - macro_additive_expr();
	}
	else if(match("*", macro_token->s))
	{
		eat_current_token();
		return lhs * macro_additive_expr();
	}
	else if(match("/", macro_token->s))
	{
		eat_current_token();
		hold = macro_additive_expr();
		require(0 != hold, "divide by zero not valid even in C macros\n");
		return lhs / hold;
	}
	else if(match("%", macro_token->s))
	{
		eat_current_token();
		hold = macro_additive_expr();
		require(0 != hold, "modulus by zero not valid even in C macros\n");
		return lhs % hold;
	}
	else if(match(">>", macro_token->s))
	{
		eat_current_token();
		return lhs >> macro_additive_expr();
	}
	else if(match("<<", macro_token->s))
	{
		eat_current_token();
		return lhs << macro_additive_expr();
	}
	else
	{
		return lhs;
	}
}

/*
 * Handles < <= >= > == != on top of additive expressions.
 * The right hand side is evaluated by recursion (groups to the right).
 */
int macro_relational_expr(void)
{
	int lhs = macro_additive_expr();

	if(match("<", macro_token->s))
	{
		eat_current_token();
		return lhs < macro_relational_expr();
	}
	else if(match("<=", macro_token->s))
	{
		eat_current_token();
		return lhs <= macro_relational_expr();
	}
	else if(match(">=", macro_token->s))
	{
		eat_current_token();
		return lhs >= macro_relational_expr();
	}
	else if(match(">", macro_token->s))
	{
		eat_current_token();
		return lhs > macro_relational_expr();
	}
	else if(match("==", macro_token->s))
	{
		eat_current_token();
		return lhs == macro_relational_expr();
	}
	else if(match("!=", macro_token->s))
	{
		eat_current_token();
		return lhs != macro_relational_expr();
	}
	else
	{
		return lhs;
	}
}

/*
 * Handles & && | || ^ on top of relational expressions.
 * The right hand side is always evaluated before the operator is applied,
 * so && and || never skip consuming their right operand.
 */
int macro_bitwise_expr(void)
{
	int rhs;
	int lhs = macro_relational_expr();

	if(match("&", macro_token->s))
	{
		eat_current_token();
		return lhs & macro_bitwise_expr();
	}
	else if(match("&&", macro_token->s))
	{
		eat_current_token();
		rhs = macro_bitwise_expr();
		return lhs && rhs;
	}
	else if(match("|", macro_token->s))
	{
		eat_current_token();
		rhs = macro_bitwise_expr();
		return lhs | rhs;
	}
	else if(match("||", macro_token->s))
	{
		eat_current_token();
		rhs = macro_bitwise_expr();
		return lhs || rhs;
	}
	else if(match("^", macro_token->s))
	{
		eat_current_token();
		rhs = macro_bitwise_expr();
		return lhs ^ rhs;
	}
	else
	{
		return lhs;
	}
}

/* Entry point for evaluating the expression of #if and #elif */
int macro_expression(void)
{
	return macro_bitwise_expr();
}

/*
 * Handle "#define NAME tokens... \n".
 * The directive, the name and the expansion tokens are removed from the
 * stream; the terminating newline is left for the caller.  The removed
 * expansion tokens are kept as the macro's expansion list.
 * Inside a non-included conditional block the tokens are still consumed but
 * no macro is registered.
 */
void handle_define(void)
{
	struct macro_list* hold;
	struct token_list* expansion_end = NULL;

	/* don't use #define statements from non-included blocks */
	int conditional_define = TRUE;
	if(NULL != conditional_inclusion_top)
	{
		if(FALSE == conditional_inclusion_top->include)
		{
			conditional_define = FALSE;
		}
	}

	eat_current_token();

	require(NULL != macro_token, "got an EOF terminated #define\n");
	require('\n' != macro_token->s[0], "unexpected newline after #define\n");

	/* insert new macro */
	hold = calloc(1, sizeof(struct macro_list));
	require(NULL != hold, "#define got something it can't handle\n");
	hold->symbol = macro_token->s;
	hold->next = macro_env;
	/* provided it isn't in a non-included block */
	if(conditional_define) macro_env = hold;

	/* discard the macro name */
	eat_current_token();

	while (TRUE)
	{
		require(NULL != macro_token, "got an EOF terminated #define\n");

		if ('\n' == macro_token->s[0])
		{
			/* cut the expansion loose from the rest of the token stream;
			 * an empty expansion simply stays NULL (calloc) */
			if(NULL != expansion_end) expansion_end->next = NULL;

			/* throw away if not used; only done here so that hold stays
			 * valid for every expansion token of the line */
			if(!conditional_define) free(hold);
			return;
		}

		expansion_end = macro_token;

		/* in the first iteration, we set the first token of the expansion, if
		   it exists */
		if (NULL == hold->expansion)
		{
			hold->expansion = macro_token;
		}

		eat_current_token();
	}
}

/* Handle "#undef NAME": drop the directive, undefine NAME, drop NAME */
void handle_undef(void)
{
	eat_current_token();
	remove_macro(macro_token);
	eat_current_token();
}

/*
 * Handle #error (warning_p FALSE) and #warning (warning_p TRUE).
 * Inside an included block the message is written to stderr and #error
 * terminates the program.  In every case the directive and its message are
 * removed from the token stream; the terminating newline is left in place.
 */
void handle_error(int warning_p)
{
	/* don't use #error statements from non-included blocks */
	int conditional_error = TRUE;
	if(NULL != conditional_inclusion_top)
	{
		if(FALSE == conditional_inclusion_top->include)
		{
			conditional_error = FALSE;
		}
	}
	eat_current_token();
	/* provided it isn't in a non-included block */
	if(conditional_error)
	{
		line_error_token(macro_token);
		if(warning_p) fputs(" warning: #warning ", stderr);
		else fputs(" error: #error ", stderr);
		while (TRUE)
		{
			require(NULL != macro_token, "\nFailed to properly terminate error message with \\n\n");
			if ('\n' == macro_token->s[0]) break;
			fputs(macro_token->s, stderr);
			fputs(" ", stderr);
			/* consume the word so a #warning message never reaches the parser */
			eat_current_token();
		}
		fputs("\n", stderr);
		if(!warning_p) exit(EXIT_FAILURE);
	}
	while (TRUE)
	{
		require(NULL != macro_token, "\nFailed to properly terminate error message with \\n\n");
		/* discard the error */
		if ('\n' == macro_token->s[0])
		{
			return;
		}
		eat_current_token();
	}
}

void eat_block(void);

/*
 * Process the directive at macro_token (a token starting with '#').
 * Conditionals push/pop/update conditional_inclusion_top and drop the tokens
 * of branches that are not taken; #define, #undef, #error and #warning are
 * delegated to their handlers; anything else is dropped up to the newline,
 * with a warning unless it is #include.
 */
void macro_directive(void)
{
	struct conditional_inclusion *t;
	int result;

	/* FIXME: whitespace is allowed between "#" and "if" */
	if(match("#if", macro_token->s))
	{
		eat_current_token();
		/* evaluate constant integer expression */
		result = macro_expression();
		/* push conditional inclusion */
		t = calloc(1, sizeof(struct conditional_inclusion));
		require(NULL != t, "Impossible Exhaustion has occurred\n");
		t->prev = conditional_inclusion_top;
		conditional_inclusion_top = t;
		t->include = TRUE;

		if(FALSE == result)
		{
			t->include = FALSE;
			eat_block();
		}

		t->previous_condition_matched = t->include;
	}
	else if(match("#ifdef", macro_token->s))
	{
		eat_current_token();
		require(NULL != macro_token, "got an EOF terminated macro defined expression\n");
		if (NULL != lookup_macro(macro_token))
		{
			result = TRUE;
			eat_current_token();
		}
		else
		{
			result = FALSE;
			eat_block();
		}

		/* push conditional inclusion */
		t = calloc(1, sizeof(struct conditional_inclusion));
		require(NULL != t, "Impossible Exhaustion has occurred\n");
		t->prev = conditional_inclusion_top;
		conditional_inclusion_top = t;
		t->include = TRUE;

		if(FALSE == result)
		{
			t->include = FALSE;
		}

		t->previous_condition_matched = t->include;
	}
	else if(match("#ifndef", macro_token->s))
	{
		eat_current_token();
		require(NULL != macro_token, "got an EOF terminated macro defined expression\n");
		if (NULL != lookup_macro(macro_token))
		{
			result = FALSE;
		}
		else
		{
			result = TRUE;
			eat_current_token();
		}

		/* push conditional inclusion */
		t = calloc(1, sizeof(struct conditional_inclusion));
		require(NULL != t, "Impossible Exhaustion has occurred\n");
		t->prev = conditional_inclusion_top;
		conditional_inclusion_top = t;
		t->include = TRUE;

		if(FALSE == result)
		{
			t->include = FALSE;
			eat_block();
		}

		t->previous_condition_matched = t->include;
	}
	else if(match("#elif", macro_token->s))
	{
		eat_current_token();
		result = macro_expression();
		require(NULL != conditional_inclusion_top, "#elif without leading #if\n");
		conditional_inclusion_top->include = result && !conditional_inclusion_top->previous_condition_matched;
		conditional_inclusion_top->previous_condition_matched =
		    conditional_inclusion_top->previous_condition_matched || conditional_inclusion_top->include;
		/* Same rule as #else: the branch is dropped whenever it is not
		 * included, which also covers a true condition that follows an
		 * already taken branch */
		if(FALSE == conditional_inclusion_top->include)
		{
			eat_block();
		}
	}
	else if(match("#else", macro_token->s))
	{
		eat_current_token();
		require(NULL != conditional_inclusion_top, "#else without leading #if\n");
		conditional_inclusion_top->include = !conditional_inclusion_top->previous_condition_matched;
		if(FALSE == conditional_inclusion_top->include)
		{
			eat_block();
		}
	}
	else if(match("#endif", macro_token->s))
	{
		if(NULL == conditional_inclusion_top)
		{
			line_error_token(macro_token);
			fputs("unexpected #endif\n", stderr);
			exit(EXIT_FAILURE);
		}

		eat_current_token();
		/* pop conditional inclusion */
		t = conditional_inclusion_top;
		conditional_inclusion_top = conditional_inclusion_top->prev;
		free(t);
	}
	else if(match("#define", macro_token->s))
	{
		handle_define();
	}
	else if(match("#undef", macro_token->s))
	{
		handle_undef();
	}
	else if(match("#error", macro_token->s))
	{
		handle_error(FALSE);
	}
	else if(match("#warning", macro_token->s))
	{
		handle_error(TRUE);
	}
	else
	{
		if(!match("#include", macro_token->s))
		{
			/* Put a big fat warning but see if we can just ignore */
			fputs(">>WARNING<<\n>>WARNING<<\n", stderr);
			line_error_token(macro_token);
			fputs("feature: ", stderr);
			fputs(macro_token->s, stderr);
			fputs(" unsupported in M2-Planet\nIgnoring line, may result in bugs\n>>WARNING<<\n>>WARNING<<\n\n", stderr);
		}

		/* unhandled macro directive; let's eat until a newline; om nom nom */
		while(TRUE)
		{
			if(NULL == macro_token)
			{
				return;
			}

			if('\n' == macro_token->s[0])
			{
				return;
			}

			eat_current_token();
		}
	}
}

/*
 * Drop tokens up to (not including) the #endif that closes a nested
 * conditional; conditionals nested deeper are dropped recursively.
 */
void eat_until_endif(void)
{
	/* This #if block is nested inside of an #if block that needs to be dropped, lose EVERYTHING */
	do
	{
		require(NULL != macro_token, "Unterminated #if block\n");
		if(match("#if", macro_token->s) || match("#ifdef", macro_token->s) || match("#ifndef", macro_token->s))
		{
			eat_current_token();
			eat_until_endif();
		}

		eat_current_token();
		require(NULL != macro_token, "Unterminated #if block\n");
	} while(!match("#endif", macro_token->s));
}

/*
 * Drop the tokens of a branch that is not taken.  The token macro_token
 * points at on entry is always dropped; dropping stops at the #elif, #else
 * or #endif belonging to the current conditional, and macro_token is then
 * moved back onto a directly preceding newline token if there is one.
 */
void eat_block(void)
{
	/* This conditional #if block is wrong, drop everything until the #elif/#else/#endif */
	do
	{
		/* the caller may have just consumed the last token of the input */
		require(NULL != macro_token, "Unterminated #if block\n");
		if(match("#if", macro_token->s) || match("#ifdef", macro_token->s) || match("#ifndef", macro_token->s))
		{
			eat_current_token();
			eat_until_endif();
		}

		eat_current_token();
		require(NULL != macro_token, "Unterminated #if block\n");
		if(match("#elif", macro_token->s)) break;
		if(match("#else", macro_token->s)) break;
		if(match("#endif", macro_token->s)) break;
	} while(TRUE);
	require(NULL != macro_token->prev, "impossible #if block\n");

	/* rewind the newline */
	if(match("\n", macro_token->prev->s)) macro_token = macro_token->prev;
}

/*
 * If token names a macro, replace it in the stream by a copy of the macro's
 * expansion.  Returns the token preprocessing should continue with:
 *   - not a macro:         the token after token
 *   - empty expansion:     the token that followed the removed macro name
 *   - non-empty expansion: the token after the first inserted token, so the
 *                          first expansion token is not expanded again
 * A NULL token, or a token without a successor, is a fatal error.
 * NOTE(review): relies on eat_token returning the successor of the removed
 * token, as eat_current_token above does - confirm against its definition.
 */
struct token_list* maybe_expand(struct token_list* token)
{
	if(NULL == token)
	{
		line_error_token(macro_token);
		fputs("maybe_expand passed a null token\n", stderr);
		exit(EXIT_FAILURE);
	}

	struct macro_list* hold = lookup_macro(token);
	struct token_list* hold2;
	int update_global_token = FALSE;
	if(NULL == token->next)
	{
		line_error_token(macro_token);
		fputs("we can't expand a null token: ", stderr);
		fputs(token->s, stderr);
		fputc('\n', stderr);
		exit(EXIT_FAILURE);
	}

	if (NULL == hold)
	{
		return token->next;
	}

	/* keep global_token on the head of the stream, like eat_current_token */
	if(token == global_token) update_global_token = TRUE;

	token = eat_token(token);

	if (NULL == hold->expansion)
	{
		if(update_global_token) global_token = token;
		/* token already is the successor of the removed macro name;
		 * it still has to be looked at by the caller */
		return token;
	}
	hold2 = insert_tokens(token, hold->expansion);
	if(update_global_token) global_token = hold2;

	return hold2->next;
}

/*
 * Run the preprocessor over the whole stream starting at global_token:
 * directives at the start of a line are executed, tokens inside skipped
 * conditional branches are dropped and macro names are expanded.
 * Newline tokens are kept; a directive that does not end at a newline is a
 * fatal error.
 */
void preprocess(void)
{
	int start_of_line = TRUE;
	macro_token = global_token;

	while(NULL != macro_token)
	{
		if(start_of_line && '#' == macro_token->s[0])
		{
			macro_directive();

			if(macro_token)
			{
				if('\n' != macro_token->s[0])
				{
					line_error_token(macro_token);
					fputs("newline expected at end of macro directive\n", stderr);
					fputs("found: '", stderr);
					fputs(macro_token->s, stderr);
					fputs("'\n", stderr);
					exit(EXIT_FAILURE);
				}
			}
		}
		else if('\n' == macro_token->s[0])
		{
			start_of_line = TRUE;
			macro_token = macro_token->next;
		}
		else
		{
			start_of_line = FALSE;
			if(NULL == conditional_inclusion_top)
			{
				macro_token = maybe_expand(macro_token);
			}
			else if(!conditional_inclusion_top->include)
			{
				/* rewrite the token stream to exclude the current token */
				eat_block();
				start_of_line = TRUE;
			}
			else
			{
				macro_token = maybe_expand(macro_token);
			}
		}
	}
}
/* Copyright (C) 2016 Jeremiah Orians * Copyright (C) 2020 deesix <deesix@tuta.io> * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #include<stdlib.h> #include<stdio.h> #include<string.h> #include"cc.h" /* The core functions */ void initialize_types(void); struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename); struct token_list* reverse_list(struct token_list* head); struct token_list* remove_line_comments(struct token_list* head); struct token_list* remove_line_comment_tokens(struct token_list* head); struct token_list* remove_preprocessor_directives(struct token_list* head); void eat_newline_tokens(void); void init_macro_env(char* sym, char* value, char* source, int num); void preprocess(void); void program(void); void recursive_output(struct token_list* i, FILE* out); void output_tokens(struct token_list *i, FILE* out); int strtoint(char *a); int main(int argc, char** argv) { MAX_STRING = 4096; BOOTSTRAP_MODE = FALSE; PREPROCESSOR_MODE = FALSE; int DEBUG = FALSE; FILE* in = stdin; FILE* destination_file = stdout; Architecture = 0; /* catch unset */ init_macro_env("__M2__", "42", "__INTERNAL_M2__", 0); /* Setup __M2__ */ char* arch; char* name; char* hold; int env=0; char* val; int i = 1; while(i <= argc) { if(NULL == argv[i]) { i = i + 1; } else if(match(argv[i], "-f") || match(argv[i], "--file")) { if(NULL == hold_string) { 
hold_string = calloc(MAX_STRING + 4, sizeof(char)); require(NULL != hold_string, "Impossible Exhaustion has occurred\n"); } name = argv[i + 1]; if(NULL == name) { fputs("did not receive a file name\n", stderr); exit(EXIT_FAILURE); } in = fopen(name, "r"); if(NULL == in) { fputs("Unable to open for reading file: ", stderr); fputs(name, stderr); fputs("\n Aborting to avoid problems\n", stderr); exit(EXIT_FAILURE); } global_token = read_all_tokens(in, global_token, name); fclose(in); i = i + 2; } else if(match(argv[i], "-o") || match(argv[i], "--output")) { destination_file = fopen(argv[i + 1], "w"); if(NULL == destination_file) { fputs("Unable to open for writing file: ", stderr); fputs(argv[i + 1], stderr); fputs("\n Aborting to avoid problems\n", stderr); exit(EXIT_FAILURE); } i = i + 2; } else if(match(argv[i], "-A") || match(argv[i], "--architecture")) { arch = argv[i + 1]; if(match("knight-native", arch)) { Architecture = KNIGHT_NATIVE; init_macro_env("__knight__", "1", "--architecture", env); env = env + 1; } else if(match("knight-posix", arch)) { Architecture = KNIGHT_POSIX; init_macro_env("__knight_posix__", "1", "--architecture", env); env = env + 1; } else if(match("x86", arch)) { Architecture = X86; init_macro_env("__i386__", "1", "--architecture", env); env = env + 1; } else if(match("amd64", arch)) { Architecture = AMD64; init_macro_env("__x86_64__", "1", "--architecture", env); env = env + 1; } else if(match("armv7l", arch)) { Architecture = ARMV7L; init_macro_env("__arm__", "1", "--architecture", env); env = env + 1; } else if(match("aarch64", arch)) { Architecture = AARCH64; init_macro_env("__aarch64__", "1", "--architecture", env); env = env + 1; } else if(match("riscv32", arch)) { Architecture = RISCV32; init_macro_env("__riscv", "1", "--architecture", env); init_macro_env("__riscv_xlen", "32", "--architecture", env + 1); env = env + 2; } else if(match("riscv64", arch)) { Architecture = RISCV64; init_macro_env("__riscv", "1", "--architecture", env); 
init_macro_env("__riscv_xlen", "64", "--architecture", env + 1); env = env + 2; } else { fputs("Unknown architecture: ", stderr); fputs(arch, stderr); fputs(" know values are: knight-native, knight-posix, x86, amd64, armv7l, aarch64, riscv32 and riscv64\n", stderr); exit(EXIT_FAILURE); } i = i + 2; } else if(match(argv[i], "--max-string")) { hold = argv[i+1]; if(NULL == hold) { fputs("--max-string requires a numeric argument\n", stderr); exit(EXIT_FAILURE); } MAX_STRING = strtoint(hold); require(0 < MAX_STRING, "Not a valid string size\nAbort and fix your --max-string\n"); i = i + 2; } else if(match(argv[i], "--bootstrap-mode")) { BOOTSTRAP_MODE = TRUE; i = i + 1; } else if(match(argv[i], "-g") || match(argv[i], "--debug")) { DEBUG = TRUE; i = i + 1; } else if(match(argv[i], "-h") || match(argv[i], "--help")) { fputs(" -f input file\n -o output file\n --help for this message\n --version for file version\n", stdout); exit(EXIT_SUCCESS); } else if(match(argv[i], "-E")) { PREPROCESSOR_MODE = TRUE; i = i + 1; } else if(match(argv[i], "-D")) { val = argv[i+1]; if(NULL == val) { fputs("-D requires an argument", stderr); exit(EXIT_FAILURE); } while(0 != val[0]) { if('=' == val[0]) { val[0] = 0; val = val + 1; break; } val = val + 1; } init_macro_env(argv[i+1], val, "__ARGV__", env); env = env + 1; i = i + 2; } else if(match(argv[i], "-V") || match(argv[i], "--version")) { fputs("M2-Planet v1.11.0\n", stderr); exit(EXIT_SUCCESS); } else { fputs("UNKNOWN ARGUMENT\n", stdout); exit(EXIT_FAILURE); } } /* Deal with special case of architecture not being set */ if(0 == Architecture) { Architecture = KNIGHT_NATIVE; init_macro_env("__knight__", "1", "--architecture", env); } /* Deal with special case of wanting to read from standard input */ if(stdin == in) { hold_string = calloc(MAX_STRING + 4, sizeof(char)); require(NULL != hold_string, "Impossible Exhaustion has occurred\n"); global_token = read_all_tokens(in, global_token, "STDIN"); } if(NULL == global_token) { fputs("Either 
no input files were given or they were empty\n", stderr); exit(EXIT_FAILURE); } global_token = reverse_list(global_token); if (BOOTSTRAP_MODE) { global_token = remove_line_comment_tokens(global_token); global_token = remove_preprocessor_directives(global_token); } else { global_token = remove_line_comments(global_token); preprocess(); } if (PREPROCESSOR_MODE) { fputs("\n/* Preprocessed source */\n", destination_file); output_tokens(global_token, destination_file); goto exit_success; } /* the main parser doesn't know how to handle newline tokens */ eat_newline_tokens(); initialize_types(); reset_hold_string(); output_list = NULL; program(); /* Output the program we have compiled */ fputs("\n# Core program\n", destination_file); recursive_output(output_list, destination_file); if(KNIGHT_NATIVE == Architecture) fputs("\n", destination_file); else if(DEBUG) fputs("\n:ELF_data\n", destination_file); fputs("\n# Program global variables\n", destination_file); recursive_output(globals_list, destination_file); fputs("\n# Program strings\n", destination_file); recursive_output(strings_list, destination_file); if(KNIGHT_NATIVE == Architecture) fputs("\n:STACK\n", destination_file); else if(!DEBUG) fputs("\n:ELF_end\n", destination_file); exit_success: if (destination_file != stdout) { fclose(destination_file); } return EXIT_SUCCESS; }
## Copyright (C) 2017 Jeremiah Orians
## This file is part of M2-Planet.
##
## M2-Planet is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## M2-Planet is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.

## Mnemonic -> opcode-bytes table for amd64.
## Each entry is "DEFINE <name> <hex bytes>"; one entry per line.
## Names that end in "," or contain DWORD only carry the opcode bytes, so the
## use site is expected to supply the operand right after the name.
## Two names may share one encoding (sal_rax,cl and shl_rax,cl below).

DEFINE add_rax, 4805
DEFINE add_rbp, 4881C5
DEFINE add_rax,rbx 4801D8
DEFINE add_rax,rbp 4801E8
DEFINE add_rbx,rax 4801C3
DEFINE and_rax,rbx 4821D8
DEFINE call E8
DEFINE call_rax FFD0
DEFINE cmp_rbx,rax 4839C3
DEFINE cqo 4899
DEFINE div_rbx 48F7F3
DEFINE idiv_rbx 48F7FB
DEFINE jmp E9
DEFINE je 0F84
DEFINE jne 0F85
DEFINE lea_rax,[rbp+DWORD] 488D85
DEFINE lea_rax,[rip+DWORD] 488D05
DEFINE lea_rdi,[rsp+DWORD] 488DBC24
DEFINE lea_rdx,[rsp+DWORD] 488D9424
DEFINE lea_rsi,[rsp+DWORD] 488DB424
DEFINE mov_rax, 48C7C0
DEFINE mov_rbx, 48C7C3
DEFINE mov_rdi, 48C7C7
DEFINE mov_rdx, 48C7C2
DEFINE mov_rsi, 48C7C6
DEFINE mov_r10, 49C7C2
DEFINE mov_rax,rbp 4889E8
DEFINE mov_rax,rbx 4889D8
DEFINE mov_rax,rdx 4889D0
DEFINE mov_rbp,rdi 4889FD
DEFINE mov_rbp,rsp 4889E5
DEFINE mov_rcx,rax 4889C1
DEFINE mov_rdi,rax 4889C7
DEFINE mov_rdi,rbx 4889DF
DEFINE mov_rdi,rsp 4889E7
DEFINE mov_[rbx],al 8803
DEFINE mov_[rbx],rax 488903
DEFINE mov_rax,[rax] 488B00
DEFINE mov_rbx,[rbx] 488B1B
DEFINE mov_rdi,[rdi] 488B3F
DEFINE mov_rdx,[rdx] 488B12
DEFINE mov_rsi,[rsi] 488B36
DEFINE mov_rax,[rsp+DWORD] 488B8424
DEFINE movzx_rax,al 480FB6C0
DEFINE movsxd_rax,eax 4863C0
DEFINE movsx_rax,BYTE_PTR_[rax] 480FBE00
DEFINE movsx_rbx,BYTE_PTR_[rbx] 480FBE1B
DEFINE imul_rbx 48F7EB
DEFINE mul_rbx 48F7E3
DEFINE NULL 0000000000000000
DEFINE not_rax 48F7D0
DEFINE or_rax,rbx 4809D8
DEFINE pop_rax 58
DEFINE pop_rbp 5D
DEFINE pop_rbx 5B
DEFINE pop_rdi 5F
DEFINE push_rax 50
DEFINE push_rbp 55
DEFINE push_rbx 53
DEFINE push_rdi 57
DEFINE ret C3
DEFINE sal_rax, 48C1E0
DEFINE sal_rax,cl 48D3E0
DEFINE sar_rax,cl 48D3F8
DEFINE shl_rax,cl 48D3E0
DEFINE shr_rax,cl 48D3E8
DEFINE seta_al 0F97C0
DEFINE setae_al 0F93C0
DEFINE setb_al 0F92C0
DEFINE setbe_al 0F96C0
DEFINE sete_al 0F94C0
DEFINE setg_al 0F9FC0
DEFINE setge_al 0F9DC0
DEFINE setl_al 0F9CC0
DEFINE setle_al 0F9EC0
DEFINE setne_al 0F95C0
DEFINE sub_rbx,rax 4829C3
DEFINE syscall 0F05
DEFINE test_rax,rax 4885C0
DEFINE xchg_rbx,rax 4893
DEFINE xor_rax,rbx 4831D8
## Copyright (C) 2016 Jeremiah Orians
## This file is part of M2-Planet.
##
## M2-Planet is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## M2-Planet is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.

## Program entry point: pushes the addresses of argv and envp, calls
## FUNCTION_main and hands main's return value to the exit syscall.

:_start

	mov_rbp,rsp                 ; Protect rsp (rbp now holds the entry stack pointer)

	;; Prepare argv
	lea_rax,[rbp+DWORD] %8      ; ARGV_address = RBP + 8
	push_rax                    ; Put argv on the stack

	;; Prepare envp
	mov_rax,rbp                 ; Address we need to load from
	mov_rax,[rax]               ; Get ARGC
	add_rax, %2                 ; OFFSET = ARGC + 2
	sal_rax, !3                 ; OFFSET = OFFSET * WORDSIZE (shift by 3 = times 8)
	add_rax,rbp                 ; ENVP_address = RBP (the entry RSP) + OFFSET
	push_rax                    ; Put envp on the stack

	;; Stack offset
	add_rbp, %8                 ; Fix rbp (advance it by one 8 byte word)

	;; Perform the main loop
	call %FUNCTION_main

	;; Exit to kernel
	mov_rdi,rax                 ; Using the return code given by main
	mov_rax, %0x3C              ; Syscall exit
	syscall                     ; Exit with that code
/* -*- c-file-style: "linux";indent-tabs-mode:t -*- */
/* Copyright (C) 2016 Jeremiah Orians
 * Copyright (C) 2017 Jan Nieuwenhuizen <janneke@gnu.org>
 * This file is part of mescc-tools.
 *
 * mescc-tools is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * mescc-tools is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with mescc-tools.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <stdio.h>
// CONSTANT HEX 16
#define HEX 16
// CONSTANT OCTAL 8
#define OCTAL 8
// CONSTANT BINARY 2
#define BINARY 2

/***********************************************************
 * Needed for current implementation of little endian      *
 * Can be used to support little bit endian instruction    *
 * sets if we ever find one that might be useful           *
 * But I seriously doubt it                                *
 ***********************************************************/
/* Reverse the characters inside every byte-sized digit group of the
 * NUL terminated string c, in place.  A group is 2 characters in HEX
 * mode, 3 in OCTAL mode and 8 in BINARY mode; any other ByteMode leaves
 * the string untouched.
 *
 * Processing stops at the terminator or at a trailing group that is
 * shorter than a full byte, so the terminator is never swapped into the
 * string and nothing past it is read or written. */
void reverseBitOrder(char* c, int ByteMode)
{
	if(NULL == c) return;

	int width;
	if(HEX == ByteMode) width = 2;
	else if(OCTAL == ByteMode) width = 3;
	else if(BINARY == ByteMode) width = 8;
	else return;

	int i;
	int lo;
	int hi;
	int hold;
	while(0 != c[0])
	{
		/* Only a complete group may be reversed */
		i = 1;
		while(i < width)
		{
			if(0 == c[i]) return;
			i = i + 1;
		}

		/* Mirror the group around its middle */
		lo = 0;
		hi = width - 1;
		while(lo < hi)
		{
			hold = c[lo];
			c[lo] = c[hi];
			c[hi] = hold;
			lo = lo + 1;
			hi = hi - 1;
		}

		c = c + width;
	}
}

/* Turn the big endian digit string start into its little endian form,
 * in place: the whole string is reversed and then the digits of every
 * byte are put back in reading order by reverseBitOrder.
 * NULL and empty strings are left alone. */
void LittleEndian(char* start, int ByteMode)
{
	if(NULL == start) return;
	if(0 == start[0]) return;

	char* end = start;
	char* c = start;
	while(0 != end[0]) end = end + 1;
	int hold;
	for(end = end - 1; start < end; start = start + 1)
	{
		hold = start[0];
		start[0] = end[0];
		end[0] = hold;
		end = end - 1;
	}

	/* The above makes a reversed bit order */
	reverseBitOrder(c, ByteMode);
}

/* Map a digit value 0-15 to its ASCII character ('0'-'9', 'A'-'F').
 * Returns -1 for any other value. */
int hex2char(int c)
{
	if((c >= 0) && (c <= 9)) return (c + 48);       /* 48 is '0' */
	else if((c >= 10) && (c <= 15)) return (c + 55); /* 55 is 'A' - 10 */
	else return -1;
}

/* Write value into s[0] .. s[digits - 1] as digits characters, most
 * significant digit first.  No terminator is written.
 *
 * divisor is the number base and is used as a bit mask (divisor - 1),
 * so it has to be a power of two; shift is the number of bits one digit
 * holds.  The recursion fills the string from the last character
 * backwards, each level consuming shift bits.
 *
 * Returns what is left of value once all written digits were shifted
 * out. */
int stringify(char* s, int digits, int divisor, int value, int shift)
{
	int i = value;
	if(digits > 1)
	{
		i = stringify(s+1, (digits - 1), divisor, value, shift);
	}
	s[0] = hex2char(i & (divisor - 1));
	return (i >> shift);
}
/* -*- c-file-style: "linux";indent-tabs-mode:t -*- */ /* Copyright (C) 2017 Jeremiah Orians * Copyright (C) 2017 Jan Nieuwenhuizen <janneke@gnu.org> * This file is part of mescc-tools * * mescc-tools is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * mescc-tools is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with mescc-tools. If not, see <http://www.gnu.org/licenses/>. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <sys/stat.h> #include "M2libc/bootstrappable.h" // CONSTANT max_string 4096 #define max_string 4096 int BITSIZE; int BigEndian; // CONSTANT HEX 16 #define HEX 16 // CONSTANT OCTAL 8 #define OCTAL 8 // CONSTANT BINARY 2 #define BINARY 2 /* Strings needed for constants */ char* zero_8; char* zero_16; char* zero_32; char* one_16; char* one_32; char* two_8; char* two_32; char* three_32; char* six_32; char* sixteen_32; char* twentyfour_32; /* Imported from stringify.c */ int stringify(char* s, int digits, int divisor, int value, int shift); void LittleEndian(char* start, int ByteMode); struct entry { struct entry* next; char* name; }; FILE* output; struct entry* jump_table; int count; char* entry; void consume_token(FILE* source_file, char* s) { int i = 0; int c = fgetc(source_file); require(EOF != c, "Can not have an EOF token\n"); do { s[i] = c; i = i + 1; require(max_string > i, "Token exceeds token length restriction\n"); c = fgetc(source_file); if(EOF == c) break; } while(!in_set(c, " \t\n>")); } void storeLabel(FILE* source_file) { struct entry* entry = calloc(1, 
sizeof(struct entry)); /* Prepend to list */ entry->next = jump_table; jump_table = entry; /* Store string */ entry->name = calloc((max_string + 1), sizeof(char)); consume_token(source_file, entry->name); count = count + 1; } void line_Comment(FILE* source_file) { int c = fgetc(source_file); while(!in_set(c, "\n\r")) { if(EOF == c) break; c = fgetc(source_file); } } void purge_string(FILE* source_file) { int c = fgetc(source_file); while((EOF != c) && ('"' != c)) { c = fgetc(source_file); } } void first_pass(struct entry* input) { if(NULL == input) return; first_pass(input->next); FILE* source_file = fopen(input->name, "r"); if(NULL == source_file) { fputs("The file: ", stderr); fputs(input->name, stderr); fputs(" can not be opened!\n", stderr); exit(EXIT_FAILURE); } int c; for(c = fgetc(source_file); EOF != c; c = fgetc(source_file)) { /* Check for and deal with label */ if(58 == c) { storeLabel(source_file); } /* Check for and deal with line comments */ else if (c == '#' || c == ';') { line_Comment(source_file); } else if ('"' == c) { purge_string(source_file); } } fclose(source_file); } void output_string_table(struct entry* node) { fputs("\n# Generated string table\n:ELF_str\n", output); fputs(zero_8, output); fputs("\t# NULL string\n", output); struct entry* i; for(i = node; NULL != i; i = i->next) { fputs(":ELF_str_", output); fputs(i->name, output); fputs("\t\"", output); fputs(i->name, output); fputs("\"\n", output); } fputs("# END Generated string table\n\n", output); } void output_symbol_table(struct entry* node) { fputs("\n# Generated symbol table\n:ELF_sym\n# Required NULL symbol entry\n", output); if(64 == BITSIZE) { fputs(zero_32, output); fputs("\t# st_name\n", output); fputs(zero_8, output); fputs("\t# st_info\n", output); fputs(zero_8, output); fputs("\t# st_other\n", output); fputs(one_16, output); fputs("\t# st_shndx\n", output); fputs(zero_32, output); fputc(' ', output); fputs(zero_32, output); fputs("\t# st_value\n", output); fputs(zero_32, 
output); fputc(' ', output); fputs(zero_32, output); fputs("\t# st_size\n\n", output); } else { fputs(zero_32, output); fputs("\t# st_name\n", output); fputs(zero_32, output); fputs("\t# st_value\n", output); fputs(zero_32, output); fputs("\t# st_size\n", output); fputs(zero_8, output); fputs("\t# st_info\n", output); fputs(zero_8, output); fputs("\t# st_other\n", output); fputs(one_16, output); fputs("\t# st_shndx\n\n", output); } struct entry* i; for(i = node; NULL != i; i = i->next) { fputs("%ELF_str_", output); fputs(i->name, output); fputs(">ELF_str\t# st_name\n", output); if(64 == BITSIZE) { fputs(two_8, output); fputs("\t# st_info (FUNC)\n", output); if(('_' == i->name[0]) && !match(entry, i->name)) { fputs(two_8, output); fputs("\t# st_other (hidden)\n", output); } else { fputs(zero_8, output); fputs("\t# st_other (other)\n", output); } fputs(one_16, output); fputs("\t# st_shndx\n", output); fputs("&", output); fputs(i->name, output); fputc(' ', output); fputs(zero_32, output); fputs("\t# st_value\n", output); fputs(zero_32, output); fputc(' ', output); fputs(zero_32, output); fputs("\t# st_size (unknown size)\n\n", output); } else { fputs("&", output); fputs(i->name, output); fputs("\t#st_value\n", output); fputs(zero_32, output); fputs("\t# st_size (unknown size)\n", output); fputs(two_8, output); fputs("\t# st_info (FUNC)\n", output); if(('_' == i->name[0]) && !match(entry, i->name)) { fputs(two_8, output); fputs("\t# st_other (hidden)\n", output); } else { fputs(zero_8, output); fputs("\t# st_other (default)\n", output); } fputs(one_16, output); fputs("\t# st_shndx\n\n", output); } } fputs("# END Generated symbol table\n", output); } struct entry* reverse_list(struct entry* head) { struct entry* root = NULL; struct entry* next; while(NULL != head) { next = head->next; head->next = root; root = head; head = next; } return root; } void write_int(char* field, char* label) { fputs(field, output); fputs("\t#", output); fputs(label, output); fputc('\n', 
output); } void write_register(char* field, char* label) { /* $field section in the section headers are different size for 32 and 64bits */ /* The below is broken for BigEndian */ fputs(field, output); if(64 == BITSIZE) { fputc(' ', output); fputs(zero_32, output); } fputs("\t#", output); fputs(label, output); fputc('\n', output); } void write_section(char* label, char* name, char* type, char* flags, char* address, char* offset, char* size, char* link, char* info, char* entry) { /* Write label */ fputc('\n', output); fputs(label, output); fputc('\n', output); write_int(name, "sh_name"); write_int(type, "sh_type"); write_register(flags, "sh_flags"); write_register(address, "sh_addr"); write_register(offset, "sh_offset"); write_register(size, "sh_size"); write_int(link, "sh_link"); /* Deal with the ugly case of stubs */ fputs(info, output); fputs("\t#sh_info\n", output); /* Alignment section in the section headers are different size for 32 and 64bits */ /* The below is broken for BigEndian */ if(64 == BITSIZE) { fputs(one_32, output); fputc(' ', output); fputs(zero_32, output); fputs("\t#sh_addralign\n", output); } else { fputs(one_32, output); fputs("\t#sh_addralign\n", output); } write_register(entry, "sh_entsize"); } char* get_string(int value, int size, int ByteMode, int shift) { char* ch = calloc(42, sizeof(char)); require(NULL != ch, "Exhausted available memory\n"); ch[0] = '\''; stringify(ch+1, size, ByteMode, value, shift); if(!BigEndian) LittleEndian(ch+1, ByteMode); int i = 0; while(0 != ch[i]) { i = i + 1; } ch[i] = '\''; return ch; } char* setup_string(int value, int number_of_bytes, int ByteMode) { int shift; int size; if(HEX == ByteMode) { size = 2; shift = 4; } else if(OCTAL == ByteMode) { size = 3; shift = 3; } else if(BINARY == ByteMode) { size = 8; shift = 1; } else { fputs("reached impossible mode\n", stderr); exit(EXIT_FAILURE); } return get_string(value, number_of_bytes *size, ByteMode, shift); } void setup_strings(int ByteMode) { zero_8 = 
setup_string(0, 1, ByteMode); zero_16 = setup_string(0, 2, ByteMode); zero_32 = setup_string(0, 4, ByteMode); one_16 = setup_string(1, 2, ByteMode); one_32 = setup_string(1, 4, ByteMode); two_8 = setup_string(2, 1, ByteMode); two_32 = setup_string(2, 4, ByteMode); three_32 = setup_string(3, 4, ByteMode); six_32 = setup_string(6, 4, ByteMode); sixteen_32 = setup_string(16, 4, ByteMode); twentyfour_32 = setup_string(24, 4, ByteMode); } /* Standard C main program */ int main(int argc, char **argv) { jump_table = NULL; struct entry* input = NULL; output = stdout; char* output_file = ""; entry = ""; BITSIZE = 32; count = 1; BigEndian = TRUE; int ByteMode = HEX; int set = FALSE; struct entry* temp; struct entry* head; int option_index = 1; while(option_index <= argc) { if(NULL == argv[option_index]) { option_index = option_index + 1; } else if(match(argv[option_index], "-h") || match(argv[option_index], "--help")) { fputs("Usage: ", stderr); fputs(argv[0], stderr); fputs(" --file FILENAME1 {--file FILENAME2} --output FILENAME\n", stderr); exit(EXIT_SUCCESS); } else if(match(argv[option_index], "--64")) { BITSIZE = 64; option_index = option_index + 1; } else if(match(argv[option_index], "-f") || match(argv[option_index], "--file")) { temp = calloc(1, sizeof(struct entry)); temp->name = argv[option_index + 1]; temp->next = input; input = temp; option_index = option_index + 2; } else if(match(argv[option_index], "-o") || match(argv[option_index], "--output")) { output_file = argv[option_index + 1]; output = fopen(output_file, "w"); if(NULL == output) { fputs("The file: ", stderr); fputs(input->name, stderr); fputs(" can not be opened!\n", stderr); exit(EXIT_FAILURE); } option_index = option_index + 2; } else if(match(argv[option_index], "-b") || match(argv[option_index], "--binary")) { ByteMode = BINARY; option_index = option_index + 1; } else if(match(argv[option_index], "-O") || match(argv[option_index], "--octal")) { ByteMode = OCTAL; option_index = option_index + 1; } 
else if(match(argv[option_index], "-X") || match(argv[option_index], "--hex")) { ByteMode = HEX; option_index = option_index + 1; } else if(match(argv[option_index], "--big-endian")) { BigEndian = TRUE; set = TRUE; option_index = option_index + 1; } else if(match(argv[option_index], "--little-endian")) { BigEndian = FALSE; set = TRUE; option_index = option_index + 1; } else if(match(argv[option_index], "-V") || match(argv[option_index], "--version")) { fputs("blood-elf 2.0.1\n(Basically Launches Odd Object Dump ExecutabLe Files\n", stdout); exit(EXIT_SUCCESS); } else if(match(argv[option_index], "--entry")) { head = calloc(1, sizeof(struct entry)); /* Include _start or any other entry from your .hex2 */ head->next = jump_table; jump_table = head; jump_table->name = argv[option_index + 1]; /* However only the last one will be exempt from the _name hidden rule */ entry = argv[option_index + 1]; option_index = option_index + 2; count = count + 1; } else { fputs("Unknown option\n", stderr); exit(EXIT_FAILURE); } } /* Make sure we have a program tape to run */ if (NULL == input) { return EXIT_FAILURE; } /* Force setting of endianness */ if(!set) { fputs("either --little-endian or --big-endian MUST be set\n", stderr); return EXIT_FAILURE; } /* Setup the ugly formating because RISC-V sucks */ setup_strings(ByteMode); /* Get all of the labels */ first_pass(input); /* Reverse their order */ jump_table = reverse_list(jump_table); /* Create sections */ /* Create string names for sections */ fputs("# Generated sections\n:ELF_shstr\n", output); fputs(zero_8, output); fputs("\t# NULL\n", output); fputs(":ELF_shstr__text\n\".text\"\n", output); fputs(":ELF_shstr__shstr\n\".shstrtab\"\n", output); fputs(":ELF_shstr__sym\n\".symtab\"\n", output); fputs(":ELF_shstr__str\n\".strtab\"\n", output); /* Create NULL section header as is required by the Spec. 
So dumb and waste of bytes*/ write_section(":ELF_section_headers", zero_32, zero_32, zero_32, zero_32, zero_32, zero_32, zero_32, zero_32, zero_32); write_section(":ELF_section_header_text", "%ELF_shstr__text>ELF_shstr", one_32, six_32, "&ELF_text", "%ELF_text>ELF_base", "%ELF_data>ELF_text", zero_32, zero_32, zero_32); write_section(":ELF_section_header_shstr", "%ELF_shstr__shstr>ELF_shstr", three_32, zero_32, "&ELF_shstr", "%ELF_shstr>ELF_base", "%ELF_section_headers>ELF_shstr", zero_32, zero_32, zero_32); write_section(":ELF_section_header_str", "%ELF_shstr__str>ELF_shstr", three_32, zero_32, "&ELF_str", "%ELF_str>ELF_base", "%ELF_sym>ELF_str", zero_32, zero_32, zero_32); if(64 == BITSIZE) write_section(":ELF_section_header_sym", "%ELF_shstr__sym>ELF_shstr", two_32, zero_32, "&ELF_sym", "%ELF_sym>ELF_base", "%ELF_end>ELF_sym", three_32, setup_string(count, 4, ByteMode), twentyfour_32); else write_section(":ELF_section_header_sym", "%ELF_shstr__sym>ELF_shstr", two_32, zero_32, "&ELF_sym", "%ELF_sym>ELF_base", "%ELF_end>ELF_sym", three_32, setup_string(count, 4, ByteMode), sixteen_32); /* Create dwarf stubs needed for objdump -d to get function names */ output_string_table(jump_table); output_symbol_table(jump_table); fputs("\n:ELF_end\n", output); /* Close output file */ fclose(output); return EXIT_SUCCESS; }
## Copyright (C) 2017 Jeremiah Orians
## This file is part of M2-Planet.
##
## M2-Planet is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## M2-Planet is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.

## Mnemonic -> opcode-bytes table for amd64.
## Each entry is "DEFINE <name> <hex bytes>"; one entry per line.
## Names that end in "," or contain DWORD/BYTE only carry the opcode bytes, so
## the use site is expected to supply the operand right after the name.
## Two names may share one encoding (sal_rax,cl and shl_rax,cl below).

DEFINE add_rax, 4805
DEFINE add_rbp, 4881C5
DEFINE add_rax,rbx 4801D8
DEFINE add_rax,rbp 4801E8
DEFINE add_rax,rdx 4801D0
DEFINE add_rbx,rax 4801C3
DEFINE and_rax,rbx 4821D8
DEFINE and_rsp, 4881E4
DEFINE call E8
DEFINE call_rax FFD0
DEFINE cmp_rbx,rax 4839C3
DEFINE cqo 4899
DEFINE div_rbx 48F7F3
DEFINE idiv_rbx 48F7FB
DEFINE jmp E9
DEFINE jmp_rcx FFE1
DEFINE je 0F84
DEFINE jne 0F85
DEFINE lea_rax,[rbp+DWORD] 488D85
DEFINE lea_rax,[rip+DWORD] 488D05
DEFINE lea_rbx,[rip+DWORD] 488D1D
DEFINE lea_rcx,[rbp+DWORD] 488D8D
DEFINE lea_rdi,[rsp+DWORD] 488DBC24
DEFINE lea_rdx,[rip+DWORD] 488D15
DEFINE lea_rdx,[rsp+DWORD] 488D9424
DEFINE lea_rdx,[rbp+DWORD] 488D95
DEFINE lea_rsi,[rsp+DWORD] 488DB424
DEFINE lea_r8,[rbp+DWORD] 4C8D85
DEFINE lea_r8,[rsp+DWORD] 4C8D8424
DEFINE lea_r9,[rbp+DWORD] 4C8D8D
DEFINE lea_r10,[rsp+DWORD] 4C8D9424
DEFINE mov_rax, 48C7C0
DEFINE mov_rbx, 48C7C3
DEFINE mov_rdi, 48C7C7
DEFINE mov_rdx, 48C7C2
DEFINE mov_rsi, 48C7C6
DEFINE mov_r10, 49C7C2
DEFINE mov_rax,rdx 4889D0
DEFINE mov_rax,rbx 4889D8
DEFINE mov_rax,rbp 4889E8
DEFINE mov_rax,rsp 4889E0
DEFINE mov_rbx,rdi 4889FB
DEFINE mov_rbp,rdi 4889FD
DEFINE mov_rbp,rsp 4889E5
DEFINE mov_rcx,rax 4889C1
DEFINE mov_rdi,rax 4889C7
DEFINE mov_rdi,rbx 4889DF
DEFINE mov_rdi,rsp 4889E7
DEFINE mov_rdx,rsp 4889E2
DEFINE mov_rsp,rax 4889C4
DEFINE mov_r8,rsp 4989E0
DEFINE mov_[rbx],al 8803
DEFINE mov_[rbx],ax 668903
DEFINE mov_[rbx],eax 8903
DEFINE mov_[rbx],rax 488903
DEFINE mov_[rbp+DWORD],rax 488985
DEFINE mov_[rbp+DWORD],rdi 4889BD
DEFINE mov_[rip+DWORD],rax 488905
DEFINE mov_[rip+DWORD],rbp 48892D
DEFINE mov_[rip+DWORD],rcx 48890D
DEFINE mov_[rip+DWORD],rdx 488915
DEFINE mov_[rip+DWORD],rsp 488925
DEFINE mov_eax,[rax] 8B00
DEFINE mov_rax,[rax] 488B00
DEFINE mov_rbx,[rbx] 488B1B
DEFINE mov_rcx,[rcx] 488B09
DEFINE mov_rdi,[rdi] 488B3F
DEFINE mov_rdx,[rdx] 488B12
DEFINE mov_rsi,[rsi] 488B36
DEFINE mov_rax,[rsp+DWORD] 488B8424
DEFINE mov_rax,[rip+DWORD] 488B05
DEFINE mov_rbp,[rip+DWORD] 488B2D
DEFINE mov_rsp,[rip+DWORD] 488B25
## NOTE(review): the ModRM byte A4 below has mod=10, the same 32 bit
## displacement form as mov_rax,[rsp+DWORD] (ModRM 84), so despite the BYTE in
## its name this macro looks like it needs a 4 byte displacement after it.
## Confirm against the places that use it before renaming or re-encoding.
DEFINE mov_rsp,[rsp+BYTE] 488BA424
DEFINE mov_r8,[r8] 4D8B00
DEFINE mov_r9,[r9] 4D8B09
DEFINE mov_r10,[r10] 4D8B12
DEFINE mov_ss,eax 8ED0
DEFINE movzx_rax,al 480FB6C0
DEFINE movsxd_rax,eax 4863C0
DEFINE movsx_rax,BYTE_PTR_[rax] 480FBE00
DEFINE movsx_rbx,BYTE_PTR_[rbx] 480FBE1B
DEFINE movsx_rax,WORD_PTR_[rax] 480FBF00
DEFINE movsx_rax,DWORD_PTR_[rax] 486300
DEFINE movzx_rax,BYTE_PTR_[rax] 480FB600
DEFINE movzx_rax,WORD_PTR_[rax] 480FB700
DEFINE imul_rbx 48F7EB
DEFINE mul_rbx 48F7E3
DEFINE NULL 0000000000000000
DEFINE not_rax 48F7D0
DEFINE or_rax,rbx 4809D8
DEFINE pop_rax 58
DEFINE pop_rbp 5D
DEFINE pop_rbx 5B
DEFINE pop_rcx 59
DEFINE pop_rdx 5A
DEFINE pop_rsi 5E
DEFINE pop_rdi 5F
DEFINE pop_r8 4158
DEFINE pop_r9 4159
DEFINE pop_r10 415A
DEFINE pop_r12 415C
DEFINE pop_r13 415D
DEFINE pop_r14 415E
DEFINE pop_r15 415F
DEFINE push 6A
DEFINE push_rax 50
DEFINE push_rbp 55
DEFINE push_rbx 53
DEFINE push_rcx 51
DEFINE push_rdi 57
DEFINE push_rdx 52
DEFINE push_rsi 56
DEFINE push_rsp 54
DEFINE push_r8 4150
DEFINE push_r9 4151
DEFINE push_r10 4152
DEFINE push_r12 4154
DEFINE push_r13 4155
DEFINE push_r14 4156
DEFINE push_r15 4157
DEFINE push_[rsp] FF3424
DEFINE rdmsr 0F32
DEFINE ret C3
DEFINE sal_rax, 48C1E0
DEFINE shr_rdx, 48C1EA
DEFINE shl_rax,cl 48D3E0
DEFINE sal_rax,cl 48D3E0
DEFINE shr_rax,cl 48D3E8
DEFINE sar_rax,cl 48D3F8
DEFINE seta_al 0F97C0
DEFINE setae_al 0F93C0
DEFINE setb_al 0F92C0
DEFINE setbe_al 0F96C0
DEFINE sete_al 0F94C0
DEFINE setg_al 0F9FC0
DEFINE setge_al 0F9DC0
DEFINE setl_al 0F9CC0
DEFINE setle_al 0F9EC0
DEFINE setne_al 0F95C0
DEFINE sub_rsp, 4881EC
DEFINE sub_rbx,rax 4829C3
DEFINE syscall 0F05
DEFINE test_rax,rax 4885C0
DEFINE wrmsr 0F30
DEFINE xchg_rbx,rax 4893
DEFINE xor_rax,rbx 4831D8
## Copyright (C) 2016 Jeremiah Orians
## This file is part of M2-Planet.
##
## M2-Planet is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## M2-Planet is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.

## Program entry point: pushes the addresses of argv and envp, calls
## FUNCTION_main and then falls through into FUNCTION_exit.

:_start

	mov_rbp,rsp                 ; Protect rsp (rbp now holds the entry stack pointer)

	;; Prepare argv
	lea_rax,[rbp+DWORD] %8      ; ARGV_address = RBP + 8
	push_rax                    ; Put argv on the stack

	;; Prepare envp
	mov_rax,rbp                 ; Address we need to load from
	mov_rax,[rax]               ; Get ARGC
	add_rax, %2                 ; OFFSET = ARGC + 2
	sal_rax, !3                 ; OFFSET = OFFSET * WORDSIZE (shift by 3 = times 8)
	add_rax,rbp                 ; ENVP_address = RBP (the entry RSP) + OFFSET
	push_rax                    ; Put envp on the stack

	;; Stack offset
	add_rbp, %8                 ; Fix rbp (advance it by one 8 byte word)

	;; Perform the main loop
	call %FUNCTION_main

	;; Fall through into exit with main's return value.
	;; Two copies are pushed because the code below pops two words:
	;; the first is thrown away, the second becomes the exit code.
	push_rax                    ; Put return value on the stack
	push_rax                    ; So that _exit gets it

;; exit and _exit share one body
:FUNCTION_exit
:FUNCTION__exit
	pop_rbx                     ; Drop the word on top of the stack
	pop_rdi                     ; Next word is the exit code
	mov_rax, %0x3C              ; Syscall exit
	syscall
### Copyright (C) 2016 Jeremiah Orians
### Copyright (C) 2017 Jan Nieuwenhuizen <janneke@gnu.org>
### This file is part of M2-Planet.
###
### M2-Planet is free software: you can redistribute it and/or modify
### it under the terms of the GNU General Public License as published by
### the Free Software Foundation, either version 3 of the License, or
### (at your option) any later version.
###
### M2-Planet is distributed in the hope that it will be useful,
### but WITHOUT ANY WARRANTY; without even the implied warranty of
### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
### GNU General Public License for more details.
###
### You should have received a copy of the GNU General Public License
### along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.

### stage0's hex2 format
###    !<label>          1 byte relative
###    $<label>          2 byte address
###    @<label>          2 byte relative
###    &<label>          4 byte address
###    %<label>          4 byte relative

### if you wish to use this header, you need to add :ELF_end to the end of your
### M1 or hex2 files.

### Every 8 byte field below that holds a label is written as the 4 byte
### label value followed by four literal zero bytes.

## ELF Header

:ELF_base
7F 45 4C 46                    # e_ident[EI_MAG0-3] ELF's magic number

02                             # e_ident[EI_CLASS] Indicating 64 bit
01                             # e_ident[EI_DATA] Indicating little endianness
01                             # e_ident[EI_VERSION] Indicating original elf

03                             # e_ident[EI_OSABI] Set at 3 because FreeBSD is strict
00                             # e_ident[EI_ABIVERSION] See above

00 00 00 00 00 00 00           # e_ident[EI_PAD]

02 00                          # e_type Indicating Executable
3E 00                          # e_machine Indicating AMD64
01 00 00 00                    # e_version Indicating original elf

&_start 00 00 00 00            # e_entry Address of the entry point
%ELF_program_headers>ELF_base 00 00 00 00 # e_phoff Address of program header table
00 00 00 00 00 00 00 00        # e_shoff Address of section header table

00 00 00 00                    # e_flags

40 00                          # e_ehsize Indicating our 64 Byte header

38 00                          # e_phentsize size of a program header table
01 00                          # e_phnum number of entries in program table

00 00                          # e_shentsize size of a section header table
00 00                          # e_shnum number of entries in section table

00 00                          # e_shstrndx index of the section names

## Program header table: a single loadable segment covering the whole
## image from ELF_base up to ELF_end
:ELF_program_headers
:ELF_program_header__text
01 00 00 00                    # ph_type: PT-LOAD = 1
07 00 00 00                    # ph_flags: PF-X|PF-W|PF-R = 7
00 00 00 00 00 00 00 00        # ph_offset
&ELF_base 00 00 00 00          # ph_vaddr
&ELF_base 00 00 00 00          # ph_physaddr
%ELF_end>ELF_base 00 00 00 00  # ph_filesz
%ELF_end>ELF_base 00 00 00 00  # ph_memsz
01 00 00 00 00 00 00 00        # ph_align

:ELF_text
/* -*- c-file-style: "linux";indent-tabs-mode:t -*- */ /* Copyright (C) 2016 Jeremiah Orians * Copyright (C) 2017 Jan Nieuwenhuizen <janneke@gnu.org> * This file is part of mescc-tools. * * mescc-tools is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * mescc-tools is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with mescc-tools. If not, see <http://www.gnu.org/licenses/>. */ #include <stdlib.h> #include <stdio.h> #include <string.h> #include "M2libc/bootstrappable.h" /* Internal processing Constants */ // CONSTANT max_string 4096 #define max_string 4096 // CONSTANT STR 2 #define STR 2 // CONSTANT NEWLINE 3 #define NEWLINE 3 /* Unique code for each architecture */ // CONSTANT KNIGHT 0 #define KNIGHT 0 // CONSTANT X86 3 #define X86 0x03 // CONSTANT AMD64 62 #define AMD64 0x3E // CONSTANT ARMV7L 40 #define ARMV7L 0x28 // CONSTANT AARM64 183 #define AARM64 0xB7 // CONSTANT PPC64LE 21 #define PPC64LE 0x15 // CONSTANT RISCV32 243 #define RISCV32 0xF3 // CONSTANT RISCV64 65779 #define RISCV64 0x100F3 /* Because RISC-V unlike all other architectures does get a seperate e_machine when changing from 32 to 64bit */ /* How do you want that output? 
*/ // CONSTANT HEX 16 #define HEX 16 // CONSTANT OCTAL 8 #define OCTAL 8 // CONSTANT BINARY 2 #define BINARY 2 /* Imported from stringify.c */ int stringify(char* s, int digits, int divisor, int value, int shift); void LittleEndian(char* start, int ByteMode); struct blob { struct blob* next; int type; char* Text; char* Expression; struct blob* hash_next; }; struct Token { struct Token* next; struct blob* contents; char* filename; int linenumber; }; /* Globals */ FILE* source_file; FILE* destination_file; int BigEndian; int ByteMode; int Architecture; int linenumber; struct Token* token_list; struct blob* blob_list; struct blob* define_blob; struct blob* newline_blob; int blob_count; char* SCRATCH; struct blob** hash_table; void line_error(char* filename, int linenumber) { fputs(filename, stderr); fputs(":", stderr); fputs(int2str(linenumber,10, FALSE), stderr); fputs(" :", stderr); } void ClearScratch() { int i = 0; int c = SCRATCH[i]; while(0 != c) { SCRATCH[i] = 0; i = i + 1; c = SCRATCH[i]; } } int GetHash(char* s) { int i = 5381; while(0 != s[0]) { i = (i << 5) + i + s[0]; s = s + 1; } return i & 0xFFFF; } struct blob* FindBlob() { int hash = GetHash(SCRATCH); struct blob* i = hash_table[hash]; while(NULL != i) { if(match(SCRATCH, i->Text)) return i; i = i->hash_next; } return NULL; } void AddHash(struct blob* a, char* s) { int i = GetHash(s); a->hash_next = hash_table[i]; hash_table[i] = a; } void NewBlob(int size) { blob_count = blob_count + 1; struct blob* a = calloc(1, sizeof(struct blob)); require(NULL != a, "Exhausted available memory\n"); a->Text = calloc(size + 1, sizeof(char)); require(NULL != a->Text, "Exhausted available memory\n"); int i = 0; while(i <= size) { a->Text[i] = SCRATCH[i]; i = i + 1; } a->next = blob_list; blob_list = a; AddHash(a, SCRATCH); } struct Token* newToken(char* filename, int linenumber) { struct Token* p; p = calloc (1, sizeof (struct Token)); require(NULL != p, "Exhausted available memory\n"); p->filename = filename; 
p->linenumber = linenumber; return p; } struct Token* reverse_list(struct Token* head) { struct Token* root = NULL; struct Token* next; while(NULL != head) { next = head->next; head->next = root; root = head; head = next; } return root; } void purge_lineComment() { int c = fgetc(source_file); while(!in_set(c, "\n\r")) { if(EOF == c) break; c = fgetc(source_file); } } struct Token* append_newline(struct Token* head, char* filename) { linenumber = linenumber + 1; if(NULL == head) return NULL; if(NEWLINE == head->contents->type) {/* Don't waste whitespace*/ return head; } struct Token* lf = newToken(filename, linenumber); lf->contents = newline_blob; lf->next = head; return lf; } struct Token* store_atom(struct Token* head, char c, char* filename) { ClearScratch(); int ch = c; int i = 0; do { SCRATCH[i] = ch; ch = fgetc(source_file); i = i + 1; if(i >= max_string) { fputs("storing atom of size larger than max_string\n", stderr); line_error(filename, linenumber); fputc('\n', stderr); exit(EXIT_FAILURE); } if(EOF == ch) break; } while (!in_set(ch, "\t\n ")); head->contents = FindBlob(); if(NULL == head->contents) { NewBlob(i); head->contents = blob_list; } if('\n' == ch) { return append_newline(head, filename); } return head; } struct blob* store_string(char c, char* filename) { ClearScratch(); int ch = c; int i = 0; do { SCRATCH[i] = ch; i = i + 1; if('\n' == ch) linenumber = linenumber + 1; ch = fgetc(source_file); require(EOF != ch, "Unmatched \"!\n"); if(max_string == i) { line_error(filename, linenumber); fputs("String: ", stderr); fputs(SCRATCH, stderr); fputs(" exceeds max string size\n", stderr); exit(EXIT_FAILURE); } } while(ch != c); struct blob* a = FindBlob(); if(NULL == a) { NewBlob(i); a = blob_list; a->type = STR; } return a; } struct Token* Tokenize_Line(struct Token* head, char* filename) { int c; struct Token* p; linenumber = 1; do { restart: c = fgetc(source_file); if(in_set(c, ";#")) { purge_lineComment(); head = append_newline(head, filename); goto 
restart; } if(in_set(c, "\t ")) { goto restart; } if('\n' == c) { head = append_newline(head, filename); goto restart; } if(EOF == c) { head = append_newline(head, filename); goto done; } p = newToken(filename, linenumber); p->next = head; if(in_set(c, "'\"")) { p->contents = store_string(c, filename); } else { p = store_atom(p, c, filename); } head = p; } while(TRUE); done: return head; } void line_macro(struct Token* p) { struct Token* i; for(i = p; NULL != i; i = i->next) { if(define_blob == i->contents) { require(NULL != i->next, "Macro name must exist\n"); require(NULL != i->next->next, "Macro value must exist\n"); i->contents = newline_blob; if (STR == i->next->next->contents->type) { i->next->contents->Expression = i->next->next->contents->Text + 1; } else { i->next->contents->Expression = i->next->next->contents->Text; } i->next = i->next->next->next; } } } void hexify_string(struct blob* p) { char* table = "0123456789ABCDEF"; int i = strlen(p->Text); int size; if(HEX == ByteMode) size = (((i << 1) + i) + 12); else if(OCTAL == ByteMode) size = (i << 2) + 1; else if(BINARY == ByteMode) size = (i << 3) + i + 1; else size = 1; require(1 != size, "hexify_string lacked a valid bytemode\n"); char* d = calloc(size, sizeof(char)); require(NULL != d, "Exhausted available memory\n"); p->Expression = d; char* S = p->Text; if((KNIGHT == Architecture) && (HEX == ByteMode)) { i = (((((i - 1) >> 2) + 1) << 3) + i); while( 0 < i) { i = i - 1; d[i] = '0'; } } if(HEX == ByteMode) { while(0 != S[0]) { S = S + 1; d[0] = table[S[0] >> 4]; d[1] = table[S[0] & 0xF]; d[2] = ' '; d = d + 3; } } else if(OCTAL == ByteMode) { while(0 != S[0]) { S = S + 1; d[0] = table[S[0] >> 6]; d[1] = table[(S[0] >> 3) & 0x7]; d[2] = table[S[0] & 0x7]; d[3] = ' '; d = d + 4; } } else if(BINARY == ByteMode) { while(0 != S[0]) { S = S + 1; d[0] = table[S[0] >> 7]; d[1] = table[(S[0] >> 6) & 0x1]; d[2] = table[(S[0] >> 5) & 0x1]; d[3] = table[(S[0] >> 4) & 0x1]; d[4] = table[(S[0] >> 3) & 0x1]; d[5] = 
table[(S[0] >> 2) & 0x1]; d[6] = table[(S[0] >> 1) & 0x1]; d[7] = table[S[0] & 0x1]; d[8] = ' '; d = d + 9; } } } void process_string(struct blob* p) { struct blob* i; for(i = p; NULL != i; i = i->next) { if(STR == i->type) { if('\'' == i->Text[0]) { i->Expression = i->Text + 1; } else if('"' == i->Text[0]) { hexify_string(i); } } } } char* pad_nulls(int size, char* nil) { if(0 == size) return nil; require(size > 0, "negative null padding not possible\n"); if(HEX == ByteMode) size = size * 2; else if (OCTAL == ByteMode) size = size * 3; else if (BINARY == ByteMode) size = size * 8; char* s = calloc(size + 1, sizeof(char)); require(NULL != s, "Exhausted available memory\n"); int i = 0; while(i < size) { s[i] = '0'; i = i + 1; } return s; } void preserve_other(struct blob* p) { struct blob* i; char c; for(i = p; NULL != i; i = i->next) { if(NULL == i->Expression) { c = i->Text[0]; if(in_set(c, "!@$~%&:^")) { i->Expression = i->Text; } else if('<' == c) { i->Expression = pad_nulls(strtoint(i->Text + 1), i->Text); } } } } void bound_values(int displacement, int number_of_bytes, int low, int high) { if((high < displacement) || (displacement < low)) { fputs("A displacement of ", stderr); fputs(int2str(displacement, 10, TRUE), stderr); fputs(" does not fit in ", stderr); fputs(int2str(number_of_bytes, 10, TRUE), stderr); fputs(" bytes\n", stderr); exit(EXIT_FAILURE); } } void range_check(int displacement, int number_of_bytes, int absolute) { if(4 == number_of_bytes) return; else if(absolute && (3 == number_of_bytes)) { bound_values(displacement, number_of_bytes, -8388609, 16777217); return; } else if(3 == number_of_bytes) { bound_values(displacement, number_of_bytes, -8388609, 8388608); return; } else if(absolute && (2 == number_of_bytes)) { bound_values(displacement, number_of_bytes, -32769, 65536); return; } else if(2 == number_of_bytes) { bound_values(displacement, number_of_bytes, -32769, 32768); return; } else if(absolute && (1 == number_of_bytes)) { 
bound_values(displacement, number_of_bytes, -1, 256); return; } else if(1 == number_of_bytes) { /* work around current only signed bytes */ bound_values(displacement, number_of_bytes, -129, 256); return; } fputs("Received an invalid number of bytes in range_check\n", stderr); exit(EXIT_FAILURE); } char* express_number(int value, char c) { char* ch = calloc(42, sizeof(char)); require(NULL != ch, "Exhausted available memory\n"); int size; int number_of_bytes; int shift; int absolute = FALSE; if('!' == c) number_of_bytes = 1; else if('@' == c) number_of_bytes = 2; else if('$' == c) { number_of_bytes = 2; absolute = TRUE; } else if('~' == c) number_of_bytes = 3; else if('%' == c) number_of_bytes = 4; else if('&' == c) { number_of_bytes = 4; absolute = TRUE; } else { fputs("Given symbol ", stderr); fputc(c, stderr); fputs(" to express immediate value ", stderr); fputs(int2str(value, 10, TRUE), stderr); fputc('\n', stderr); exit(EXIT_FAILURE); } range_check(value, number_of_bytes, absolute); /* don't truncate prior to range check for -1 behavior */ if('!' == c) value = value & 0xFF; else if(('@' == c) || ('$' == c)) value = value & 0xFFFF; else if('~' == c) value = value & 0xFFFFFF; else if(('%' == c) || ('&' == c)) value = value & 0xFFFFFFFF; if(HEX == ByteMode) { size = number_of_bytes * 2; shift = 4; } else if(OCTAL == ByteMode) { size = number_of_bytes * 3; shift = 3; } else if(BINARY == ByteMode) { size = number_of_bytes * 8; shift = 1; } else { fputs("Got invalid ByteMode in express_number\n", stderr); exit(EXIT_FAILURE); } stringify(ch, size, ByteMode, value, shift); if(!BigEndian) LittleEndian(ch, ByteMode); return ch; } char* express_word(int value, char c) { char* s = calloc(43, sizeof(char)); s[0] = '.'; char* ch = s + 1; require(NULL != ch, "Exhausted available memory\n"); int size; int shift; int immediate; if('!' 
== c) { /* Corresponds to RISC-V I format */ immediate = (value & 0xFFF) << 20; } else if('@' == c) { /* Corresponds to RISC-V S format */ immediate = ((value & 0x1F) << 7) | ((value & 0xFE0) << (31 - 11)); } else if('~' == c) { /* Corresponds with RISC-V U format */ if ((value & 0xFFF) < 0x800) { immediate = value & 0xFFFFF000; } else { immediate = (value & 0xFFFFF000) + 0x1000; } } else if('%' == c) { /* provides an option for 32bit immediate constants */ immediate = value & 0xFFFFFFFF; /* Drop the leading . */ ch = s; } else { fputs("Given symbol ", stderr); fputc(c, stderr); fputs(" to express immediate value ", stderr); fputs(int2str(value, 10, TRUE), stderr); fputc('\n', stderr); exit(EXIT_FAILURE); } if(HEX == ByteMode) { size = 4 * 2; shift = 4; } else if(OCTAL == ByteMode) { size = 4 * 3; shift = 3; } else if(BINARY == ByteMode) { size = 4 * 8; shift = 1; } else { fputs("Got invalid ByteMode in express_number\n", stderr); exit(EXIT_FAILURE); } stringify(ch, size, ByteMode, immediate, shift); if(!BigEndian) LittleEndian(ch, ByteMode); return s; } void eval_immediates(struct blob* p) { struct blob* i; int value; for(i = p; NULL != i; i = i->next) { if(NEWLINE == i->type) continue; else if('<' == i->Text[0]) continue; else if(NULL == i->Expression) { if((X86 == Architecture) || (AMD64 == Architecture) || (ARMV7L == Architecture) || (AARM64 == Architecture) || (PPC64LE == Architecture)) { if(in_set(i->Text[0], "%~@!&$")) { value = strtoint(i->Text + 1); if(('0' == i->Text[1]) || (0 != value)) { i->Expression = express_number(value, i->Text[0]); } } } else if((RISCV32 == Architecture) || (RISCV64 == Architecture)) { if(in_set(i->Text[0], "%~@!")) { value = strtoint(i->Text + 1); if(('0' == i->Text[1]) || (0 != value)) { i->Expression = express_word(value, i->Text[0]); } } } else if(KNIGHT == Architecture) { value = strtoint(i->Text); if(('0' == i->Text[0]) || (0 != value)) { if(value > 65536) continue; else if(value > 32767) i->Expression = 
express_number(value, '$'); else i->Expression = express_number(value, '@'); } } else { fputs("Unknown architecture received in eval_immediates\n", stderr); exit(EXIT_FAILURE); } } } } void print_hex(struct Token* p) { struct Token* i; for(i = p; NULL != i; i = i->next) { if(NEWLINE == i->contents->type) { if(NULL == i->next) fputc('\n', destination_file); else if(NEWLINE != i->next->contents->type) fputc('\n', destination_file); } else if(NULL != i->contents->Expression) { fputs(i->contents->Expression, destination_file); if(NEWLINE != i->next->contents->type) fputc(' ', destination_file); } else { line_error(i->filename, i->linenumber); fputs("Received invalid other; ", stderr); fputs(i->contents->Text, stderr); fputs("\n", stderr); exit(EXIT_FAILURE); } } } /* Standard C main program */ int main(int argc, char **argv) { BigEndian = TRUE; Architecture = KNIGHT; destination_file = stdout; ByteMode = HEX; char* filename; char* arch; blob_count = 2; hash_table = calloc(65537, sizeof(struct blob*)); require(NULL != hash_table, "failed to allocate hash_table\n"); /* Create newline blob */ newline_blob = calloc(1, sizeof(struct blob)); require(NULL != newline_blob, "failed to allocate newline_blob\n"); newline_blob->Text = "\n"; newline_blob->Expression = "\n"; newline_blob->type = NEWLINE; AddHash(newline_blob, "\n"); /* Start the blob list with DEFINE and newline */ blob_list = calloc(1, sizeof(struct blob)); require(NULL != blob_list, "failed to allocate DEFINE blob\n"); blob_list->Text = "DEFINE"; define_blob = blob_list; blob_list->next = newline_blob; AddHash(define_blob, "DEFINE"); /* Initialize scratch */ SCRATCH = calloc(max_string + 1, sizeof(char)); require(NULL != SCRATCH, "failed to allocate SCRATCH buffer"); int option_index = 1; while(option_index <= argc) { if(NULL == argv[option_index]) { option_index = option_index + 1; } else if(match(argv[option_index], "--big-endian")) { BigEndian = TRUE; option_index = option_index + 1; } else 
if(match(argv[option_index], "--little-endian")) { BigEndian = FALSE; option_index = option_index + 1; } else if(match(argv[option_index], "-A") || match(argv[option_index], "--architecture")) { arch = argv[option_index + 1]; if(match("knight-native", arch) || match("knight-posix", arch)) Architecture = KNIGHT; else if(match("x86", arch)) Architecture = X86; else if(match("amd64", arch)) Architecture = AMD64; else if(match("armv7l", arch)) Architecture = ARMV7L; else if(match("aarch64", arch)) Architecture = AARM64; else if(match("ppc64le", arch)) Architecture = PPC64LE; else if(match("riscv32", arch)) Architecture = RISCV32; else if(match("riscv64", arch)) Architecture = RISCV64; else { fputs("Unknown architecture: ", stderr); fputs(arch, stderr); fputs(" know values are: knight-native, knight-posix, x86, amd64, armv7l, aarch64, ppc64le, riscv32 and riscv64", stderr); exit(EXIT_FAILURE); } option_index = option_index + 2; } else if(match(argv[option_index], "-b") || match(argv[option_index], "--binary")) { ByteMode = BINARY; option_index = option_index + 1; } else if(match(argv[option_index], "-h") || match(argv[option_index], "--help")) { fputs("Usage: ", stderr); fputs(argv[0], stderr); fputs(" --file FILENAME1 {-f FILENAME2} (--big-endian|--little-endian) ", stderr); fputs("[--architecture name]\nArchitectures: knight-native, knight-posix, x86, amd64, armv7, riscv32 and riscv64\n", stderr); fputs("To leverage octal or binary output: --octal, --binary\n", stderr); exit(EXIT_SUCCESS); } else if(match(argv[option_index], "-f") || match(argv[option_index], "--file")) { filename = argv[option_index + 1]; source_file = fopen(filename, "r"); if(NULL == source_file) { fputs("The file: ", stderr); fputs(argv[option_index + 1], stderr); fputs(" can not be opened!\n", stderr); exit(EXIT_FAILURE); } token_list = Tokenize_Line(token_list, filename); fclose(source_file); option_index = option_index + 2; } else if(match(argv[option_index], "-o") || match(argv[option_index], 
"--output")) { destination_file = fopen(argv[option_index + 1], "w"); if(NULL == destination_file) { fputs("The file: ", stderr); fputs(argv[option_index + 1], stderr); fputs(" can not be opened!\n", stderr); exit(EXIT_FAILURE); } option_index = option_index + 2; } else if(match(argv[option_index], "-O") || match(argv[option_index], "--octal")) { ByteMode = OCTAL; option_index = option_index + 1; } else if(match(argv[option_index], "-V") || match(argv[option_index], "--version")) { fputs("M1 1.5.0\n", stdout); exit(EXIT_SUCCESS); } else { fputs("Unknown option\n", stderr); exit(EXIT_FAILURE); } } if(NULL == token_list) { fputs("Either no input files were given or they were empty\n", stderr); exit(EXIT_FAILURE); } token_list = reverse_list(token_list); line_macro(token_list); process_string(blob_list); eval_immediates(blob_list); preserve_other(blob_list); print_hex(token_list); fclose(destination_file); return EXIT_SUCCESS; }
### Copyright (C) 2016 Jeremiah Orians ### Copyright (C) 2017 Jan Nieuwenhuizen <janneke@gnu.org> ### This file is part of M2-Planet. ### ### M2-Planet is free software: you can redistribute it and/or modify ### it under the terms of the GNU General Public License as published by ### the Free Software Foundation, either version 3 of the License, or ### (at your option) any later version. ### ### M2-Planet is distributed in the hope that it will be useful, ### but WITHOUT ANY WARRANTY; without even the implied warranty of ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ### GNU General Public License for more details. ### ### You should have received a copy of the GNU General Public License ### along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. ### stage0's hex2 format ### !<label> 1 byte relative ### $<label> 2 byte address ### @<label> 2 byte relative ### &<label> 4 byte address ### %<label> 4 byte relative ### if you wish to use this header, you need to add :ELF_end to the end of your ### M1 or hex2 files. 
## ELF Header
## 64-bit little-endian ELF header for AMD64 with one program header and a
## section header table of five entries.  Every field sits on its own line
## because '#' starts a comment that runs to the end of the line.
## Besides _start and ELF_end, the label ELF_section_headers must be
## defined by whatever is assembled together with this header.

:ELF_base
7F 45 4C 46                                 # e_ident[EI_MAG0-3] ELF's magic number

02                                          # e_ident[EI_CLASS] Indicating 64 bit
01                                          # e_ident[EI_DATA] Indicating little endianness
01                                          # e_ident[EI_VERSION] Indicating original elf

03                                          # e_ident[EI_OSABI] Set at 3 because FreeBSD is strict
00                                          # e_ident[EI_ABIVERSION] See above

00 00 00 00 00 00 00                        # e_ident[EI_PAD]

02 00                                       # e_type Indicating Executable
3E 00                                       # e_machine Indicating AMD64
01 00 00 00                                 # e_version Indicating original elf

&_start 00 00 00 00                         # e_entry Address of the entry point
%ELF_program_headers>ELF_base 00 00 00 00   # e_phoff Address of program header table
%ELF_section_headers>ELF_base 00 00 00 00   # e_shoff Address of section header table

00 00 00 00                                 # e_flags

40 00                                       # e_ehsize Indicating our 64 Byte header

38 00                                       # e_phentsize size of a program header table
01 00                                       # e_phnum number of entries in program table

40 00                                       # e_shentsize size of a section header table
05 00                                       # e_shnum number of entries in section table

02 00                                       # e_shstrndx index of the section names

:ELF_program_headers
:ELF_program_header__text
01 00 00 00                                 # ph_type: PT-LOAD = 1
07 00 00 00                                 # ph_flags: PF-X|PF-W|PF-R = 7
00 00 00 00 00 00 00 00                     # ph_offset
&ELF_base 00 00 00 00                       # ph_vaddr
&ELF_base 00 00 00 00                       # ph_physaddr
%ELF_end>ELF_base 00 00 00 00               # ph_filesz
%ELF_end>ELF_base 00 00 00 00               # ph_memsz
01 00 00 00 00 00 00 00                     # ph_align

:ELF_text
/* Copyright (C) 2016 Jeremiah Orians * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _SYS_TYPES_H #define _SYS_TYPES_H #ifndef __M2__ #include "../gcc_req.h" #endif typedef SCM ulong; typedef long ssize_t; typedef int pid_t; typedef long intptr_t; typedef ulong uintptr_t; typedef long clock_t; typedef int mode_t; typedef long dev_t; #endif
/* Copyright (C) 2016 Jeremiah Orians * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _STDDEF_H #define _STDDEF_H #include <sys/types.h> #define NULL 0 typedef long ptrdiff_t; typedef ulong size_t; #endif
/* Copyright (C) 2016 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef __FCNTL_C
#define __FCNTL_C

/* Access modes and creation flags passed as open's flag argument */
#define O_RDONLY 0
#define O_WRONLY 1
#define O_RDWR 2
#define O_CREAT 00100
#define O_EXCL 00200
#define O_TRUNC 001000
#define O_APPEND 002000

/* Owner permission bits passed as open's mode argument */
#define S_IXUSR 00100
#define S_IWUSR 00200
#define S_IRUSR 00400
#define S_IRWXU 00700

/*
 * Raw syscall 2: loads name, flag and mode from the stack into
 * rdi, rsi and rdx and leaves the kernel's result in rax untouched.
 */
int _open(char* name, int flag, int mode)
{
	asm("lea_rdi,[rsp+DWORD] %24"
	    "mov_rdi,[rdi]"
	    "lea_rsi,[rsp+DWORD] %16"
	    "mov_rsi,[rsi]"
	    "lea_rdx,[rsp+DWORD] %8"
	    "mov_rdx,[rdx]"
	    "mov_rax, %2"
	    "syscall");
}

#define STDIN_FILENO 0
#define STDOUT_FILENO 1
#define STDERR_FILENO 2

#endif
/* Copyright (C) 2016 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _FCNTL_C
#define _FCNTL_C

/* Under M2-Planet pull in the platform's _open; otherwise expect it at link time */
#ifdef __M2__
#if __uefi__
#include <uefi/fcntl.c>
#elif __i386__
#include <x86/linux/fcntl.c>
#elif __x86_64__
#include <amd64/linux/fcntl.c>
#elif __arm__
#include <armv7l/linux/fcntl.c>
#elif __aarch64__
#include <aarch64/linux/fcntl.c>
#elif __riscv && __riscv_xlen==32
#include <riscv32/linux/fcntl.c>
#elif __riscv && __riscv_xlen==64
#include <riscv64/linux/fcntl.c>
#elif __knight_posix__
#include <knight/linux/fcntl.c>
#elif __knight__
#include <knight/native/fcntl.c>
#else
#error arch not supported
#endif
#else
extern int _open(char* name, int flag, int mode);
#endif

int errno;

/*
 * Open name with the given flag and mode via _open.
 * Returns the file descriptor; if _open returns a negative value, its
 * magnitude is stored in errno and -1 is returned instead.
 */
int open(char* name, int flag, int mode)
{
	int fd = _open(name, flag, mode);
	if(0 > fd) {
		errno = -fd;
		fd = -1;
	}
	return fd;
}

#endif
/* Copyright (C) 2024 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _UTSNAME_H
#define _UTSNAME_H

/* Buffer filled in by uname(); five fixed-size 65-byte text fields */
struct utsname
{
	char sysname[65];    /* Operating system name (e.g., "Linux") */
	char nodename[65];   /* Name within "some implementation-defined network" */
	char release[65];    /* Operating system release (e.g., "2.6.28") */
	char version[65];    /* Operating system version */
	char machine[65];    /* Hardware identifier */
};

#ifdef __M2__
/* already exists in $ARCH/linux/unistd.c */
#else
int uname(struct utsname*);
#endif

#endif
/* Copyright (C) 2020 Jeremiah Orians * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _UNISTD_C #define _UNISTD_C #include <sys/utsname.h> #define NULL 0 #define __PATH_MAX 4096 void* malloc(unsigned size); int access(char* pathname, int mode) { asm("lea_rdi,[rsp+DWORD] %16" "mov_rdi,[rdi]" "lea_rsi,[rsp+DWORD] %8" "mov_rsi,[rsi]" "mov_rax, %21" "syscall"); } int chdir(char* path) { asm("lea_rdi,[rsp+DWORD] %8" "mov_rdi,[rdi]" "mov_rax, %80" "syscall"); } int fchdir(int fd) { asm("lea_rdi,[rsp+DWORD] %8" "mov_rdi,[rdi]" "mov_rax, %81" "syscall"); } void _exit(int value); int fork() { asm("mov_rax, %57" "mov_rdi, %0" "syscall"); } int waitpid (int pid, int* status_ptr, int options) { /* Uses wait4 with struct rusage *ru set to NULL */ asm("lea_rdi,[rsp+DWORD] %24" "mov_rdi,[rdi]" "lea_rsi,[rsp+DWORD] %16" "mov_rsi,[rsi]" "lea_rdx,[rsp+DWORD] %8" "mov_rdx,[rdx]" "mov_r10, %0" "mov_rax, %61" "syscall"); } int execve(char* file_name, char** argv, char** envp) { asm("lea_rdi,[rsp+DWORD] %24" "mov_rdi,[rdi]" "lea_rsi,[rsp+DWORD] %16" "mov_rsi,[rsi]" "lea_rdx,[rsp+DWORD] %8" "mov_rdx,[rdx]" "mov_rax, %59" "syscall"); } int read(int fd, char* buf, unsigned count) { /*maybe*/ asm("lea_rdi,[rsp+DWORD] %24" "mov_rdi,[rdi]" "lea_rsi,[rsp+DWORD] %16" "mov_rsi,[rsi]" "lea_rdx,[rsp+DWORD] %8" "mov_rdx,[rdx]" "mov_rax, %0" "syscall"); } int write(int fd, char* buf, 
unsigned count) {/*maybe*/ asm("lea_rdi,[rsp+DWORD] %24" "mov_rdi,[rdi]" "lea_rsi,[rsp+DWORD] %16" "mov_rsi,[rsi]" "lea_rdx,[rsp+DWORD] %8" "mov_rdx,[rdx]" "mov_rax, %1" "syscall"); } int lseek(int fd, int offset, int whence) { asm("lea_rdi,[rsp+DWORD] %24" "mov_rdi,[rdi]" "lea_rsi,[rsp+DWORD] %16" "mov_rsi,[rsi]" "lea_rdx,[rsp+DWORD] %8" "mov_rdx,[rdx]" "mov_rax, %8" "syscall"); } int close(int fd) { asm("lea_rdi,[rsp+DWORD] %8" "mov_rdi,[rdi]" "mov_rax, %3" "syscall"); } int unlink (char* filename) { asm("lea_rdi,[rsp+DWORD] %8" "mov_rdi,[rdi]" "mov_rax, %87" "syscall"); } int _getcwd(char* buf, int size) { asm("lea_rdi,[rsp+DWORD] %16" "mov_rdi,[rdi]" "lea_rsi,[rsp+DWORD] %8" "mov_rsi,[rsi]" "mov_rax, %79" "syscall"); } char* getcwd(char* buf, unsigned size) { int c = _getcwd(buf, size); if(0 == c) return NULL; return buf; } char* getwd(char* buf) { return getcwd(buf, __PATH_MAX); } char* get_current_dir_name() { return getcwd(malloc(__PATH_MAX), __PATH_MAX); } int brk(void *addr) { asm("mov_rax,[rsp+DWORD] %8" "push_rax" "mov_rax, %12" "pop_rbx" "mov_rdi,rbx" "syscall"); } int uname(struct utsname* unameData) { asm("lea_rdi,[rsp+DWORD] %8" "mov_rdi,[rdi]" "mov_rax, %63" "syscall"); } int unshare(int flags) { asm("lea_rdi,[rsp+DWORD] %8" "mov_rdi,[rdi]" "mov_rax, %272" "syscall"); } int geteuid() { asm("mov_rax, %107" "syscall"); } int getegid() { asm("mov_rax, %108" "syscall"); } int mount(char *source, char *target, char *filesystemtype, SCM mountflags, void *data) { asm("lea_rdi,[rsp+DWORD] %40" "mov_rdi,[rdi]" "lea_rsi,[rsp+DWORD] %32" "mov_rsi,[rsi]" "lea_rdx,[rsp+DWORD] %24" "mov_rdx,[rdx]" "lea_r10,[rsp+DWORD] %16" "mov_r10,[r10]" "lea_r8,[rsp+DWORD] %8" "mov_r8,[r8]" "mov_rax, %165" "syscall"); } int chroot(char *path) { asm("lea_rdi,[rsp+DWORD] %8" "mov_rdi,[rdi]" "mov_rax, %161" "syscall"); } #endif
/* Copyright (C) 2020 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * File-mode constants and thin syscall wrappers.  Each asm body loads
 * its arguments from the stack into the argument registers, puts the
 * syscall number in rax and executes syscall; whatever the kernel leaves
 * in rax is the return value.
 */

#ifndef _SYS_STAT_C
#define _SYS_STAT_C

#include <sys/types.h>

/* Permission and set-id bits, written in octal */
#define S_IRWXU 00700
#define S_IXUSR 00100
#define S_IWUSR 00200
#define S_IRUSR 00400

#define S_ISUID 04000
#define S_ISGID 02000
#define S_IXGRP 00010
#define S_IXOTH 00001
#define S_IRGRP 00040
#define S_IROTH 00004
#define S_IWGRP 00020
#define S_IWOTH 00002
#define S_IRWXG 00070
#define S_IRWXO 00007

/* syscall 90 */
int chmod(char *pathname, int mode)
{
	asm("lea_rdi,[rsp+DWORD] %16"
	    "mov_rdi,[rdi]"
	    "lea_rsi,[rsp+DWORD] %8"
	    "mov_rsi,[rsi]"
	    "mov_rax, %90"
	    "syscall");
}

/* syscall 91 */
int fchmod(int a, mode_t b)
{
	asm("lea_rdi,[rsp+DWORD] %16"
	    "mov_rdi,[rdi]"
	    "lea_rsi,[rsp+DWORD] %8"
	    "mov_rsi,[rsi]"
	    "mov_rax, %91"
	    "syscall");
}

/* syscall 83 */
int mkdir(char const* a, mode_t b)
{
	asm("lea_rdi,[rsp+DWORD] %16"
	    "mov_rdi,[rdi]"
	    "lea_rsi,[rsp+DWORD] %8"
	    "mov_rsi,[rsi]"
	    "mov_rax, %83"
	    "syscall");
}

/* syscall 133 */
int mknod(char const* a, mode_t b, dev_t c)
{
	asm("lea_rdi,[rsp+DWORD] %24"
	    "mov_rdi,[rdi]"
	    "lea_rsi,[rsp+DWORD] %16"
	    "mov_rsi,[rsi]"
	    "lea_rdx,[rsp+DWORD] %8"
	    "mov_rdx,[rdx]"
	    "mov_rax, %133"
	    "syscall");
}

/* syscall 95 */
mode_t umask(mode_t m)
{
	asm("lea_rdi,[rsp+DWORD] %8"
	    "mov_rdi,[rdi]"
	    "mov_rax, %95"
	    "syscall");
}

#endif
/* Copyright (C) 2016 Jeremiah Orians * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #include <unistd.h> #include <sys/stat.h> #include <fcntl.h> #define EXIT_FAILURE 1 #define EXIT_SUCCESS 0 #define _IN_USE 1 #define _NOT_IN_USE 0 typedef char wchar_t; void exit(int value); struct _malloc_node { struct _malloc_node *next; void* block; size_t size; int used; }; struct _malloc_node* _allocated_list; struct _malloc_node* _free_list; /******************************** * The core POSIX malloc * ********************************/ long _malloc_ptr; long _brk_ptr; void* _malloc_brk(unsigned size) { if(NULL == _brk_ptr) { _brk_ptr = brk(0); _malloc_ptr = _brk_ptr; } if(_brk_ptr < _malloc_ptr + size) { _brk_ptr = brk(_malloc_ptr + size); if(-1 == _brk_ptr) return 0; } long old_malloc = _malloc_ptr; _malloc_ptr = _malloc_ptr + size; return old_malloc; } void __init_malloc() { _free_list = NULL; _allocated_list = NULL; return; } /************************************************************************ * Handle with the tricky insert behaviors for our nodes * * As free lists must be sorted from smallest to biggest to enable * * cheap first fit logic * * The free function however is rarely called, so it can kick sand and * * do things the hard way * ************************************************************************/ void _malloc_insert_block(struct _malloc_node* n, int 
used) { /* Allocated block doesn't care about order */ if(_IN_USE == used) { /* Literally just be done as fast as possible */ n->next = _allocated_list; _allocated_list = n; return; } /* sanity check garbage */ if(_NOT_IN_USE != used) exit(EXIT_FAILURE); if(_NOT_IN_USE != n->used) exit(EXIT_FAILURE); if(NULL != n->next) exit(EXIT_FAILURE); /* Free block really does care about order */ struct _malloc_node* i = _free_list; struct _malloc_node* last = NULL; while(NULL != i) { /* sort smallest to largest */ if(n->size <= i->size) { /* Connect */ n->next = i; /* If smallest yet */ if(NULL == last) _free_list = n; /* or just another average block */ else last->next = n; return; } /* iterate */ last = i; i = i->next; } /* looks like we are the only one */ if(NULL == last) _free_list = n; /* or we are the biggest yet */ else last->next = n; } /************************************************************************ * We only mark a block as unused, we don't actually deallocate it here * * But rather shove it into our _free_list * ************************************************************************/ void free(void* ptr) { /* just in case someone needs to quickly turn it off */ #ifndef _MALLOC_DISABLE_FREE struct _malloc_node* i = _allocated_list; struct _malloc_node* last = NULL; /* walk the whole freaking list if needed to do so */ while(NULL != i) { /* did we find it? 
*/ if(i->block == ptr) { /* detach the block */ if(NULL == last) _allocated_list = i->next; /* in a way that doesn't break the allocated list */ else last->next = i->next; /* insert into free'd list */ i->used = _NOT_IN_USE; i->next = NULL; _malloc_insert_block(i, _NOT_IN_USE); return; } /* iterate */ last = i; i = i->next; } /* we received a pointer to a block that wasn't allocated */ /* Bail *HARD* because I don't want to cover this edge case */ exit(EXIT_FAILURE); #endif /* if free is disabled, there is nothing to do */ return; } /************************************************************************ * find if there is any "FREED" blocks big enough to sit on our memory * * budget's face and ruin its life. Respectfully of course * ************************************************************************/ void* _malloc_find_free(unsigned size) { struct _malloc_node* i = _free_list; struct _malloc_node* last = NULL; /* Walk the whole list if need be */ while(NULL != i) { /* see if anything in it is equal or bigger than what I need */ if((_NOT_IN_USE == i->used) && (i->size > size)) { /* disconnect from list ensuring we don't break free doing so */ if(NULL == last) _free_list = i->next; else last->next = i->next; /* insert into allocated list */ i->used = _IN_USE; i->next = NULL; _malloc_insert_block(i, _IN_USE); return i->block; } /* iterate (will loop forever if you get this wrong) */ last = i; i = i->next; } /* Couldn't find anything big enough */ return NULL; } /************************************************************************ * Well we couldn't find any memory good enough to satisfy our needs so * * we are going to have to go beg for some memory on the street corner * ************************************************************************/ void* _malloc_add_new(unsigned size) { struct _malloc_node* n; #ifdef __uefi__ n = _malloc_uefi(sizeof(struct _malloc_node)); /* Check if we were beaten */ if(NULL == n) return NULL; n->block = _malloc_uefi(size); 
#else n = _malloc_brk(sizeof(struct _malloc_node)); /* Check if we were beaten */ if(NULL == n) return NULL; n->block = _malloc_brk(size); #endif /* check if we were robbed */ if(NULL == n->block) return NULL; /* Looks like we made it home safely */ n->size = size; n->next = NULL; n->used = _IN_USE; /* lets pop the cork and party */ _malloc_insert_block(n, _IN_USE); return n->block; } /************************************************************************ * Safely iterates over all malloc nodes and frees them * ************************************************************************/ void __malloc_node_iter(struct _malloc_node* node, FUNCTION _free) { struct _malloc_node* current; while(node != NULL) { current = node; node = node->next; _free(current->block); _free(current); } } /************************************************************************ * Runs a callback with all previously allocated nodes. * * This can be useful if operating system does not do any clean up. * ************************************************************************/ void* _malloc_release_all(FUNCTION _free) { __malloc_node_iter(_allocated_list, _free); __malloc_node_iter(_free_list, _free); } /************************************************************************ * Provide a POSIX standardish malloc function to keep things working * ************************************************************************/ void* malloc(unsigned size) { /* skip allocating nothing */ if(0 == size) return NULL; /* use one of the standard block sizes */ size_t max = 1 << 30; size_t used = 256; while(used < size) { used = used << 1; /* fail big allocations */ if(used > max) return NULL; } /* try the cabinets around the house */ void* ptr = _malloc_find_free(used); /* looks like we need to get some more from the street corner */ if(NULL == ptr) { ptr = _malloc_add_new(used); } /* hopefully you can handle NULL pointers, good luck */ return ptr; } 
/************************************************************************
 * Provide a POSIX standardish memset function to keep things working   *
 *                                                                      *
 * ptr:   start of the region to fill.                                  *
 * value: byte value to store (truncated to a char).                    *
 * num:   number of bytes to fill; zero or negative fills nothing.      *
 * Returns ptr.                                                         *
 ************************************************************************/
void* memset(void* ptr, int value, int num)
{
	char* s;
	/* basically walk the block 1 byte at a time and set it to any value you want */
	for(s = ptr; 0 < num; num = num - 1)
	{
		s[0] = value;
		s = s + 1;
	}

	return ptr;
}

/************************************************************************
 * Provide a POSIX standardish calloc function to keep things working   *
 *                                                                      *
 * Allocates count * size bytes with malloc and zeroes them.            *
 * Returns NULL when malloc does.                                       *
 ************************************************************************/
void* calloc(int count, int size)
{
	/* if things get allocated, we are good*/
	void* ret = malloc(count * size);

	/* otherwise good luck */
	if(NULL == ret) return NULL;
	memset(ret, 0, (count * size));
	return ret;
}

/* USED EXCLUSIVELY BY MKSTEMP */
/* Writes i as decimal digits into s[0]..s[5], least significant digit in s[5] */
void __set_name(char* s, int i)
{
	s[5] = '0' + (i % 10);
	i = i / 10;
	s[4] = '0' + (i % 10);
	i = i / 10;
	s[3] = '0' + (i % 10);
	i = i / 10;
	s[2] = '0' + (i % 10);
	i = i / 10;
	s[1] = '0' + (i % 10);
	i = i / 10;
	s[0] = '0' + i;
}

/************************************************************************
 * Provide a POSIX standardish mkstemp function to keep things working  *
 *                                                                      *
 * template: writable string longer than 6 characters whose last 6      *
 *           characters are 'X'; they are overwritten with digits.      *
 * Returns the descriptor from open() on success, or -1 when the        *
 * template is rejected or every candidate name failed to open.         *
 ************************************************************************/
int mkstemp(char *template)
{
	/* get length of template */
	int i = 0;
	while(0 != template[i]) i = i + 1;
	i = i - 1;

	/* String MUST be more than 6 characters in length */
	if(i < 6) return -1;

	/* Sanity check the string matches the template requirements */
	int count = 6;
	int c;
	while(count > 0)
	{
		c = template[i];
		/* last 6 chars must be X */
		if('X' != c) return -1;
		template[i] = '0';
		i = i - 1;
		count = count - 1;
	}

	int fd = -1;
	count = -1;
	/* open will return -17 or other values */
	while(0 > fd)
	{
		/* Just give up after the planet has blown up */
		if(9000 < count) return -1;

		/* Try up to 9000 unique filenames before stopping */
		count = count + 1;
		__set_name(template+i+1, count);

		/* Pray we can */
		fd = open(template, O_RDWR | O_CREAT | O_EXCL, 00600);
	}

	/* well that only took count many tries */
	return fd;
}

/************************************************************************
 * wcstombs - convert a wide-character string to a multibyte string     *
 * because seriously UEFI??? UTF-16 is a bad design choice but I guess  *
 * they were drinking pretty hard when they designed UEFI; it is DOS    *
 * but somehow they magically found ways of making it worse             *
 *                                                                      *
 * dest: output buffer, receives one byte per source character.         *
 * src:  input read at a stride of two bytes per character.             *
 * n:    maximum number of bytes to store; 0 stores nothing.            *
 * Returns the number of non-zero bytes stored. The terminating zero is *
 * copied only when it is reached before n runs out.                    *
 ************************************************************************/
size_t wcstombs(char* dest, char* src, size_t n)
{
	int i = 0;

	/* Test the budget before every store so n == 0 touches nothing */
	while(n != 0)
	{
		/* UTF-16 is 2bytes per char and that first byte maps good enough to ASCII */
		dest[i] = src[2 * i];
		if(dest[i] == 0)
		{
			break;
		}
		i = i + 1;
		n = n - 1;
	}

	return i;
}

/************************************************************************
 * getenv - get an environmental variable                               *
 ************************************************************************/

/* Number of bytes in str before its terminating zero */
size_t _strlen(char const* str)
{
	size_t i = 0;
	while(0 != str[i]) i = i + 1;
	return i;
}

/* Compares at most count bytes; stops early (returning 0) when lhs ends.
 * Returns lhs[i] - rhs[i] at the first differing byte, otherwise 0. */
int _strncmp(char const* lhs, char const* rhs, size_t count)
{
	size_t i = 0;
	while(count > i)
	{
		if(0 == lhs[i]) break;
		if(lhs[i] != rhs[i]) return lhs[i] - rhs[i];
		i = i + 1;
	}

	return 0;
}

/* Zero-terminated array of "NAME=value" strings searched by getenv/setenv */
char** _envp;

/* Returns a pointer to the text after "name=" in the first matching
 * _envp entry, or 0 when no entry matches. */
char* getenv (char const* name)
{
	char** p = _envp;
	char* q;
	int length = _strlen(name);

	while (p[0] != 0)
	{
		if(_strncmp(name, p[0], length) == 0)
		{
			q = p[0] + length;
			if(q[0] == '=')
				return q + 1;
		}
		p += sizeof(char**); /* M2 pointer arithemtic */
	}

	return 0;
}

/************************************************************************
 * setenv - set an environmental variable                               *
 ************************************************************************/

/* Copies src including its terminating zero into dest; returns dest */
char* _strcpy(char* dest, char const* src)
{
	int i = 0;

	while (0 != src[i])
	{
		dest[i] = src[i];
		i = i + 1;
	}
	dest[i] = 0;

	return dest;
}

/* Stores a freshly allocated "s=v" string in _envp, replacing the entry
 * for s when one exists, otherwise appending and re-terminating the array.
 * s:           variable name.
 * v:           variable value.
 * overwrite_p: accepted for interface compatibility; an existing entry is
 *              always replaced regardless of its value.
 * Returns 0 on success, -1 when the new entry could not be allocated
 * (the environment is left untouched in that case). */
int setenv(char const *s, char const *v, int overwrite_p)
{
	char** p = _envp;
	int length = _strlen(s);
	char* q;

	while (p[0] != 0)
	{
		if (_strncmp (s, p[0], length) == 0)
		{
			q = p[0] + length;
			if (q[0] == '=')
				break;
		}
		p += sizeof(char**); /* M2 pointer arithemtic */
	}

	/* name + '=' + value + terminating zero */
	char *entry = malloc (length + _strlen(v) + 2);
	/* Do not install a NULL entry or scribble through it */
	if (0 == entry) return -1;

	int end_p = p[0] == 0;
	p[0] = entry;
	_strcpy(entry, s);
	_strcpy(entry + length, "=");
	/* _strcpy writes the terminating zero as the last allocated byte; */
	/* nothing may be stored beyond it */
	_strcpy(entry + length + 1, v);

	if (end_p != 0)
		p[1] = 0;

	return 0;
}
/* Copyright (C) 2016 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

/* Buffered file I/O interface.
 * When __M2__ is defined the FILE layout is declared here and the
 * implementation is pulled in directly by including stdio.c; otherwise
 * only constants, a FILE layout and prototypes are declared. */
#ifndef _STDIO_H
#define _STDIO_H

#ifdef __M2__

/* Actual format of FILE */
struct __IO_FILE
{
	int fd;       /* file descriptor backing the stream */
	int bufmode; /* O_RDONLY = 0, O_WRONLY = 1 */
	int bufpos;   /* index of the next byte to read from / write into buffer */
	int file_pos; /* running total of bytes already flushed by fflush */
	int buflen;   /* read streams: bytes held in buffer; write streams: flush threshold */
	char* buffer;
	struct __IO_FILE* next; /* links for the list of open streams (__list) */
	struct __IO_FILE* prev;
};

/* Now give us the FILE we all love */
typedef struct __IO_FILE FILE;

#include <stdio.c>
#else

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>

/* Required constants */
/* For file I/O*/
#define EOF -1
#define BUFSIZ 4096

/* For lseek */
#define SEEK_SET 0
#define SEEK_CUR 1
#define SEEK_END 2

/* Actual format of FILE */
/* NOTE(review): this layout has fewer fields than the __M2__ one above */
/* (no file_pos/next/prev) - confirm which implementation it pairs with */
struct __IO_FILE
{
	int fd;
	int bufmode; /* 0 = no buffer, 1 = read, 2 = write */
	int bufpos;
	int buflen;
	char* buffer;
};

/* Now give us the FILE we all love */
typedef struct __IO_FILE FILE;

/* Required variables */
extern FILE* stdin;
extern FILE* stdout;
extern FILE* stderr;

/* Standard C functions */
/* Getting */
extern int fgetc(FILE* f);
extern int getchar();
extern char* fgets(char* str, int count, FILE* stream);
extern size_t fread( void* buffer, size_t size, size_t count, FILE* stream );

/* Putting */
extern void fputc(char s, FILE* f);
extern void putchar(char s);
extern int fputs(char const* str, FILE* stream);
extern int puts(char const* str);
extern size_t fwrite(void const* buffer, size_t size, size_t count, FILE* stream );

/* File management */
extern FILE* fopen(char const* filename, char const* mode);
extern int fclose(FILE* stream);
extern int fflush(FILE* stream);

/* File Positioning */
extern int ungetc(int ch, FILE* stream);
extern long ftell(FILE* stream);
extern int fseek(FILE* f, long offset, int whence);
extern void rewind(FILE* f);
#endif
#endif
/* Copyright (C) 2016 Jeremiah Orians * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #include <stddef.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> #include <unistd.h> #include <stdlib.h> /* Required constants */ /* For file I/O*/ #define EOF 0xFFFFFFFF #define BUFSIZ 0x1000 /* For lseek */ #define SEEK_SET 0 #define SEEK_CUR 1 #define SEEK_END 2 /* Required variables */ FILE* stdin; FILE* stdout; FILE* stderr; FILE* __list; void __init_io() { __list = NULL; stdin = calloc(1, sizeof(FILE)); stdin->fd = STDIN_FILENO; stdin->bufmode = O_RDONLY; stdin->buflen = 1; stdin->buffer = calloc(2, sizeof(char)); stdout = calloc(1, sizeof(FILE)); stdout->fd = STDOUT_FILENO; stdout->bufmode = O_WRONLY; stdout->buflen = 512; stdout->buffer = calloc(514, sizeof(char)); stderr = calloc(1, sizeof(FILE)); stderr->fd = STDERR_FILENO; stderr->bufmode = O_WRONLY; stderr->buflen = 512; stderr->buffer = calloc(514, sizeof(char)); } /* Flush all IO on exit */ int fflush(FILE* stream); void __kill_io() { fflush(stdout); fflush(stderr); while(NULL != __list) { fflush(__list); __list = __list->next; } } /* Standard C functions */ /* Getting */ int read(int fd, char* buf, unsigned count); int fgetc(FILE* f) { /* Only read on read buffers */ if(O_WRONLY == f->bufmode) return EOF; /* Deal with stdin */ if(STDIN_FILENO == f->fd) { f->bufpos = 0; int r = read(f->fd, 
f->buffer, 1); /* Catch special case of STDIN gets nothing (AN EOF) */ if(0 == r) return EOF; } /* Catch EOF */ if(f->buflen <= f->bufpos) return EOF; /* Deal with standard case */ int ret = f->buffer[f->bufpos]; f->bufpos = f->bufpos + 1; /* Ensure 0xFF doesn't return EOF */ return (ret & 0xFF); } size_t fread( void* buffer, size_t size, size_t count, FILE* stream ) { if(0 == size) return 0; if(0 == count) return 0; long n = size + count - 1; char* p = buffer; long i; unsigned c; for(i = 0; i < n; i = i + 1) { c = fgetc(stream); if(EOF == c) return (i/size); p[i] = c; } return (i/size); } int getchar() { return fgetc(stdin); } char* fgets(char* str, int count, FILE* stream) { int i = 0; int ch; while(i < count) { ch = fgetc(stream); if(EOF == ch) { /* Return null if EOF is first char read */ if (i == 0) return NULL; break; } str[i] = ch; i = i + 1; if('\n' == ch) break; } return str; } /* Putting */ void fputc(char s, FILE* f) { /* Only write on write buffers */ if(O_RDONLY == f->bufmode) return; /* Add to buffer */ f->buffer[f->bufpos] = s; f->bufpos = f->bufpos + 1; /* Flush if full or '\n' */ if(f->bufpos == f->buflen) fflush(f); else if(('\n' == s) && (2 >= f->fd)) fflush(f); } size_t fwrite(void const* buffer, size_t size, size_t count, FILE* stream ) { long n = size * count; if(0 == n) return 0; char* p = buffer; int c; long i; for(i=0; i < n; i = i + 1) { c = p[i]; fputc(c, stream); } return (i/size); } void putchar(char s) { fputc(s, stdout); } int fputs(char const* str, FILE* stream) { while(0 != str[0]) { fputc(str[0], stream); str = str + 1; } return 0; } int puts(char const* str) { fputs(str, stdout); fputc('\n', stdout); return 0; } int lseek(int fd, int offset, int whence); /* File management */ FILE* fopen(char const* filename, char const* mode) { int f; FILE* fi = calloc(1, sizeof(FILE)); fi->next = __list; if(NULL != __list) __list->prev = fi; __list = fi; int size; if('w' == mode[0]) f = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 00600); else f = 
open(filename, 0, 0); /* Everything else is a read */ /* Negative numbers are error codes */ if(0 > f) { return 0; } if('w' == mode[0]) { /* Buffer as much as possible */ fi->buffer = malloc(BUFSIZ * sizeof(char)); fi->buflen = BUFSIZ; fi->bufmode = O_WRONLY; } else { /* Get enough buffer to read it all */ size = lseek(f, 0, SEEK_END); fi->buffer = malloc((size + 1) * sizeof(char)); fi->buflen = size; fi->bufmode = O_RDONLY; /* Now read it all */ lseek(f, 0, SEEK_SET); read(f, fi->buffer, size); } fi->fd = f; return fi; } FILE* fdopen(int fd, char* mode) { FILE* fi = calloc(1, sizeof(FILE)); fi->next = __list; if(NULL != __list) __list->prev = fi; __list = fi; int size; if('w' == mode[0]) { /* Buffer as much as possible */ fi->buffer = malloc(BUFSIZ * sizeof(char)); fi->buflen = BUFSIZ; fi->bufmode = O_WRONLY; } else { /* Get enough buffer to read it all */ size = lseek(fd, 0, SEEK_END); fi->buffer = malloc((size + 1) * sizeof(char)); fi->buflen = size; fi->bufmode = O_RDONLY; /* Now read it all */ lseek(fd, 0, SEEK_SET); read(fd, fi->buffer, size); } fi->fd = fd; return fi; } int write(int fd, char* buf, unsigned count); int fflush(FILE* stream) { /* We only need to flush on writes */ if(O_RDONLY == stream->bufmode) return 0; /* If nothing to flush */ if(0 ==stream->bufpos) return 0; /* The actual flushing */ int error = write(stream->fd, stream->buffer, stream->bufpos); /* Keep track of position */ stream->file_pos = stream->file_pos + stream->bufpos; stream->bufpos = 0; return error; } int close(int fd); int fclose(FILE* stream) { /* Deal with STDIN, STDOUT and STDERR */ /* No close for you */ if(2 >= stream->fd) return 0; /* We only need to flush on writes */ if(O_WRONLY == stream->bufmode) { fflush(stream); } /* Need to keep the File Descriptor for a moment */ int fd = stream->fd; /* Remove from __list */ if(NULL != stream->prev) stream->prev->next = stream->next; if(NULL != stream->next) stream->next->prev = stream->prev; /* Deal with special case of first 
node in __list */ if (__list == stream) __list = __list->next; /* Free up the buffer and struct used for FILE */ free(stream->buffer); free(stream); /* Do the actual closing */ return close(fd); } int unlink(char* filename); /* File Removal */ int remove(char *pathname) { return unlink(pathname); } /* File Positioning */ int ungetc(int ch, FILE* stream) { /* Deal with STDIN, STDOUT and STDERR */ /* No ungetc for you */ if(2 >= stream->fd) return EOF; /* You can't unget on a write stream! */ if(O_WRONLY == stream->bufmode) return EOF; /* Don't underflow */ if(0 == stream->bufpos) return EOF; /* Don't let crap be shoved into read stream */ if(stream->buffer[stream->bufpos - 1] != ch) return EOF; stream->bufpos = stream->bufpos - 1; return ch; } long ftell(FILE* stream) { /* Deal with STDIN, STDOUT and STDERR */ /* No ftell for you */ if(2 >= stream->fd) return 0; /* Deal with buffered output */ if(O_WRONLY == stream->bufmode) return stream->file_pos + stream->bufpos; /* Deal with read */ return stream->bufpos; } int fseek(FILE* f, long offset, int whence) { /* Deal with STDIN, STDOUT and STDERR */ /* No seek and destroy missions */ if(2 >= f->fd) return 0; /* Deal with ugly case */ if(O_WRONLY == f->bufmode) { fflush(f); return lseek(f->fd, offset, whence); } /* Deal with read mode */ int pos; if(SEEK_SET == whence) { pos = offset; } else if(SEEK_CUR == whence) { pos = f->bufpos + offset; } else if(SEEK_END == whence) { pos = f->buflen + offset; } else return -1; if(pos < 0) return -1; if(pos > f->buflen) return -1; f->bufpos = pos; return pos; } void rewind(FILE* f) { fseek(f, 0, SEEK_SET); }
/* -*- c-file-style: "linux";indent-tabs-mode:t -*- */ /* Copyright (C) 2017 Jeremiah Orians * Copyright (C) 2017 Jan Nieuwenhuizen <janneke@gnu.org> * This file is part of mescc-tools * * mescc-tools is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * mescc-tools is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with mescc-tools. If not, see <http://www.gnu.org/licenses/>. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <sys/stat.h> #include "M2libc/bootstrappable.h" #define max_string 4096 #define TRUE 1 #define FALSE 0 #define KNIGHT 0 #define X86 0x03 #define AMD64 0x3E #define ARMV7L 0x28 #define AARM64 0xB7 #define PPC64LE 0x15 #define RISCV32 0xF3 #define RISCV64 0x100F3 /* Because RISC-V unlike all other architectures does get a seperate e_machine when changing from 32 to 64bit */ #define HEX 16 #define OCTAL 8 #define BINARY 2 struct input_files { struct input_files* next; char* filename; }; struct entry { struct entry* next; unsigned target; char* name; };
/* -*- c-file-style: "linux";indent-tabs-mode:t -*- */
/* Copyright (C) 2017 Jeremiah Orians
 * Copyright (C) 2017 Jan Nieuwenhuizen <janneke@gnu.org>
 * This file is part of mescc-tools
 *
 * mescc-tools is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * mescc-tools is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with mescc-tools.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "hex2_globals.h"

/* Globals */
FILE* output;
struct entry** jump_tables;
int BigEndian;
int Base_Address;
int Architecture;
int ByteMode;
int exec_enable;
int ip;
char* scratch;
char* filename;
int linenumber;
int ALIGNED;

/* For processing bytes */
int hold;
int toggle;

/* Emits the "<filename>:<linenumber> :" prefix on stderr */
void line_error()
{
	char* number;

	fputs(filename, stderr);
	fputs(":", stderr);
	number = int2str(linenumber, 10, FALSE);
	fputs(number, stderr);
	fputs(" :", stderr);
}

/* Copies characters from source_file into scratch until a space, tab,
 * newline, '>' or EOF is read, and returns that terminating character.
 * Aborts through require() once max_string characters have been stored. */
int consume_token(FILE* source_file)
{
	int used = 0;
	int ch = fgetc(source_file);

	while(FALSE == in_set(ch, " \t\n>"))
	{
		scratch[used] = ch;
		used = used + 1;
		ch = fgetc(source_file);
		require(max_string > used, "Consumed token exceeds length restriction\n");
		if(EOF == ch) return ch;
	}

	return ch;
}

/* Skips characters up to and including the next space, tab, newline or
 * '>' (or up to EOF) and returns the character that ended the skip. */
int Throwaway_token(FILE* source_file)
{
	int ch = fgetc(source_file);

	while(EOF != ch)
	{
		if(in_set(ch, " \t\n>")) break;
		ch = fgetc(source_file);
	}

	return ch;
}

/* Counts the bytes of s that precede its terminating zero */
int length(char* s)
{
	int count = 0;

	while(0 != s[count])
	{
		count = count + 1;
	}

	return count;
}

/* Zeroes s[0] unconditionally, then keeps zeroing until the next byte
 * is already zero. */
void Clear_Scratch(char* s)
{
	int at = 0;

	do
	{
		s[at] = 0;
		at = at + 1;
	} while(0 != s[at]);
}

/* Copies the non-zero bytes of a into b; no terminator is written */
void Copy_String(char* a, char* b)
{
	int at = 0;

	while(0 != a[at])
	{
		b[at] = a[at];
		at = at + 1;
	}
}

/* Multiply-by-31 string hash seeded with 5381, reduced to its low 16 bits */
int GetHash(char* s)
{
	int hash = 5381;
	int at = 0;

	while(0 != s[at])
	{
		hash = hash * 31 + s[at];
		at = at + 1;
	}

	return (hash & 0xFFFF);
}
/* Looks up label c in its hash bucket of jump_tables and returns the
 * address stored for it. Prints a message and exits with EXIT_FAILURE
 * when no entry matches. */
unsigned GetTarget(char* c)
{
	struct entry* i;
	for(i = jump_tables[GetHash(c)]; NULL != i; i = i->next)
	{
		if(match(c, i->name))
		{
			return i->target;
		}
	}
	fputs("Target label ", stderr);
	fputs(c, stderr);
	fputs(" is not valid\n", stderr);
	exit(EXIT_FAILURE);
}

/* Reads a label name from source_file and records it with address ip at
 * the front of its hash bucket. Returns the character that terminated
 * the label token. */
int storeLabel(FILE* source_file, int ip)
{
	struct entry* entry = calloc(1, sizeof(struct entry));
	require(NULL != entry, "failed to allocate entry\n");

	/* Ensure we have target address */
	entry->target = ip;

	/* Store string */
	int c = consume_token(source_file);
	entry->name = calloc(length(scratch) + 1, sizeof(char));
	require(NULL != entry->name, "failed to allocate entry->name\n");
	Copy_String(scratch, entry->name);
	Clear_Scratch(scratch);

	/* Prepend to list */
	int h = GetHash(entry->name);
	entry->next = jump_tables[h];
	jump_tables[h] = entry;

	return c;
}

/* Exits with a message when displacement does not fit in number_of_bytes
 * (unsigned range when absolute is set, signed range otherwise).
 * 4 byte values are accepted unchecked; sizes other than 1..4 are fatal. */
void range_check(int displacement, int number_of_bytes, int absolute)
{
	if(4 == number_of_bytes) return;
	else if (absolute && (3 == number_of_bytes))
	{
		/* Deal with unsigned */
		if((16777215 < displacement) || (displacement < 0))
		{
			fputs("An absolute displacement of ", stderr);
			fputs(int2str(displacement, 10, TRUE), stderr);
			fputs(" does not fit in 3 bytes\n", stderr);
			exit(EXIT_FAILURE);
		}
		return;
	}
	else if (3 == number_of_bytes)
	{
		/* Deal with signed */
		if((8388607 < displacement) || (displacement < -8388608))
		{
			fputs("A relative displacement of ", stderr);
			fputs(int2str(displacement, 10, TRUE), stderr);
			fputs(" does not fit in 3 bytes\n", stderr);
			exit(EXIT_FAILURE);
		}
		return;
	}
	else if (absolute && (2 == number_of_bytes))
	{
		/* Deal with unsigned */
		if((65535 < displacement) || (displacement < 0))
		{
			fputs("An absolute displacement of ", stderr);
			fputs(int2str(displacement, 10, TRUE), stderr);
			fputs(" does not fit in 2 bytes\n", stderr);
			exit(EXIT_FAILURE);
		}
		return;
	}
	else if (2 == number_of_bytes)
	{
		/* Deal with signed */
		if((32767 < displacement) || (displacement < -32768))
		{
			fputs("A relative displacement of ", stderr);
			fputs(int2str(displacement, 10, TRUE), stderr);
			fputs(" does not fit in 2 bytes\n", stderr);
			exit(EXIT_FAILURE);
		}
		return;
	}
	else if (absolute && (1 == number_of_bytes))
	{
		/* Deal with unsigned */
		if((255 < displacement) || (displacement < 0))
		{
			fputs("An absolute displacement of ", stderr);
			fputs(int2str(displacement, 10, TRUE), stderr);
			fputs(" does not fit in 1 byte\n", stderr);
			exit(EXIT_FAILURE);
		}
		return;
	}
	else if (1 == number_of_bytes)
	{
		/* Deal with signed */
		if((127 < displacement) || (displacement < -128))
		{
			fputs("A relative displacement of ", stderr);
			fputs(int2str(displacement, 10, TRUE), stderr);
			fputs(" does not fit in 1 byte\n", stderr);
			exit(EXIT_FAILURE);
		}
		return;
	}

	fputs("Invalid number of bytes given\n", stderr);
	exit(EXIT_FAILURE);
}

/* Range-checks displacement and writes its low number_of_bytes bytes to
 * output, most significant byte first when BigEndian is set and least
 * significant byte first otherwise. */
void outputPointer(int displacement, int number_of_bytes, int absolute)
{
	unsigned value = displacement;

	/* HALT HARD if we are going to do something BAD*/
	range_check(displacement, number_of_bytes, absolute);

	if(BigEndian)
	{ /* Deal with BigEndian */
		if(4 == number_of_bytes) fputc((value >> 24), output);
		if(3 <= number_of_bytes) fputc(((value >> 16)%256), output);
		if(2 <= number_of_bytes) fputc(((value >> 8)%256), output);
		if(1 <= number_of_bytes) fputc((value % 256), output);
	}
	else
	{ /* Deal with LittleEndian */
		unsigned byte;
		while(number_of_bytes > 0)
		{
			byte = value % 256;
			value = value / 256;
			fputc(byte, output);
			number_of_bytes = number_of_bytes - 1;
		}
	}
}

/* Converts a (target, base) address pair into the displacement to emit
 * for the selected Architecture. The ALIGNED branches consume the flag
 * (it is reset to FALSE). Unknown architectures are fatal. */
int Architectural_displacement(int target, int base)
{
	if(KNIGHT == Architecture) return (target - base);
	else if(X86 == Architecture) return (target - base);
	else if(AMD64 == Architecture) return (target - base);
	else if(ALIGNED && (ARMV7L == Architecture))
	{
		ALIGNED = FALSE;
		/* Note: Branch displacements on ARM are in number of instructions to skip, basically. */
		if (target & 3)
		{
			line_error();
			fputs("error: Unaligned branch target: ", stderr);
			fputs(scratch, stderr);
			fputs(", aborting\n", stderr);
			exit(EXIT_FAILURE);
		}
		/*
		 * The "fetch" stage already moved forward by 8 from the
		 * beginning of the instruction because it is already
		 * prefetching the next instruction.
		 * Compensate for it by subtracting the space for
		 * two instructions (including the branch instruction).
		 * and the size of the aligned immediate.
		 */
		return (((target - base + (base & 3)) >> 2) - 2);
	}
	else if(ARMV7L == Architecture)
	{
		/*
		 * The size of the offset is 8 according to the spec but that value is
		 * based on the end of the immediate, which the documentation gets wrong
		 * and needs to be adjusted to the size of the immediate.
		 * Eg 1byte immediate => -8 + 1 = -7
		 */
		return ((target - base) - 8 + (3 & base));
	}
	else if(ALIGNED && (AARM64 == Architecture))
	{
		ALIGNED = FALSE;
		return (target - (~3 & base)) >> 2;
	}
	else if (AARM64 == Architecture)
	{
		return ((target - base) - 8 + (3 & base));
	}
	else if(ALIGNED && (PPC64LE == Architecture))
	{
		ALIGNED = FALSE;
		/* set Link register with branch */
		return (target - (base & 0xFFFFFFFC )) | 1;
	}
	else if(PPC64LE == Architecture)
	{
		/* DO *NOT* set link register with branch */
		return (target - (base & 0xFFFFFFFC));
	}
	else if(RISCV32 == Architecture || RISCV64 == Architecture) return (target - base);

	fputs("Unknown Architecture, aborting before harm is done\n", stderr);
	exit(EXIT_FAILURE);
}

/* Advances ip by the size of the pointer that sigil ch stands for;
 * any other character is fatal. */
void Update_Pointer(char ch)
{
	/* Calculate pointer size*/
	if(in_set(ch, "%&")) ip = ip + 4; /* Deal with % and & */
	else if(in_set(ch, "@$")) ip = ip + 2; /* Deal with @ and $ */
	else if('~' == ch) ip = ip + 3; /* Deal with ~ */
	else if('!' == ch) ip = ip + 1; /* Deal with ! */
	else
	{
		line_error();
		fputs("storePointer given unknown\n", stderr);
		exit(EXIT_FAILURE);
	}
}

/* Handles one pointer reference: reads the label after sigil ch (and an
 * optional ">base" label), resolves it and writes the resulting bytes.
 * ip is advanced before the lookup, so the default base is the address
 * just past the pointer being written. */
void storePointer(char ch, FILE* source_file)
{
	/* Get string of pointer */
	Clear_Scratch(scratch);
	Update_Pointer(ch);
	int base_sep_p = consume_token(source_file);

	/* Lookup token */
	int target = GetTarget(scratch);
	int displacement;

	int base = ip;

	/* Change relative base address to :<base> */
	if ('>' == base_sep_p)
	{
		Clear_Scratch(scratch);
		consume_token (source_file);
		base = GetTarget (scratch);

		/* Force universality of behavior */
		displacement = (target - base);
	}
	else
	{
		displacement = Architectural_displacement(target, base);
	}

	/* output calculated difference */
	if('!' == ch) outputPointer(displacement, 1, FALSE); /* Deal with ! */
	else if('$' == ch) outputPointer(target, 2, TRUE); /* Deal with $ */
	else if('@' == ch) outputPointer(displacement, 2, FALSE); /* Deal with @ */
	else if('~' == ch) outputPointer(displacement, 3, FALSE); /* Deal with ~ */
	else if('&' == ch) outputPointer(target, 4, TRUE); /* Deal with & */
	else if('%' == ch) outputPointer(displacement, 4, FALSE); /* Deal with % */
	else
	{
		line_error();
		fputs("error: storePointer reached impossible case: ch=", stderr);
		fputc(ch, stderr);
		fputs("\n", stderr);
		exit(EXIT_FAILURE);
	}
}

/* Discards input up to and including the next '\n' or '\r' (or up to
 * EOF) and counts one line. */
void line_Comment(FILE* source_file)
{
	int c = fgetc(source_file);
	while(!in_set(c, "\n\r"))
	{
		if(EOF == c) break;
		c = fgetc(source_file);
	}
	linenumber = linenumber + 1;
}

/* Returns the value 0..15 of hex digit c, or -1 for anything else.
 * As side effects '#' and ';' skip the rest of the line and '\n'
 * increments linenumber. */
int hex(int c, FILE* source_file)
{
	if (in_set(c, "0123456789")) return (c - 48);
	else if (in_set(c, "abcdef")) return (c - 87);
	else if (in_set(c, "ABCDEF")) return (c - 55);
	else if (in_set(c, "#;")) line_Comment(source_file);
	else if ('\n' == c) linenumber = linenumber + 1;
	return -1;
}

/* Octal counterpart of hex(): value 0..7 or -1, same side effects */
int octal(int c, FILE* source_file)
{
	if (in_set(c, "01234567")) return (c - 48);
	else if (in_set(c, "#;")) line_Comment(source_file);
	else if ('\n' == c) linenumber = linenumber + 1;
	return -1;
}

/* Binary counterpart of hex(): value 0..1 or -1, same side effects */
int binary(int c, FILE* source_file)
{
	if (in_set(c, "01")) return (c - 48);
	else if (in_set(c, "#;")) line_Comment(source_file);
	else if ('\n' == c) linenumber = linenumber + 1;
	return -1;
}

/* Feeds one input character into the digit accumulator for the current
 * ByteMode. hold collects the partial value and toggle counts the digits
 * seen so far; once a byte is complete (2 hex, 3 octal or 8 binary
 * digits) ip is advanced and, when write is set, the byte is emitted. */
void process_byte(char c, FILE* source_file, int write)
{
	if(HEX == ByteMode)
	{
		if(0 <= hex(c, source_file))
		{
			if(toggle)
			{
				if(write) fputc(((hold * 16)) + hex(c, source_file), output);
				ip = ip + 1;
				hold = 0;
			}
			else
			{
				hold = hex(c, source_file);
			}
			toggle = !toggle;
		}
	}
	else if(OCTAL ==ByteMode)
	{
		if(0 <= octal(c, source_file))
		{
			if(2 == toggle)
			{
				if(write) fputc(((hold * 8)) + octal(c, source_file), output);
				ip = ip + 1;
				hold = 0;
				toggle = 0;
			}
			else if(1 == toggle)
			{
				hold = ((hold * 8) + octal(c, source_file));
				toggle = 2;
			}
			else
			{
				hold = octal(c, source_file);
				toggle = 1;
			}
		}
	}
	else if(BINARY == ByteMode)
	{
		if(0 <= binary(c, source_file))
		{
			if(7 == toggle)
			{
				if(write) fputc((hold * 2) + binary(c, source_file), output);
				ip = ip + 1;
				hold = 0;
				toggle = 0;
			}
			else
			{
				hold = ((hold * 2) + binary(c, source_file));
				toggle = toggle + 1;
			}
		}
	}
}

/* On ARMV7L, AARM64, RISCV32 and RISCV64 advances ip to the next multiple
 * of 4, emitting zero bytes when write is set. Other architectures are
 * left untouched. */
void pad_to_align(int write)
{
	if((ARMV7L == Architecture) || (AARM64 == Architecture) || (RISCV32 == Architecture) || (RISCV64 == Architecture))
	{
		if(1 == (ip & 0x1))
		{
			ip = ip + 1;
			if(write) fputc('\0', output);
		}
		if(2 == (ip & 0x2))
		{
			ip = ip + 2;
			if(write)
			{
				fputc('\0', output);
				fputc('\0', output);
			}
		}
	}
}

/* First pass: records the address of every label without writing output.
 * Recurses on input->next before handling input, so the last list node
 * is processed first. */
void first_pass(struct input_files* input)
{
	if(NULL == input) return;
	first_pass(input->next);
	filename = input->filename;
	linenumber = 1;
	FILE* source_file = fopen(filename, "r");

	if(NULL == source_file)
	{
		fputs("The file: ", stderr);
		fputs(input->filename, stderr);
		fputs(" can not be opened!\n", stderr);
		exit(EXIT_FAILURE);
	}

	toggle = FALSE;
	int c;
	for(c = fgetc(source_file); EOF != c; c = fgetc(source_file))
	{
		/* Check for and deal with label */
		if(':' == c)
		{
			c = storeLabel(source_file, ip);
		}

		/* check for and deal with relative/absolute pointers to labels */
		/* (not an else: the character that ended a label is examined too) */
		if(in_set(c, "!@$~%&"))
		{ /* deal with 1byte pointer !; 2byte pointers (@ and $); 3byte pointers ~; 4byte pointers (% and &) */
			Update_Pointer(c);
			c = Throwaway_token(source_file);
			if ('>' == c)
			{ /* deal with label>base */
				c = Throwaway_token(source_file);
			}
		}
		else if('<' == c)
		{
			pad_to_align(FALSE);
		}
		else if('^' == c)
		{
			/* Just ignore */
			continue;
		}
		else process_byte(c, source_file, FALSE);
	}
	fclose(source_file);
}

/* Second pass: re-reads the inputs in the same order as first_pass and
 * writes the output, resolving pointers against the recorded labels. */
void second_pass(struct input_files* input)
{
	if(NULL == input) return;
	second_pass(input->next);
	filename = input->filename;
	linenumber = 1;
	FILE* source_file = fopen(filename, "r");

	/* Something that should never happen */
	if(NULL == source_file)
	{
		fputs("The file: ", stderr);
		fputs(input->filename, stderr);
		fputs(" can not be opened!\nWTF-pass2\n", stderr);
		exit(EXIT_FAILURE);
	}

	toggle = FALSE;
	hold = 0;

	int c;
	for(c = fgetc(source_file); EOF != c; c = fgetc(source_file))
	{
		if(':' == c) c = Throwaway_token(source_file); /* Deal with : */
		else if(in_set(c, "!@$~%&")) storePointer(c, source_file);  /* Deal with !, @, $, ~, % and & */
		else if('<' == c) pad_to_align(TRUE);
		else if('^' == c) ALIGNED = TRUE;
		else process_byte(c, source_file, TRUE);
	}

	fclose(source_file);
}
/* -*- c-file-style: "linux";indent-tabs-mode:t -*- */
/* Copyright (C) 2017 Jeremiah Orians
 * Copyright (C) 2017 Jan Nieuwenhuizen <janneke@gnu.org>
 * This file is part of mescc-tools
 *
 * mescc-tools is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * mescc-tools is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with mescc-tools.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "hex2_globals.h"

/* Pending bits that get XORed into the next bytes written by DoByte */
unsigned shiftregister;
/* Scratch word: collected '.' bytes or the immediate being encoded */
unsigned tempword;
/* Number of complete bytes folded into tempword by DoByte */
int updates;

/*
 * Report that VALUE does not fit the instruction field named by S and
 * terminate the program.
 */
void outOfRange(char* s, int value)
{
	line_error();
	fputs("error: value ", stderr);
	fputs(int2str(value, 10, TRUE), stderr);
	fputs(" out of range for field type ", stderr);
	fputs(s, stderr);
	fputs("\n", stderr);
	exit(EXIT_FAILURE);
}

/*
 * XOR a new contribution into the shift register.
 *
 * ch selects the encoding of value:
 *   '.'  raw 4 byte word (byte swapped unless BigEndian is set)
 *   '!'  RISC-V I format immediate
 *   '@'  RISC-V B format immediate (range and alignment checked)
 *   '$'  RISC-V J format immediate (range and alignment checked)
 *   '~'  RISC-V U format immediate
 * Any other ch is reported as an internal error and ends the program.
 *
 * Every contribution is masked with ((0xFFFF << 16) | 0xFFFF), i.e. to its
 * low 32 bits, before it is applied.
 */
void UpdateShiftRegister(char ch, int value)
{
	if ('.' == ch)
	{
		unsigned swap;
		/* Assume the user knows what they are doing */
		if(!BigEndian)
		{
			/* Swap from big-endian to little endian order */
			swap = (((value >> 24) & 0xFF) |
			        ((value << 8) & 0xFF0000) |
			        ((value >> 8) & 0xFF00) |
			        ((value & 0xFF) << 24));
		}
		else
		{
			/* Big endian needs no change */
			swap = value;
		}
		/* we just take the 4 bytes after the . and shove in the shift register */
		swap = swap & ((0xFFFF << 16) | 0xFFFF);
		shiftregister = shiftregister ^ swap;
	}
	else if ('!' == ch)
	{
		/* Corresponds to RISC-V I format */
		/* Will need architecture specific logic if more architectures go this route */
		/* no range check because it needs to work with labels for lui/addi + AUIPC combos */
		/* !label is used in the second instruction of AUIPC combo but we want an offset from */
		/* the first instruction */
		value = value + 4;
		tempword = (value & 0xFFF) << 20;
		/* Update shift register */
		tempword = tempword & ((0xFFFF << 16) | 0xFFFF);
		shiftregister = shiftregister ^ tempword;
	}
	else if ('@' == ch)
	{
		/* Corresponds to RISC-V B format (formerly known as SB) */
		/* Will need architecture specific logic if more architectures go this route */
		if ((value < -0x1000 || value > 0xFFF) || (value & 1)) outOfRange("B", value);

		/* Prepare the immediate's word */
		tempword = ((value & 0x1E) << 7)
			| ((value & 0x7E0) << (31 - 11))
			| ((value & 0x800) >> 4)
			| ((value & 0x1000) << (31 - 12));
		tempword = tempword & ((0xFFFF << 16) | 0xFFFF);
		/* Update shift register */
		shiftregister = shiftregister ^ tempword;
	}
	else if ('$' == ch)
	{
		/* Corresponds with RISC-V J format (formerly known as UJ) */
		/* Will need architecture specific logic if more architectures go this route */
		if ((value < -0x100000 || value > 0xFFFFF) || (value & 1)) outOfRange("J", value);

		tempword = ((value & 0x7FE) << (30 - 10))
			| ((value & 0x800) << (20 - 11))
			| ((value & 0xFF000))
			| ((value & 0x100000) << (31 - 20));
		tempword = tempword & ((0xFFFF << 16) | 0xFFFF);
		shiftregister = shiftregister ^ tempword;
	}
	else if ('~' == ch)
	{
		/* Corresponds with RISC-V U format */
		/* Will need architecture specific logic if more architectures go this route */
		/* The upper part is bumped by one page when the low 12 bits are 0x800 or more */
		if ((value & 0xFFF) < 0x800) tempword = value & (0xFFFFF << 12);
		else tempword = (value & (0xFFFFF << 12)) + 0x1000;
		tempword = tempword & ((0xFFFF << 16) | 0xFFFF);
		shiftregister = shiftregister ^ tempword;
	}
	else
	{
		line_error();
		fputs("error: UpdateShiftRegister reached impossible case: ch=", stderr);
		fputc(ch, stderr);
		fputs("\n", stderr);
		exit(EXIT_FAILURE);
	}
}

/*
 * Emit a 4 byte pointer for a '%' (displacement) or '&' (absolute target)
 * reference read from source_file.  ip is advanced by 4 before the
 * displacement is computed.  With the label>base form the displacement is
 * simply target - base.
 */
void WordStorePointer(char ch, FILE* source_file)
{
	/* Get string of pointer */
	ip = ip + 4;
	Clear_Scratch(scratch);
	int base_sep_p = consume_token(source_file);

	/* Lookup token */
	int target = GetTarget(scratch);
	int displacement;

	int base = ip;

	/* Change relative base address to :<base> */
	if ('>' == base_sep_p)
	{
		Clear_Scratch(scratch);
		consume_token (source_file);
		base = GetTarget (scratch);

		/* Force universality of behavior */
		displacement = (target - base);
	}
	else
	{
		displacement = Architectural_displacement(target, base);
	}

	/* output calculated difference */
	if('&' == ch) outputPointer(target, 4, TRUE); /* Deal with & */
	else if('%' == ch) outputPointer(displacement, 4, FALSE);  /* Deal with % */
	else
	{
		line_error();
		fputs("error: WordStorePointer reached impossible case: ch=", stderr);
		fputc(ch, stderr);
		fputs("\n", stderr);
		exit(EXIT_FAILURE);
	}
}

/* Pop the lowest byte off the shift register and return it */
unsigned sr_nextb()
{
	unsigned rv = shiftregister & 0xff;
	shiftregister = shiftregister >> 8;
	return rv;
}

/*
 * Feed one input character to the digit collector for the current ByteMode.
 *
 * Once a full byte has been collected (2 hex, 3 octal or 8 binary digits):
 *   - ip is advanced by one;
 *   - if write is set, the byte XORed with the next shift register byte is
 *     written to output;
 *   - if update is set, the byte is shifted into tempword and updates is
 *     incremented.
 * Characters the digit helper rejects (negative result) are ignored here.
 */
void DoByte(char c, FILE* source_file, int write, int update)
{
	if(HEX == ByteMode)
	{
		if(0 <= hex(c, source_file))
		{
			if(toggle)
			{
				if(write) fputc(((hold * 16) + hex(c, source_file)) ^ sr_nextb(), output);
				ip = ip + 1;
				if(update)
				{
					hold = (hold * 16) + hex(c, source_file);
					tempword = (tempword << 8) ^ hold;
					updates = updates + 1;
				}
				hold = 0;
			}
			else
			{
				hold = hex(c, source_file);
			}
			toggle = !toggle;
		}
	}
	else if(OCTAL ==ByteMode)
	{
		if(0 <= octal(c, source_file))
		{
			if(2 == toggle)
			{
				if(write) fputc(((hold * 8) + octal(c, source_file)) ^ sr_nextb(), output);
				ip = ip + 1;
				if(update)
				{
					hold = ((hold * 8) + octal(c, source_file));
					tempword = (tempword << 8) ^ hold;
					updates = updates + 1;
				}
				hold = 0;
				toggle = 0;
			}
			else if(1 == toggle)
			{
				hold = ((hold * 8) + octal(c, source_file));
				toggle = 2;
			}
			else
			{
				hold = octal(c, source_file);
				toggle = 1;
			}
		}
	}
	else if(BINARY == ByteMode)
	{
		if(0 <= binary(c, source_file))
		{
			if(7 == toggle)
			{
				if(write) fputc(((hold * 2) + binary(c, source_file)) ^ sr_nextb(), output);
				ip = ip + 1;
				if(update)
				{
					hold = ((hold * 2) + binary(c, source_file));
					tempword = (tempword << 8) ^ hold;
					updates = updates + 1;
				}
				hold = 0;
				toggle = 0;
			}
			else
			{
				hold = ((hold * 2) + binary(c, source_file));
				toggle = toggle + 1;
			}
		}
	}
}

/*
 * Collect the 4 bytes that follow a '.' into tempword (nothing is written).
 *
 * The end of file is checked on every read: updates only grows when DoByte
 * completes a byte, so without the check a truncated word would make the
 * loop spin on EOF forever.
 */
void WordCollectBytes(FILE* source_file)
{
	int c;
	/* Read architecture specific number of bytes for what is defined as a word */
	/* 4bytes in RISC-V's case */
	updates = 0;
	tempword = 0;
	while (updates < 4)
	{
		c = fgetc(source_file);
		if(EOF == c)
		{
			line_error();
			fputs("error: unexpected end of file inside a '.' word\n", stderr);
			exit(EXIT_FAILURE);
		}
		DoByte(c, source_file, FALSE, TRUE);
	}
}

/*
 * Word-oriented first pass: records label addresses and advances ip without
 * writing any output.  The list is walked tail first (recursion on
 * input->next precedes the work on the current node).
 */
void WordFirstPass(struct input_files* input)
{
	if(NULL == input) return;
	WordFirstPass(input->next);
	filename = input->filename;
	linenumber = 1;
	FILE* source_file = fopen(filename, "r");

	if(NULL == source_file)
	{
		fputs("The file: ", stderr);
		fputs(input->filename, stderr);
		fputs(" can not be opened!\n", stderr);
		exit(EXIT_FAILURE);
	}

	toggle = FALSE;
	int c;
	for(c = fgetc(source_file); EOF != c; c = fgetc(source_file))
	{
		/* Check for and deal with label */
		if(':' == c)
		{
			c = storeLabel(source_file, ip);
		}

		/* check for and deal with relative/absolute pointers to labels */
		if('.' == c)
		{
			WordCollectBytes(source_file);
			/* The '.' word only modifies following bytes; undo DoByte's ip advance */
			ip = ip - 4;
		}
		else if(in_set(c, "!@$~"))
		{
			/* Don't update IP */
			c = Throwaway_token(source_file);
		}
		else if(in_set(c, "%&"))
		{
			ip = ip + 4;
			c = Throwaway_token(source_file);
			if ('>' == c)
			{ /* deal with label>base */
				c = Throwaway_token(source_file);
			}
		}
		else if('<' == c)
		{
			pad_to_align(FALSE);
		}
		else if('^' == c)
		{
			/* Just ignore */
			continue;
		}
		else DoByte(c, source_file, FALSE, FALSE);
	}
	fclose(source_file);
}

/*
 * Word-oriented second pass: resolves references and writes the output.
 * shiftregister and tempword are cleared on every call, including the
 * terminating call on the NULL end of the list.
 */
void WordSecondPass(struct input_files* input)
{
	shiftregister = 0;
	tempword = 0;

	if(NULL == input) return;
	WordSecondPass(input->next);
	filename = input->filename;
	linenumber = 1;
	FILE* source_file = fopen(filename, "r");

	/* Something that should never happen */
	if(NULL == source_file)
	{
		fputs("The file: ", stderr);
		fputs(input->filename, stderr);
		fputs(" can not be opened!\nWTF-pass2\n", stderr);
		exit(EXIT_FAILURE);
	}

	toggle = FALSE;
	hold = 0;

	int c;
	for(c = fgetc(source_file); EOF != c; c = fgetc(source_file))
	{
		if(':' == c) c = Throwaway_token(source_file); /* Deal with : */
		else if('.' == c)
		{
			WordCollectBytes(source_file);
			UpdateShiftRegister('.', tempword);
			/* The '.' word only modifies following bytes; undo DoByte's ip advance */
			ip = ip - 4;
		}
		else if(in_set(c, "%&")) WordStorePointer(c, source_file);  /* Deal with % and & */
		else if(in_set(c, "!@$~"))
		{
			Clear_Scratch(scratch);
			consume_token(source_file);
			UpdateShiftRegister(c, Architectural_displacement(GetTarget(scratch), ip)); /* Play with shift register */
		}
		else if('<' == c) pad_to_align(TRUE);
		else if('^' == c) ALIGNED = TRUE;
		else DoByte(c, source_file, TRUE, FALSE);
	}

	fclose(source_file);
}
/* -*- c-file-style: "linux";indent-tabs-mode:t -*- */
/* Copyright (C) 2017 Jeremiah Orians
 * Copyright (C) 2017 Jan Nieuwenhuizen <janneke@gnu.org>
 * This file is part of mescc-tools
 *
 * mescc-tools is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * mescc-tools is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with mescc-tools.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "hex2_globals.h"

/* The essential functions */
void first_pass(struct input_files* input);
void second_pass(struct input_files* input);
void WordFirstPass(struct input_files* input);
void WordSecondPass(struct input_files* input);

/*
 * Standard C main program
 *
 * Parses the command line, runs the label-collecting pass and then the
 * output pass (the Word* variants for riscv32/riscv64) over every --file
 * given, and finally marks the output file executable unless
 * --non-executable was passed or the output is stdout.
 *
 * Returns EXIT_SUCCESS on success.  Bad options, a missing option argument,
 * an unknown architecture, an unopenable output file or a chmod failure end
 * the program with a failure status; so does the absence of any input file.
 */
int main(int argc, char **argv)
{
	int InsaneArchitecture = FALSE;
	ALIGNED = FALSE;
	BigEndian = TRUE;
	jump_tables = calloc(65537, sizeof(struct entry*));
	require(NULL != jump_tables, "Failed to allocate our jump_tables\n");

	Architecture = KNIGHT;
	Base_Address = 0;
	struct input_files* input = NULL;
	output = stdout;
	char* output_file = "";
	exec_enable = TRUE;
	ByteMode = HEX;
	scratch = calloc(max_string + 1, sizeof(char));
	require(NULL != scratch, "failed to allocate our scratch buffer\n");
	char* arch;
	struct input_files* temp;

	/*
	 * argv[argc] is NULL, so for every option found at an index below argc
	 * the slot after it may be read; it is NULL when the argument is missing.
	 */
	int option_index = 1;
	while(option_index <= argc)
	{
		if(NULL == argv[option_index])
		{
			option_index = option_index + 1;
		}
		else if(match(argv[option_index], "--big-endian"))
		{
			BigEndian = TRUE;
			option_index = option_index + 1;
		}
		else if(match(argv[option_index], "--little-endian"))
		{
			BigEndian = FALSE;
			option_index = option_index + 1;
		}
		else if(match(argv[option_index], "--non-executable"))
		{
			exec_enable = FALSE;
			option_index = option_index + 1;
		}
		else if(match(argv[option_index], "-A") || match(argv[option_index], "--architecture"))
		{
			arch = argv[option_index + 1];
			require(NULL != arch, "--architecture requires an argument\n");
			if(match("knight-native", arch) || match("knight-posix", arch)) Architecture = KNIGHT;
			else if(match("x86", arch)) Architecture = X86;
			else if(match("amd64", arch)) Architecture = AMD64;
			else if(match("armv7l", arch)) Architecture = ARMV7L;
			else if(match("aarch64", arch)) Architecture = AARM64;
			else if(match("ppc64le", arch)) Architecture = PPC64LE;
			else if(match("riscv32", arch)) Architecture = RISCV32;
			else if(match("riscv64", arch)) Architecture = RISCV64;
			else
			{
				/* Linking for the wrong target is never useful: stop here */
				fputs("Unknown architecture: ", stderr);
				fputs(arch, stderr);
				fputs(" known values are: knight-native, knight-posix, x86, amd64, armv7l, aarch64, ppc64le, riscv32 and riscv64\n", stderr);
				exit(EXIT_FAILURE);
			}
			option_index = option_index + 2;
		}
		else if(match(argv[option_index], "-b") || match(argv[option_index], "--binary"))
		{
			ByteMode = BINARY;
			option_index = option_index + 1;
		}
		else if(match(argv[option_index], "-B") || match(argv[option_index], "--base-address"))
		{
			require(NULL != argv[option_index + 1], "--base-address requires an argument\n");
			Base_Address = strtoint(argv[option_index + 1]);
			option_index = option_index + 2;
		}
		else if(match(argv[option_index], "-h") || match(argv[option_index], "--help"))
		{
			fputs("Usage: ", stderr);
			fputs(argv[0], stderr);
			fputs(" --file FILENAME1 {-f FILENAME2} (--big-endian|--little-endian)", stderr);
			fputs(" [--base-address 0x12345] [--architecture name]\nArchitecture:", stderr);
			fputs(" knight-native, knight-posix, x86, amd64, armv7l, aarch64, ppc64le, riscv32 and riscv64\n", stderr);
			fputs("To leverage octal or binary input: --octal, --binary\n", stderr);
			exit(EXIT_SUCCESS);
		}
		else if(match(argv[option_index], "-f") || match(argv[option_index], "--file"))
		{
			require(NULL != argv[option_index + 1], "--file requires an argument\n");
			temp = calloc(1, sizeof(struct input_files));
			require(NULL != temp, "failed to allocate file for processing\n");
			temp->filename = argv[option_index + 1];
			/* New files go on the front; the passes walk the list tail first */
			temp->next = input;
			input = temp;
			option_index = option_index + 2;
		}
		else if(match(argv[option_index], "-o") || match(argv[option_index], "--output"))
		{
			output_file = argv[option_index + 1];
			require(NULL != output_file, "--output requires an argument\n");
			output = fopen(output_file, "w");

			if(NULL == output)
			{
				fputs("The file: ", stderr);
				fputs(argv[option_index + 1], stderr);
				fputs(" can not be opened!\n", stderr);
				exit(EXIT_FAILURE);
			}
			option_index = option_index + 2;
		}
		else if(match(argv[option_index], "-O") || match(argv[option_index], "--octal"))
		{
			ByteMode = OCTAL;
			option_index = option_index + 1;
		}
		else if(match(argv[option_index], "-V") || match(argv[option_index], "--version"))
		{
			fputs("hex2 1.5.0\n", stdout);
			exit(EXIT_SUCCESS);
		}
		else
		{
			fputs("Unknown option\n", stderr);
			exit(EXIT_FAILURE);
		}
	}

	if((Architecture == RISCV32) || (Architecture == RISCV64))
	{
		/* Forcing me to use words instead of just byting into the problem */
		InsaneArchitecture = TRUE;
	}

	/* Catch a common mistake */
	if((KNIGHT != Architecture) && (0 == Base_Address))
	{
		fputs(">> WARNING <<\n>> WARNING <<\n>> WARNING <<\n", stderr);
		fputs("If you are not generating a ROM image this binary will likely not work\n", stderr);
	}

	/* Catch implicitly false assumptions */
	if(BigEndian && ((X86 == Architecture) || ( AMD64 == Architecture) || (ARMV7L == Architecture) || (AARM64 == Architecture) || (PPC64LE == Architecture) || (RISCV32 == Architecture) || (RISCV64 == Architecture)))
	{
		fputs(">> WARNING <<\n>> WARNING <<\n>> WARNING <<\n", stderr);
		fputs("You have specified big endian output on likely a little endian processor\n", stderr);
		fputs("if this is a mistake please pass --little-endian next time\n", stderr);
	}

	/* Make sure we have a program tape to run */
	if (NULL == input)
	{
		fputs("No input files given; pass at least one --file FILENAME\n", stderr);
		return EXIT_FAILURE;
	}

	/* Get all of the labels */
	ip = Base_Address;
	if(InsaneArchitecture) WordFirstPass(input);
	else first_pass(input);

	/* Fix all the references*/
	ip = Base_Address;
	if(InsaneArchitecture) WordSecondPass(input);
	else second_pass(input);

	/* flush all writes */
	fflush(output);

	/* Set file as executable */
	if(exec_enable && (output != stdout))
	{
		/* Close output file */
		fclose(output);

		if(0 != chmod(output_file, 0750))
		{
			fputs("Unable to change permissions\n", stderr);
			exit(EXIT_FAILURE);
		}
	}

	return EXIT_SUCCESS;
}
## Copyright (C) 2016 Jeremiah Orians
## This file is part of M2-Planet.
##
## M2-Planet is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## M2-Planet is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.

## Program entry: pushes argv and envp, records envp in GLOBAL__envp,
## runs the malloc and FILE* initialisers, calls main and falls through
## into exit with main's return value on the stack.
:_start
	mov_rbp,rsp                 ; Protect rsp (rbp keeps the entry-time stack pointer)

	;; Prepare argv
	lea_rax,[rbp+DWORD] %8      ; ARGV_address = RBP + 8
	push_rax                    ; Put argv on the stack

	;; Prepare envp
	mov_rax,rbp                 ; Address we need to load from
	mov_rax,[rax]               ; Get ARGC
	add_rax, %2                 ; OFFSET = ARGC + 2
	sal_rax, !3                 ; OFFSET = OFFSET * WORDSIZE (shift by 3 = times 8)
	add_rax,rbp                 ; ENVP_address = RBP + OFFSET (RBP holds the entry-time RSP)
	push_rax                    ; Put envp on the stack
	lea_rbx,[rip+DWORD] %GLOBAL__envp ; Get _envp global
	mov_[rbx],rax               ; Save environment to _envp

	;; Stack offset
	add_rbp, %8                 ; Fix rbp

	;; Setup for malloc
	call %FUNCTION___init_malloc

	;; Setup for FILE*
	call %FUNCTION___init_io

	;; Perform the main loop
	call %FUNCTION_main
	push_rax                    ; Put return value on the stack
	push_rax                    ; So that _exit gets it

;; exit: shuts down the FILE* layer, then falls through into _exit
:FUNCTION_exit
	call %FUNCTION___kill_io
;; _exit: two values are popped, the second one lands in rdi as the
;; syscall argument; 0x3C (60) is presumably the Linux x86-64 exit call
:FUNCTION__exit
	pop_rbx
	pop_rdi
	mov_rax, %0x3C
	syscall

;; Storage for the environment pointer saved by _start
:GLOBAL__envp
	NULL
/* Copyright (C) 2016 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <stddef.h>

/* Copy src, including its terminator, into dest; returns dest */
char* strcpy(char* dest, char const* src)
{
	int i = 0;

	while (0 != src[i])
	{
		dest[i] = src[i];
		i = i + 1;
	}
	dest[i] = 0;

	return dest;
}

/*
 * Copy at most count characters of src into dest and zero-fill whatever is
 * left of the count bytes.  Exactly count bytes are written, never more;
 * dest is left unterminated when src has count or more characters.
 */
char* strncpy(char* dest, char const* src, size_t count)
{
	if(0 == count) return dest;
	size_t i = 0;
	while(0 != src[i])
	{
		dest[i] = src[i];
		i = i + 1;
		if(count == i) return dest;
	}

	/* Pad only up to (not including) index count */
	while(i < count)
	{
		dest[i] = 0;
		i = i + 1;
	}

	return dest;
}

/* Append src to the string in dest; returns dest */
char* strcat(char* dest, char const* src)
{
	int i = 0;
	int j = 0;
	while(0 != dest[i]) i = i + 1;
	while(0 != src[j])
	{
		dest[i] = src[j];
		i = i + 1;
		j = j + 1;
	}
	dest[i] = 0;
	return dest;
}

/* Append at most count characters of src to dest, then terminate dest */
char* strncat(char* dest, char const* src, size_t count)
{
	size_t i = 0;
	size_t j = 0;
	while(0 != dest[i]) i = i + 1;
	while(0 != src[j])
	{
		if(count == j)
		{
			dest[i] = 0;
			return dest;
		}
		dest[i] = src[j];
		i = i + 1;
		j = j + 1;
	}
	dest[i] = 0;
	return dest;
}

/* Number of characters before the terminator */
size_t strlen(char const* str )
{
	size_t i = 0;
	while(0 != str[i]) i = i + 1;
	return i;
}

/* Like strlen but never reports more than strsz */
size_t strnlen_s(char const* str, size_t strsz )
{
	size_t i = 0;
	while(0 != str[i])
	{
		if(strsz == i) return i;
		i = i + 1;
	}
	return i;
}

/* Difference of the first pair of characters that differ, 0 when equal */
int strcmp(char const* lhs, char const* rhs )
{
	int i = 0;
	while(0 != lhs[i])
	{
		if(lhs[i] != rhs[i]) return lhs[i] - rhs[i];
		i = i + 1;
	}

	return lhs[i] - rhs[i];
}

/* strcmp limited to the first count characters */
int strncmp(char const* lhs, char const* rhs, size_t count)
{
	if(count == 0) return 0;
	size_t i = 0;
	while(0 != lhs[i])
	{
		if(lhs[i] != rhs[i]) return lhs[i] - rhs[i];
		i = i + 1;
		if(count <= i) return 0;
	}

	return lhs[i] - rhs[i];
}

/*
 * First occurrence of ch in str, or NULL.
 * Note: searching for the terminator (ch == 0) yields NULL here, unlike
 * ISO C; the explicit check after the loop suggests this is intended, so
 * it is kept.
 */
char* strchr(char const* str, int ch)
{
	char* p = str;
	while(ch != p[0])
	{
		if(0 == p[0]) return NULL;
		p = p + 1;
	}
	if(0 == p[0]) return NULL;
	return p;
}

/* Last occurrence of ch in str, or NULL */
char* strrchr(char const* str, int ch)
{
	char* p = str;
	int i = 0;
	while(0 != p[i]) i = i + 1;
	while(ch != p[i])
	{
		if(0 == i) return NULL;
		i = i - 1;
	}
	return (p + i);
}

/*
 * Length of the leading part of dest made only of characters from src.
 * The terminator is tested explicitly so the result does not depend on
 * what strchr returns for a zero character.
 */
size_t strspn(char const* dest, char const* src)
{
	size_t i = 0;
	while(0 != dest[i])
	{
		if(NULL == strchr(src, dest[i])) return i;
		i = i + 1;
	}
	return i;
}

/*
 * Length of the leading part of dest containing no character from src.
 * Stops at the terminator of dest instead of scanning beyond it.
 */
size_t strcspn(char const* dest, char const* src)
{
	size_t i = 0;
	while(0 != dest[i])
	{
		if(NULL != strchr(src, dest[i])) return i;
		i = i + 1;
	}
	return i;
}

/* First character of dest that appears in breakset, or NULL when none does */
char* strpbrk(char const* dest, char const* breakset)
{
	char* p = dest;
	while(0 != p[0])
	{
		if(NULL != strchr(breakset, p[0])) return p;
		p = p + 1;
	}
	return NULL;
}

/* Fill count bytes at dest with ch; a NULL dest is returned untouched */
void* memset(void* dest, int ch, size_t count)
{
	if(NULL == dest) return dest;
	size_t i = 0;
	char* s = dest;
	while(i < count)
	{
		s[i] = ch;
		i = i + 1;
	}
	return dest;
}

/*
 * Copy count bytes front to back.  Returns dest, or NULL when src is NULL;
 * a NULL dest is returned untouched.
 */
void* memcpy(void* dest, void const* src, size_t count)
{
	if(NULL == dest) return dest;
	if(NULL == src) return NULL;
	char* s1 = dest;
	char const* s2 = src;
	size_t i = 0;
	while(i < count)
	{
		s1[i] = s2[i];
		i = i + 1;
	}
	return dest;
}

/*
 * Copy count bytes between possibly overlapping regions.
 * dest below src: a front-to-back copy is safe.  Otherwise copy back to
 * front; the index is decremented before use so the unsigned counter never
 * wraps and a count of 0 copies nothing.
 */
void* memmove(void* dest, void const* src, size_t count)
{
	if (dest < src) return memcpy (dest, src, count);
	char *p = dest;
	char const *q = src;
	while (count > 0)
	{
		count = count - 1;
		p[count] = q[count];
	}
	return dest;
}

/* Difference of the first differing byte pair within count bytes, else 0 */
int memcmp(void const* lhs, void const* rhs, size_t count)
{
	if(0 == count) return 0;
	size_t i = 0;
	/* Stop on the last byte at the latest so s1[i]/s2[i] stay in range */
	count = count - 1;
	char const* s1 = lhs;
	char const* s2 = rhs;
	while(i < count)
	{
		if(s1[i] != s2[i]) break;
		i = i + 1;
	}
	return (s1[i] - s2[i]);
}

/*
 * First occurrence of needle inside haystack, or NULL.
 * Each candidate position is compared over the needle's length (sl); an
 * empty needle matches at the start of haystack.
 */
char* strstr(char* haystack, char* needle)
{
	int hl = strlen(haystack);
	int sl = strlen(needle);
	int i = 0;
	int max = hl - sl;
	if(hl < sl) return NULL;

	while(i <= max)
	{
		if(0 == strncmp(haystack+i, needle, sl)) return haystack+i;
		i = i + 1;
	}
	return NULL;
}
/* Copyright (C) 2016-2020 Jeremiah Orians * Copyright (C) 2020 fosslinux * This file is part of mescc-tools. * * mescc-tools is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * mescc-tools is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with mescc-tools. If not, see <http://www.gnu.org/licenses/>. */ #include <stdio.h> #include "../M2libc/bootstrappable.h" /* * DEFINES */ #define FALSE 0 #define TRUE 1 // CONSTANT SUCCESS 0 #define SUCCESS 0 // CONSTANT FAILURE 1 #define FAILURE 1 #define MAX_STRING 4096 #define MAX_ARRAY 512 /* * Here is the token struct. It is used for both the token linked-list and * env linked-list. */ struct Token { /* * For the token linked-list, this stores the token; for the env linked-list * this stores the value of the variable. */ char* value; /* * Used only for the env linked-list. It holds a string containing the * name of the var. */ char* var; /* * This struct stores a node of a singly linked list, store the pointer to * the next node. */ struct Token* next; }; #include "kaem_globals.h"
/* * Copyright (C) 2020 fosslinux * This file is part of mescc-tools. * * mescc-tools is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * mescc-tools is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with mescc-tools. If not, see <http://www.gnu.org/licenses/>. */ #include <stdlib.h> #include <stdio.h> #include <unistd.h> #include <string.h> #include "kaem.h" /* Prototypes from other files */ int array_length(char** array); char* env_lookup(char* variable); /* * VARIABLE HANDLING FUNCTIONS */ /* Substitute a variable into n->value */ int run_substitution(char* var_name, struct Token* n) { char* value = env_lookup(var_name); /* If there is nothing to substitute, don't substitute anything! */ if(value != NULL) { char* s = calloc(MAX_STRING, sizeof(char)); s = strcat(s, n->value); s = strcat(s, value); n->value = s; return TRUE; } return FALSE; } /* Handle ${var:-text} format of variables - i.e. ifset format */ int variable_substitute_ifset(char* input, struct Token* n, int index) { /* * In ${var:-text} format, we evaluate like follows. * If var is set as an envar, then we substitute the contents of that * envar. If it is not set, we substitute alternative text. * * In this function, we assume that input is the raw token, * n->value is everything already done in variable_substitute, * index is where we are up to in input. offset is for n->value. */ /* * Check if we should even be performing this function. * We perform this function when we come across ${var:-text} syntax. 
*/ int index_old = index; int perform = FALSE; int input_length = strlen(input); while(index < input_length) { /* Loop over each character */ if(input[index] == ':' && input[index + 1] == '-') { /* Yes, this is (most likely) ${var:-text} format. */ perform = TRUE; break; } index = index + 1; } /* Don't perform it if we shouldn't */ if(perform == FALSE) return index_old; index = index_old; /* * Get offset. * offset is the difference between the index of the variable we write to * in the following blocks and input. * This stays relatively constant. */ int offset = index; /* Get the variable name */ char* var_name = calloc(MAX_STRING, sizeof(char)); require(var_name != NULL, "Memory initialization of var_name in variable_substitute_ifset failed\n"); while(input[index] != ':') { /* Copy into var_name until :- */ var_name[index - offset] = input[index]; index = index + 1; } /* Skip over :- */ index = index + 2; offset = index; /* Get the alternative text */ char* text = calloc(MAX_STRING, sizeof(char)); require(text != NULL, "Memory initialization of text in variable_substitute_ifset failed\n"); while(input[index] != '}') { /* Copy into text until } */ require(input_length > index, "IMPROPERLY TERMINATED VARIABLE\nABORTING HARD\n"); text[index - offset] = input[index]; index = index + 1; } /* Do the substitution */ if(run_substitution(var_name, n) == FALSE) { /* The variable was not found. Substitute the alternative text. */ char* s = calloc(MAX_STRING, sizeof(char)); s = strcat(s, n->value); s = strcat(s, text); n->value = s; } return index; } /* Controls substitution for ${variable} and derivatives */ int variable_substitute(char* input, struct Token* n, int index) { /* NOTE: index is the pos of input */ index = index + 1; /* We don't want the { */ /* * Check for "special" types * If we do find a special type we delegate the substitution to it * and return here; as we are done... there's nothing more do do in * that case. 
*/ int index_old = index; index = variable_substitute_ifset(input, n, index); if(index != index_old) return index; /* Reset index */ index = index_old; /* * If we reach here it is a normal substitution * Let's do it! */ /* Initialize var_name and offset */ char* var_name = calloc(MAX_STRING, sizeof(char)); require(var_name != NULL, "Memory initialization of var_name in variable_substitute failed\n"); int offset = index; /* Get the variable name */ int substitute_done = FALSE; char c; while(substitute_done == FALSE) { c = input[index]; require(MAX_STRING > index, "LINE IS TOO LONG\nABORTING HARD\n"); if(EOF == c || '\n' == c || index > strlen(input)) { /* We never should hit EOF, EOL or run past the end of the line while collecting a variable */ fputs("IMPROPERLY TERMINATED VARIABLE!\nABORTING HARD\n", stderr); exit(EXIT_FAILURE); } else if('\\' == c) { /* Drop the \ - poor mans escaping. */ index = index + 1; } else if('}' == c) { /* End of variable name */ substitute_done = TRUE; } else { var_name[index - offset] = c; index = index + 1; } } /* Substitute the variable */ run_substitution(var_name, n); return index; } /* Function to concatenate all command line arguments */ void variable_all(char** argv, struct Token* n) { fflush(stdout); /* index refernences the index of n->value, unlike other functions */ int index = 0; int argv_length = array_length(argv); int i = 0; char* argv_element = calloc(MAX_STRING, sizeof(char)); char* hold = argv[i]; n->value = argv_element; /* Assuming the form kaem -f script or kaem -f script -- 123 we want matching results to bash, so skip the kaem, -f and script */ while(!match("--", hold)) { i = i + 1; hold = argv[i]; if(argv_length == i) break; } /* put i = i + 1 in the for initialization to skip past the -- */ for(; i < argv_length; i = i + 1) { /* Ends up with (n->value) (argv[i]) */ /* If we don't do this we get jumbled results in M2-Planet */ hold = argv[i]; strcpy(argv_element + index, hold); index = index + strlen(hold); /* 
Add space on the end */ n->value[index] = ' '; index = index + 1; } /* Remove trailing space */ index = index - 1; n->value[index] = 0; } /* Function controlling substitution of variables */ void handle_variables(char** argv, struct Token* n) { /* NOTE: index is the position of input */ int index = 0; /* Create input */ char* input = calloc(MAX_STRING, sizeof(char)); require(input != NULL, "Memory initialization of input in collect_variable failed\n"); strcpy(input, n->value); /* Reset n->value */ n->value = calloc(MAX_STRING, sizeof(char)); require(n->value != NULL, "Memory initialization of n->value in collect_variable failed\n"); /* Copy everything up to the $ */ /* * TODO: Not need allocation of input before this check if there is no * variable in it. */ while(input[index] != '$') { if(input[index] == 0) { /* No variable in it */ n->value = input; return; /* We don't need to do anything more */ } n->value[index] = input[index]; index = index + 1; } /* Must be outside the loop */ int offset; substitute: index = index + 1; /* We are uninterested in the $ */ /* Run the substitution */ if(input[index] == '{') { /* Handle everything ${ related */ index = variable_substitute(input, n, index); index = index + 1; /* We don't want the closing } */ } else if(input[index] == '@') { /* Handles $@ */ index = index + 1; /* We don't want the @ */ variable_all(argv, n); } else { /* We don't know that */ fputs("IMPROPERLY USED VARIABLE!\nOnly ${foo} and $@ format are accepted at this time.\nABORTING HARD\n", stderr); exit(EXIT_FAILURE); } offset = strlen(n->value) - index; /* Copy everything from the end of the variable to the end of the token */ while(input[index] != 0) { if(input[index] == '$') { /* We have found another variable */ fflush(stdout); goto substitute; } n->value[index + offset] = input[index]; index = index + 1; } }
/* Copyright (C) 2016-2020 Jeremiah Orians
 * Copyright (C) 2020 fosslinux
 * This file is part of mescc-tools.
 *
 * mescc-tools is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * mescc-tools is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with mescc-tools.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "kaem.h"

/*
 * Definitions of the state shared between the kaem source files.
 * NOTE(review): the integer flags below are presumably set from the command
 * line and the script being run; their exact meaning is not visible in this
 * file - check where they are assigned before relying on the names.
 */
int command_done;
int VERBOSE;
int VERBOSE_EXIT;
int STRICT;
int INIT_MODE;
int FUZZING;
int WARNINGS;
char* KAEM_BINARY;
/* Colon separated list of directories searched by find_executable() */
char* PATH;

/* Token linked-list; stores the tokens of each line */
struct Token* token;
/* Env linked-list; stores the environment variables */
struct Token* env;
/* Alias linked-list; stores the aliases */
struct Token* alias;
/* Copyright (C) 2016-2020 Jeremiah Orians * Copyright (C) 2020 fosslinux * Copyright (C) 2021 Andrius Å tikonas * This file is part of mescc-tools. * * mescc-tools is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * mescc-tools is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with mescc-tools. If not, see <http://www.gnu.org/licenses/>. */ #include <stdlib.h> #include <stdio.h> #include <unistd.h> #include <sys/wait.h> #include <string.h> #include "kaem.h" /* Prototypes from other files */ void handle_variables(char** argv, struct Token* n); /* * UTILITY FUNCTIONS */ /* Function to find a character in a string */ char* find_char(char* string, char a) { if(0 == string[0]) { return NULL; } while(a != string[0]) { string = string + 1; if(0 == string[0]) { return string; } } return string; } /* Function to find the length of a char**; an array of strings */ int array_length(char** array) { int length = 0; while(array[length] != NULL) { length = length + 1; } return length; } /* Search for a variable in the token linked-list */ char* token_lookup(char* variable, struct Token* token) { /* Start at the head */ struct Token* n = token; /* Loop over the linked-list */ while(n != NULL) { if(match(variable, n->var)) { /* We have found the correct node */ return n->value; /* Done */ } /* Nope, try the next */ n = n->next; } /* We didn't find anything! 
*/ return NULL; } /* Search for a variable in the env linked-list */ char* env_lookup(char* variable) { return token_lookup(variable, env); } /* Search for a variable in the alias linked-list */ char* alias_lookup(char* variable) { return token_lookup(variable, alias); } /* Find the full path to an executable */ char* find_executable(char* name) { if(match("", name)) { return NULL; } if(('.' == name[0]) || ('/' == name[0])) { /* assume names that start with . or / are relative or absolute */ return name; } char* trial = calloc(MAX_STRING, sizeof(char)); char* MPATH = calloc(MAX_STRING, sizeof(char)); /* Modified PATH */ require(MPATH != NULL, "Memory initialization of MPATH in find_executable failed\n"); strcpy(MPATH, PATH); FILE* t; char* next = find_char(MPATH, ':'); int index; int offset; int mpath_length; int name_length; int trial_length; while(NULL != next) { /* Reset trial */ trial_length = strlen(trial); for(index = 0; index < trial_length; index = index + 1) { trial[index] = 0; } next[0] = 0; /* prepend_string(MPATH, prepend_string("/", name)) */ mpath_length = strlen(MPATH); for(index = 0; index < mpath_length; index = index + 1) { require(MAX_STRING > index, "Element of PATH is too long\n"); trial[index] = MPATH[index]; } trial[index] = '/'; offset = strlen(trial); name_length = strlen(name); for(index = 0; index < name_length; index = index + 1) { require(MAX_STRING > index, "Element of PATH is too long\n"); trial[index + offset] = name[index]; } /* Try the trial */ require(strlen(trial) < MAX_STRING, "COMMAND TOO LONG!\nABORTING HARD\n"); t = fopen(trial, "r"); if(NULL != t) { fclose(t); return trial; } MPATH = next + 1; next = find_char(MPATH, ':'); } return NULL; } /* Function to convert a Token linked-list into an array of strings */ char** list_to_array(struct Token* s) { struct Token* n; n = s; char** array = calloc(MAX_ARRAY, sizeof(char*)); require(array != NULL, "Memory initialization of array in conversion of list to array failed\n"); char* 
element = calloc(MAX_STRING, sizeof(char)); require(element != NULL, "Memory initialization of element in conversion of list to array failed\n"); int index = 0; int i; int value_length; int var_length; int offset; while(n != NULL) { /* Loop through each node and assign it to an array index */ array[index] = calloc(MAX_STRING, sizeof(char)); require(array[index] != NULL, "Memory initialization of array[index] in conversion of list to array failed\n"); /* Bounds checking */ /* No easy way to tell which it is, output generic message */ require(index < MAX_ARRAY, "SCRIPT TOO LONG or TOO MANY ENVARS\nABORTING HARD\n"); if(n->var == NULL) { /* It is a line */ array[index] = n->value; } else { /* It is a var */ /* prepend_string(n->var, prepend_string("=", n->value)) */ var_length = strlen(n->var); for(i = 0; i < var_length; i = i + 1) { element[i] = n->var[i]; } element[i] = '='; i = i + 1; offset = i; value_length = strlen(n->value); for(i = 0; i < value_length; i = i + 1) { element[i + offset] = n->value[i]; } } /* Insert elements if not empty */ if(!match("", element)) { strcpy(array[index], element); } n = n->next; index = index + 1; /* Reset element */ for(i = 0; i < MAX_STRING; i = i + 1) { element[i] = 0; } } return array; } /* Function to handle the correct options for escapes */ int handle_escape(int c) { if(c == '\n') { /* Do nothing - eat up the newline */ return -1; } else if('n' == c) { /* Add a newline to the token */ return '\n'; } else if('r' == c) { /* Add a return to the token */ return '\r'; } else if('\\' == c) { /* Add a real backslash to the token */ return '\\'; } else { /* Just add it to the token (eg, quotes) */ return c; } } /* * TOKEN COLLECTION FUNCTIONS */ /* Function for skipping over line comments */ void collect_comment(FILE* input) { int c; /* Eat up the comment, one character at a time */ /* * Sanity check that the comment ends with \n. * Remove the comment from the FILE* */ do { c = fgetc(input); /* We reached an EOF!! 
*/ require(EOF != c, "IMPROPERLY TERMINATED LINE COMMENT!\nABORTING HARD\n"); } while('\n' != c); /* We can now be sure it ended with \n -- and have purged the comment */ } /* Function for collecting strings and removing the "" pair that goes with them */ int collect_string(FILE* input, char* n, int index) { int string_done = FALSE; int c; do { /* Bounds check */ require(MAX_STRING > index, "LINE IS TOO LONG\nABORTING HARD\n"); c = fgetc(input); require(EOF != c, "IMPROPERLY TERMINATED STRING!\nABORTING HARD\n"); if('\\' == c) { /* We are escaping the next character */ /* This correctly handles escaped quotes as it just returns the quote */ c = fgetc(input); c = handle_escape(c); n[index] = c; index = index + 1; } else if('"' == c) { /* End of string */ string_done = TRUE; } else { n[index] = c; index = index + 1; } } while(string_done == FALSE); return index; } /* Function to parse and assign token->value */ int collect_token(FILE* input, char* n, int last_index) { int c; int cc; int token_done = FALSE; int index = 0; do { /* Loop over each character in the token */ c = fgetc(input); /* Bounds checking */ require(MAX_STRING > index, "LINE IS TOO LONG\nABORTING HARD\n"); if(EOF == c) { /* End of file -- this means script complete */ /* We don't actually exit here. This logically makes more sense; * let the code follow its natural path of execution and exit * sucessfuly at the end of main(). 
*/ token_done = TRUE; command_done = TRUE; return -1; } else if((' ' == c) || ('\t' == c)) { /* Space and tab are token separators */ token_done = TRUE; } else if(('\n' == c) || (';' == c)) { /* Command terminates at the end of a line or at semicolon */ command_done = TRUE; token_done = TRUE; if(0 == index) { index = last_index; } } else if('"' == c) { /* Handle strings -- everything between a pair of "" */ index = collect_string(input, n, index); token_done = TRUE; } else if('#' == c) { /* Handle line comments */ collect_comment(input); command_done = TRUE; token_done = TRUE; if(0 == index) { index = last_index; } } else if('\\' == c) { /* Support for escapes */ c = fgetc(input); /* Skips over \, gets the next char */ cc = handle_escape(c); if(-1 != cc) { /* We need to put it into the token */ n[index] = cc; } index = index + 1; } else if(0 == c) { /* We have come to the end of the token */ token_done = TRUE; } else { /* It's a character to assign */ n[index] = c; index = index + 1; } } while(token_done == FALSE); return index; } /* Function to parse string and assign token->value */ int collect_alias_token(char* input, char* n, int index) { int c; int cc; int token_done = FALSE; int output_index = 0; do { /* Loop over each character in the token */ c = input[index]; index = index + 1; if((' ' == c) || ('\t' == c)) { /* Space and tab are token separators */ token_done = TRUE; } else if('\\' == c) { /* Support for escapes */ c = input[index]; index = index + 1; cc = handle_escape(c); /* We need to put it into the token */ n[output_index] = cc; output_index = output_index + 1; } else if(0 == c) { /* We have come to the end of the token */ token_done = TRUE; index = 0; } else { /* It's a character to assign */ n[output_index] = c; output_index = output_index + 1; } } while(token_done == FALSE); /* Terminate the output with a NULL */ n[output_index] = 0; return index; } /* * EXECUTION FUNCTIONS * Note: All of the builtins return SUCCESS (0) when they exit successfully 
* and FAILURE (1) when they fail. */ /* Function to check if the token is an envar */ int is_envar(char* token) { int i = 0; int token_length = strlen(token); while(i < token_length) { if(token[i] == '=') { return FAILURE; } i = i + 1; } return SUCCESS; } /* Add an envar */ void add_envar() { /* Pointers to strings we want */ char* name = calloc(strlen(token->value) + 4, sizeof(char)); char* value = token->value; char* newvalue; int i = 0; /* Isolate the name */ while('=' != value[i]) { name[i] = value[i]; i = i + 1; } /* Isolate the value */ newvalue = name + i + 2; value = value + i + 1; i = 0; require(0 != value[i], "add_envar received improper variable\n"); while(0 != value[i]) { newvalue[i] = value[i]; i = i + 1; } /* If we are in init-mode and this is the first var env == NULL, rectify */ if(env == NULL) { env = calloc(1, sizeof(struct Token)); require(env != NULL, "Memory initialization of env failed\n"); env->var = name; /* Add our first variable */ } /* * If the name of the envar is PATH, then we need to set our (internal) * global PATH value. 
*/ if(match(name, "PATH")) { strcpy(PATH, newvalue); } struct Token* n = env; /* Find match if possible */ while(!match(name, n->var)) { if(NULL == n->next) { n->next = calloc(1, sizeof(struct Token)); require(n->next != NULL, "Memory initialization of next env node in add_envar failed\n"); n->next->var = name; } /* Loop will match and exit */ n = n->next; } /* Since we found the variable we need only to set it to its new value */ n->value = newvalue; } /* Add an alias */ void add_alias() { token = token->next; /* Skip the actual alias */ if(token->next == NULL) { /* No arguments */ char** array = list_to_array(alias); int index = 0; while(array[index] != NULL) { fputs(array[index], stdout); fputc('\n', stdout); index = index + 1; } fflush(stdout); return; } if(!is_envar(token->value)) { char** array = list_to_array(token); int index = 0; while(array[index] != NULL) { fputs(array[index], stdout); fputc(' ', stdout); index = index + 1; } fputc('\n', stdout); fflush(stdout); return; } /* Pointers to strings we want */ char* name = calloc(strlen(token->value) + 4, sizeof(char)); char* value = token->value; char* newvalue; int i = 0; /* Isolate the name */ while('=' != value[i]) { name[i] = value[i]; i = i + 1; } /* Isolate the value */ newvalue = name + i + 2; value = value + i + 1; i = 0; require(0 != value[i], "add_alias received improper variable\n"); while(0 != value[i]) { newvalue[i] = value[i]; i = i + 1; } /* If this is the first alias, rectify */ if(alias == NULL) { alias = calloc(1, sizeof(struct Token)); require(alias != NULL, "Memory initialization of alias failed\n"); alias->var = name; /* Add our first variable */ } struct Token* n = alias; /* Find match if possible */ while(!match(name, n->var)) { if(NULL == n->next) { n->next = calloc(1, sizeof(struct Token)); require(n->next != NULL, "Memory initialization of next alias node in alias failed\n"); n->next->var = name; } /* Loop will match and exit */ n = n->next; } /* Since we found the variable we need 
only to set it to its new value */ n->value = newvalue; } /* cd builtin */ int cd() { if(NULL == token->next) { return FAILURE; } token = token->next; if(NULL == token->value) { return FAILURE; } int ret = chdir(token->value); if(0 > ret) { return FAILURE; } return SUCCESS; } /* pwd builtin */ int pwd() { char* path = calloc(MAX_STRING, sizeof(char)); require(path != NULL, "Memory initialization of path in pwd failed\n"); getcwd(path, MAX_STRING); require(!match("", path), "getcwd() failed\n"); fputs(path, stdout); fputs("\n", stdout); return SUCCESS; } /* set builtin */ int set() { /* Get the options */ int i; if(NULL == token->next) { goto cleanup_set; } token = token->next; if(NULL == token->value) { goto cleanup_set; } char* options = calloc(MAX_STRING, sizeof(char)); require(options != NULL, "Memory initialization of options in set failed\n"); int last_position = strlen(token->value) - 1; for(i = 0; i < last_position; i = i + 1) { options[i] = token->value[i + 1]; } /* Parse the options */ int options_length = strlen(options); for(i = 0; i < options_length; i = i + 1) { if(options[i] == 'a') { /* set -a is on by default and cannot be disabled at this time */ if(WARNINGS) { fputs("set -a is on by default and cannot be disabled\n", stdout); } continue; } else if(options[i] == 'e') { /* Fail on failure */ STRICT = TRUE; } else if(options[i] == 'x') { /* Show commands as executed */ /* TODO: this currently behaves like -v. Make it do what it should */ VERBOSE = TRUE; /* * Output the set -x because VERBOSE didn't catch it before. * We don't do just -x because we support multiple options in one command, * eg set -ex. 
*/ fputs(" +> set -", stdout); fputs(options, stdout); fputs("\n", stdout); fflush(stdout); } else { /* Invalid */ fputc(options[i], stderr); fputs(" is an invalid set option!\n", stderr); exit(EXIT_FAILURE); } } return SUCCESS; cleanup_set: return FAILURE; } /* echo builtin */ void echo() { if(token->next == NULL) { /* No arguments */ fputs("\n", stdout); return; } if(token->next->value == NULL) { /* No arguments */ fputs("\n", stdout); return; } token = token->next; /* Skip the actual echo */ while(token != NULL) { /* Output each argument to echo to stdout */ if(token->value == NULL) { break; } fputs(token->value, stdout); if(NULL != token->next) { /* M2-Planet doesn't short circuit */ if(NULL != token->next->value) fputc(' ', stdout); } token = token->next; } fputs("\n", stdout); } /* unset builtin */ void unset() { struct Token* e; /* We support multiple variables on the same line */ struct Token* t; for(t = token->next; t != NULL; t = t->next) { if(NULL == t->value) { continue; } e = env; /* Look for the variable; we operate on ->next because we need to remove ->next */ while(e->next != NULL) { if(match(e->next->var, t->value)) { break; } e = e->next; } if(e->next != NULL) { /* There is something to unset */ e->next = e->next->next; } } } void execute(FILE* script, char** argv); int _execute(FILE* script, char** argv); int collect_command(FILE* script, char** argv); /* if builtin */ void if_cmd(FILE* script, char** argv) { int index; int old_VERBOSE; token = token->next; /* Skip the actual if */ /* Do not check for successful exit status */ int if_status = _execute(script, argv); old_VERBOSE = VERBOSE; VERBOSE = VERBOSE && !if_status; do { index = collect_command(script, argv); require(index != -1, "Unexpected EOF, improperly terminated if statement.\n"); if(0 == index) { continue; } if(0 == if_status) { /* Stuff to exec */ execute(script, argv); } if(match(token->value, "else")) { if_status = !if_status; } } while(!match(token->value, "fi")); VERBOSE = 
old_VERBOSE; } int what_exit(char* program, int status) { /*********************************************************************************** * If the low-order 8 bits of w_status are equal to 0x7F or zero, the child * * process has stopped. If the low-order 8 bits of w_status are non-zero and are * * not equal to 0x7F, the child process terminated due to a signal otherwise, the * * child process terminated due to an exit() call. * * * * In the event it was a signal that stopped the process the top 8 bits of * * w_status contain the signal that caused the process to stop. * * * * In the event it was terminated the bottom 7 bits of w_status contain the * * terminating error number for the process. * * * * If bit 0x80 of w_status is set, a core dump was produced. * ***********************************************************************************/ int WIFEXITED = !(status & 0x7F); int WEXITSTATUS = (status & 0xFF00) >> 8; int WTERMSIG = status & 0x7F; int WCOREDUMP = status & 0x80; int WIFSIGNALED = !((0x7F == WTERMSIG) || (0 == WTERMSIG)); int WIFSTOPPED = ((0x7F == WTERMSIG) && (0 == WCOREDUMP)); if(WIFEXITED) { if(VERBOSE_EXIT) { fputc('\n', stderr); fputs(program, stderr); fputs(" normal termination, exit status = ", stderr); fputs(int2str(WEXITSTATUS, 10, TRUE), stderr); fputs("\n\n\n", stderr); } return WEXITSTATUS; } else if (WIFSIGNALED) { fputc('\n', stderr); fputs(program, stderr); fputs(" abnormal termination, signal number = ", stderr); fputs(int2str(WTERMSIG, 10, TRUE), stderr); fputc('\n', stderr); if(WCOREDUMP) fputs("core dumped\n", stderr); return WTERMSIG; } else if(WIFSTOPPED) { fputc('\n', stderr); fputs(program, stderr); fputs(" child stopped, signal number = ", stderr); fputs(int2str(WEXITSTATUS, 10, TRUE), stderr); fputc('\n', stderr); return WEXITSTATUS; } fputc('\n', stderr); fputs(program, stderr); fputs(" :: something crazy happened with execve\nI'm just gonna get the hell out of here\n", stderr); exit(EXIT_FAILURE); } /* Execute program 
and check for error */ void execute(FILE* script, char** argv) { int status = _execute(script, argv); if(STRICT == TRUE && (0 != status)) { /* Clearly the script hit an issue that should never have happened */ fputs("Subprocess error ", stderr); fputs(int2str(status, 10, TRUE), stderr); fputs("\nABORTING HARD\n", stderr); exit(EXIT_FAILURE); } } /* Execute program */ int _execute(FILE* script, char** argv) { /* Run the command */ /* rc = return code */ int rc; /* exec without forking */ int exec = FALSE; /* Actually do the execution */ if(is_envar(token->value) == TRUE) { add_envar(); return 0; } else if(match(token->value, "cd")) { rc = cd(); if(STRICT) { require(rc == SUCCESS, "cd failed!\n"); } return 0; } else if(match(token->value, "set")) { rc = set(); if(STRICT) { require(rc == SUCCESS, "set failed!\n"); } return 0; } else if(match(token->value, "alias")) { add_alias(); return 0; } else if(match(token->value, "pwd")) { rc = pwd(); if(STRICT) { require(rc == SUCCESS, "pwd failed!\n"); } return 0; } else if(match(token->value, "echo")) { echo(); return 0; } else if(match(token->value, "unset")) { unset(); return 0; } else if(match(token->value, "exec")) { token = token->next; /* Skip the actual exec */ exec = TRUE; } else if(match(token->value, "if")) { if_cmd(script, argv); return 0; } else if(match(token->value, "then")) { /* ignore */ return 0; } else if(match(token->value, "else")) { /* ignore */ return 0; } else if(match(token->value, "fi")) { /* ignore */ return 0; } /* If it is not a builtin, run it as an executable */ int status; /* i.e. 
return code */ char** array; char** envp; /* Get the full path to the executable */ char* program = find_executable(token->value); /* Check we can find the executable */ if(NULL == program) { if(STRICT == TRUE) { fputs("WHILE EXECUTING ", stderr); fputs(token->value, stderr); fputs(" NOT FOUND!\nABORTING HARD\n", stderr); exit(EXIT_FAILURE); } /* If we are not strict simply return */ return 0; } int f = 0; #ifdef __uefi__ array = list_to_array(token); envp = list_to_array(env); return spawn(program, array, envp); #else if(!exec) { f = fork(); } /* Ensure fork succeeded */ if(f == -1) { fputs("WHILE EXECUTING ", stderr); fputs(token->value, stderr); fputs(" fork() FAILED\nABORTING HARD\n", stderr); exit(EXIT_FAILURE); } else if(f == 0) { /* Child */ /************************************************************** * Fuzzing produces random stuff; we don't want it running * * dangerous commands. So we just don't execve. * * But, we still do the list_to_array calls to check for * * segfaults. 
* **************************************************************/ array = list_to_array(token); envp = list_to_array(env); if(FALSE == FUZZING) { /* We are not fuzzing */ /* execve() returns only on error */ execve(program, array, envp); } /* Prevent infinite loops */ _exit(EXIT_FAILURE); } /* Otherwise we are the parent */ /* And we should wait for it to complete */ waitpid(f, &status, 0); return what_exit(program, status); #endif } int collect_command(FILE* script, char** argv) { command_done = FALSE; /* Initialize token */ struct Token* n; n = calloc(1, sizeof(struct Token)); require(n != NULL, "Memory initialization of token in collect_command failed\n"); char* s = calloc(MAX_STRING, sizeof(char)); require(s != NULL, "Memory initialization of token in collect_command failed\n"); token = n; int index = 0; int alias_index; char* alias_string; /* Get the tokens */ while(command_done == FALSE) { index = collect_token(script, s, index); /* Don't allocate another node if the current one yielded nothing, OR * if we are done. 
*/ if(match(s, "")) { continue; } alias_string = alias_lookup(s); alias_index = 0; do { if(alias_string != NULL) { alias_index = collect_alias_token(alias_string, s, alias_index); } /* add to token */ n->value = s; s = calloc(MAX_STRING, sizeof(char)); require(s != NULL, "Memory initialization of next token node in collect_command failed\n"); /* Deal with variables */ handle_variables(argv, n); /* If the variable expands into nothing */ if(match(n->value, " ")) { n->value = NULL; continue; } /* Prepare for next loop */ n->next = calloc(1, sizeof(struct Token)); require(n->next != NULL, "Memory initialization of next token node in collect_command failed\n"); n = n->next; } while(alias_index != 0); } /* -1 means the script is done */ if(EOF == index) { return index; } /* Output the command if verbose is set */ /* Also if there is nothing in the command skip over */ if(VERBOSE && !match(token->value, "") && !match(token->value, NULL)) { n = token; fputs(" +>", stdout); while(n != NULL) { /* Print out each token token */ fputs(" ", stdout); /* M2-Planet doesn't let us do this in the while */ if(n->value != NULL) { if(!match(n->value, "")) { fputs(n->value, stdout); } } n = n->next; } fputc('\n', stdout); fflush(stdout); } return index; } /* Function for executing our programs with desired arguments */ void run_script(FILE* script, char** argv) { int index; while(TRUE) { /* * Tokens has to be reset each time, as we need a new linked-list for * each line. * See, the program flows like this as a high level overview: * Get line -> Sanitize line and perform variable replacement etc -> * Execute line -> Next. * We don't need the previous lines once they are done with, so tokens * are hence for each line. 
*/ index = collect_command(script, argv); /* -1 means the script is done */ if(EOF == index) { break; } if(0 == index) { continue; } /* Stuff to exec */ execute(script, argv); } } /* Function to populate env */ void populate_env(char** envp) { /* You can't populate a NULL environment */ if(NULL == envp) { return; } /* avoid empty arrays */ int max = array_length(envp); if(0 == max) { return; } /* Initialize env and n */ env = calloc(1, sizeof(struct Token)); require(env != NULL, "Memory initialization of env failed\n"); struct Token* n; n = env; int i; int j; int k; char* envp_line; for(i = 0; i < max; i = i + 1) { n->var = calloc(MAX_STRING, sizeof(char)); require(n->var != NULL, "Memory initialization of n->var in population of env failed\n"); n->value = calloc(MAX_STRING, sizeof(char)); require(n->value != NULL, "Memory initialization of n->var in population of env failed\n"); j = 0; /* * envp is weird. * When referencing envp[i]'s characters directly, they were all jumbled. * So just copy envp[i] to envp_line, and work with that - that seems * to fix it. 
*/ envp_line = calloc(MAX_STRING, sizeof(char)); require(envp_line != NULL, "Memory initialization of envp_line in population of env failed\n"); require(strlen(envp[i]) < MAX_STRING, "Environment variable exceeds length restriction\n"); strcpy(envp_line, envp[i]); while(envp_line[j] != '=') { /* Copy over everything up to = to var */ n->var[j] = envp_line[j]; j = j + 1; } /* If we get strange input, we need to ignore it */ if(n->var == NULL) { continue; } j = j + 1; /* Skip over = */ k = 0; /* As envp[i] will continue as j but n->value begins at 0 */ while(envp_line[j] != 0) { /* Copy everything else to value */ n->value[k] = envp_line[j]; j = j + 1; k = k + 1; } /* Sometimes, we get lines like VAR=, indicating nothing is in the variable */ if(n->value == NULL) { n->value = ""; } /* Advance to next part of linked list */ n->next = calloc(1, sizeof(struct Token)); require(n->next != NULL, "Memory initialization of n->next in population of env failed\n"); n = n->next; } /* Get rid of node on the end */ n = NULL; /* Also destroy the n->next reference */ n = env; while(n->next->var != NULL) { n = n->next; } n->next = NULL; } int main(int argc, char** argv, char** envp) { VERBOSE = FALSE; VERBOSE_EXIT = FALSE; STRICT = TRUE; FUZZING = FALSE; WARNINGS = FALSE; char* filename = "kaem.run"; FILE* script = NULL; /* Initalize structs */ token = calloc(1, sizeof(struct Token)); require(token != NULL, "Memory initialization of token failed\n"); if(NULL != argv[0]) KAEM_BINARY = argv[0]; else KAEM_BINARY = "./bin/kaem"; int i = 1; /* Loop over arguments */ while(i <= argc) { if(NULL == argv[i]) { /* Ignore the argument */ i = i + 1; } else if(match(argv[i], "-h") || match(argv[i], "--help")) { /* Help information */ fputs("Usage: ", stdout); fputs(argv[0], stdout); fputs(" [-h | --help] [-V | --version] [--file filename | -f filename] [-i | --init-mode] [-v | --verbose] [--non-strict] [--warn] [--fuzz]\n", stdout); exit(EXIT_SUCCESS); } else if(match(argv[i], "-f") || 
match(argv[i], "--file")) { /* Set the filename */ if(argv[i + 1] != NULL) { filename = argv[i + 1]; } i = i + 2; } else if(match(argv[i], "-i") || match(argv[i], "--init-mode")) { /* init mode does not populate env */ INIT_MODE = TRUE; i = i + 1; } else if(match(argv[i], "-V") || match(argv[i], "--version")) { /* Output version */ fputs("kaem version 1.5.0\n", stdout); exit(EXIT_SUCCESS); } else if(match(argv[i], "-v") || match(argv[i], "--verbose")) { /* Set verbose */ VERBOSE = TRUE; i = i + 1; } else if(match(argv[i], "--strict")) { /* it is a NOP */ STRICT = TRUE; i = i + 1; } else if(match(argv[i], "--non-strict")) { /* Set strict */ STRICT = FALSE; i = i + 1; } else if(match(argv[i], "--warn")) { /* Set warnings */ WARNINGS = TRUE; i = i + 1; } else if(match(argv[i], "--fuzz")) { /* Set fuzzing */ FUZZING = TRUE; i = i + 1; } else if(match(argv[i], "--show-exit-codes")) { /* show exit codes */ VERBOSE_EXIT = TRUE; i = i + 1; } else if(match(argv[i], "--")) { /* Nothing more after this */ break; } else { /* We don't know this argument */ fputs("UNKNOWN ARGUMENT\n", stdout); exit(EXIT_FAILURE); } } /* Populate env */ if(INIT_MODE == FALSE) { populate_env(envp); } /* make sure SHELL is set */ if(NULL == env_lookup("SHELL")) { struct Token* shell = calloc(1, sizeof(struct Token)); require(NULL != shell, "unable to create SHELL environment variable\n"); shell->next = env; shell->var = "SHELL"; shell->value= KAEM_BINARY; env = shell; } /* Populate PATH variable * We don't need to calloc() because env_lookup() does this for us. 
*/ PATH = env_lookup("PATH"); /* Populate USERNAME variable */ char* USERNAME = env_lookup("LOGNAME"); /* Handle edge cases */ if((NULL == PATH) && (NULL == USERNAME)) { /* We didn't find either of PATH or USERNAME -- use a generic PATH */ PATH = calloc(MAX_STRING, sizeof(char)); require(PATH != NULL, "Memory initialization of PATH failed\n"); strcpy(PATH, "/root/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"); } else if(NULL == PATH) { /* We did find a username but not a PATH -- use a generic PATH but with /home/USERNAME */ PATH = calloc(MAX_STRING, sizeof(char)); PATH = strcat(PATH, "/home/"); PATH = strcat(PATH, USERNAME); PATH = strcat(PATH, "/bin:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games"); } /* Open the script */ script = fopen(filename, "r"); if(NULL == script) { fputs("The file: ", stderr); fputs(filename, stderr); fputs(" can not be opened!\n", stderr); exit(EXIT_FAILURE); } /* Run the commands */ run_script(script, argv); /* Cleanup */ fclose(script); return EXIT_SUCCESS; }
#! /usr/bin/env bash # Mes --- Maxwell Equations of Software # Copyright © 2017,2019 Jan Nieuwenhuizen <janneke@gnu.org> # Copyright © 2017,2019 Jeremiah Orians # # This file is part of Mes. # # Mes is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or (at # your option) any later version. # # Mes is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Mes. If not, see <http://www.gnu.org/licenses/>. # To run in kaem simply: kaem --verbose --strict ARCH="amd64" ARCH_DIR="AMD64" M2LIBC="../M2libc" TOOLS="../${ARCH_DIR}/bin" BLOOD_FLAG="--64" BASE_ADDRESS="0x00600000" ENDIAN_FLAG="--little-endian" BINDIR="../${ARCH_DIR}/bin" BUILDDIR="../${ARCH_DIR}/artifact" TMPDIR="${BUILDDIR}" OPERATING_SYSTEM="Linux" ################################################ # Phase 12-15 Rebuild M2-Planet from C sources # ################################################ ./${ARCH_DIR}/bin/kaem --verbose --strict --file ${ARCH_DIR}/mescc-tools-full-kaem.kaem ###################################################### # Phase 16-23 Build mescc-tools-extra from M2-Planet # ###################################################### cd mescc-tools-extra ${BINDIR}/kaem --verbose --strict --file mescc-tools-extra.kaem cd .. ./${ARCH_DIR}/bin/sha256sum -c ${ARCH}.answers ######################### # Load after.kaem hook # ######################### exec ./${ARCH_DIR}/bin/kaem --verbose --strict --file ./after.kaem
#!/usr/bin/env bash # Mes --- Maxwell Equations of Software # Copyright © 2017,2019 Jan Nieuwenhuizen <janneke@gnu.org> # Copyright © 2017,2019 Jeremiah Orians # # This file is part of Mes. # # Mes is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or (at # your option) any later version. # # Mes is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Mes. If not, see <http://www.gnu.org/licenses/>. cd ${ARCH_DIR} ############################################### # Phase-12 Build M2-Mesoplanet from M2-Planet # ############################################### ./artifact/M2 --architecture ${ARCH} \ -f ../M2libc/sys/types.h \ -f ../M2libc/stddef.h \ -f ../M2libc/${ARCH}/linux/fcntl.c \ -f ../M2libc/fcntl.c \ -f ../M2libc/sys/utsname.h \ -f ../M2libc/${ARCH}/linux/unistd.c \ -f ../M2libc/${ARCH}/linux/sys/stat.c \ -f ../M2libc/stdlib.c \ -f ../M2libc/stdio.h \ -f ../M2libc/stdio.c \ -f ../M2libc/string.c \ -f ../M2libc/bootstrappable.c \ -f ../M2-Mesoplanet/cc.h \ -f ../M2-Mesoplanet/cc_globals.c \ -f ../M2-Mesoplanet/cc_env.c \ -f ../M2-Mesoplanet/cc_reader.c \ -f ../M2-Mesoplanet/cc_spawn.c \ -f ../M2-Mesoplanet/cc_core.c \ -f ../M2-Mesoplanet/cc_macro.c \ -f ../M2-Mesoplanet/cc.c \ --debug \ -o ./artifact/M2-Mesoplanet-1.M1 ./artifact/blood-elf-0 ${ENDIAN_FLAG} ${BLOOD_FLAG} -f ./artifact/M2-Mesoplanet-1.M1 -o ./artifact/M2-Mesoplanet-1-footer.M1 ./bin/M1 --architecture ${ARCH} \ ${ENDIAN_FLAG} \ -f ../M2libc/${ARCH}/${ARCH}_defs.M1 \ -f ../M2libc/${ARCH}/libc-full.M1 \ -f ./artifact/M2-Mesoplanet-1.M1 \ -f ./artifact/M2-Mesoplanet-1-footer.M1 \ -o ./artifact/M2-Mesoplanet-1.hex2 
./bin/hex2 --architecture ${ARCH} \ ${ENDIAN_FLAG} \ --base-address ${BASE_ADDRESS} \ -f ../M2libc/${ARCH}/ELF-${ARCH}-debug.hex2 \ -f ./artifact/M2-Mesoplanet-1.hex2 \ -o ./bin/M2-Mesoplanet ################################################# # Phase-13 Build final blood-elf from C sources # ################################################# ./artifact/M2 --architecture ${ARCH} \ -f ../M2libc/sys/types.h \ -f ../M2libc/stddef.h \ -f ../M2libc/${ARCH}/linux/fcntl.c \ -f ../M2libc/fcntl.c \ -f ../M2libc/sys/utsname.h \ -f ../M2libc/${ARCH}/linux/unistd.c \ -f ../M2libc/stdlib.c \ -f ../M2libc/stdio.h \ -f ../M2libc/stdio.c \ -f ../M2libc/bootstrappable.c \ -f ../mescc-tools/stringify.c \ -f ../mescc-tools/blood-elf.c \ --debug \ -o ./artifact/blood-elf-1.M1 ./artifact/blood-elf-0 ${BLOOD_FLAG} ${ENDIAN_FLAG} -f ./artifact/blood-elf-1.M1 -o ./artifact/blood-elf-1-footer.M1 ./bin/M1 --architecture ${ARCH} \ ${ENDIAN_FLAG} \ -f ../M2libc/${ARCH}/${ARCH}_defs.M1 \ -f ../M2libc/${ARCH}/libc-full.M1 \ -f ./artifact/blood-elf-1.M1 \ -f ./artifact/blood-elf-1-footer.M1 \ -o ./artifact/blood-elf-1.hex2 ./bin/hex2 --architecture ${ARCH} \ ${ENDIAN_FLAG} \ --base-address ${BASE_ADDRESS} \ -f ../M2libc/${ARCH}/ELF-${ARCH}-debug.hex2 \ -f ./artifact/blood-elf-1.hex2 \ -o ./bin/blood-elf # Now we have our shipping debuggable blood-elf, the rest will be down hill from # here as we have ALL of the core pieces of compiling and assembling debuggable # programs in a debuggable form with corresponding C source code. 
############################################# # Phase-14 Build get_machine from C sources # ############################################# ./artifact/M2 --architecture ${ARCH} \ -f ../M2libc/sys/types.h \ -f ../M2libc/stddef.h \ -f ../M2libc/sys/utsname.h \ -f ../M2libc/${ARCH}/linux/unistd.c \ -f ../M2libc/${ARCH}/linux/fcntl.c \ -f ../M2libc/fcntl.c \ -f ../M2libc/stdlib.c \ -f ../M2libc/stdio.h \ -f ../M2libc/stdio.c \ -f ../M2libc/bootstrappable.c \ -f ../mescc-tools/get_machine.c \ --debug \ -o artifact/get_machine.M1 ./bin/blood-elf ${BLOOD_FLAG} ${ENDIAN_FLAG} -f ./artifact/get_machine.M1 -o ./artifact/get_machine-footer.M1 ./bin/M1 --architecture ${ARCH} \ ${ENDIAN_FLAG} \ -f ../M2libc/${ARCH}/${ARCH}_defs.M1 \ -f ../M2libc/${ARCH}/libc-full.M1 \ -f ./artifact/get_machine.M1 \ -f ./artifact/get_machine-footer.M1 \ -o ./artifact/get_machine.hex2 ./bin/hex2 --architecture ${ARCH} \ ${ENDIAN_FLAG} \ --base-address ${BASE_ADDRESS} \ -f ../M2libc/${ARCH}/ELF-${ARCH}-debug.hex2 \ -f ./artifact/get_machine.hex2 \ -o ./bin/get_machine ############################################ # Phase-15 Build M2-Planet from M2-Planet # ############################################ ./artifact/M2 --architecture ${ARCH} \ -f ../M2libc/sys/types.h \ -f ../M2libc/stddef.h \ -f ../M2libc/sys/utsname.h \ -f ../M2libc/${ARCH}/linux/unistd.c \ -f ../M2libc/${ARCH}/linux/fcntl.c \ -f ../M2libc/fcntl.c \ -f ../M2libc/stdlib.c \ -f ../M2libc/stdio.h \ -f ../M2libc/stdio.c \ -f ../M2libc/bootstrappable.c \ -f ../M2-Planet/cc.h \ -f ../M2-Planet/cc_globals.c \ -f ../M2-Planet/cc_reader.c \ -f ../M2-Planet/cc_strings.c \ -f ../M2-Planet/cc_types.c \ -f ../M2-Planet/cc_core.c \ -f ../M2-Planet/cc_macro.c \ -f ../M2-Planet/cc.c \ --debug \ -o ./artifact/M2-1.M1 ./bin/blood-elf ${ENDIAN_FLAG} ${BLOOD_FLAG} -f ./artifact/M2-1.M1 -o ./artifact/M2-1-footer.M1 ./bin/M1 --architecture ${ARCH} \ ${ENDIAN_FLAG} \ -f ../M2libc/${ARCH}/${ARCH}_defs.M1 \ -f ../M2libc/${ARCH}/libc-full.M1 \ -f 
./artifact/M2-1.M1 \ -f ./artifact/M2-1-footer.M1 \ -o ./artifact/M2-1.hex2 ./bin/hex2 --architecture ${ARCH} \ ${ENDIAN_FLAG} \ --base-address ${BASE_ADDRESS} \ -f ../M2libc/${ARCH}/ELF-${ARCH}-debug.hex2 \ -f ./artifact/M2-1.hex2 \ -o ./bin/M2-Planet cd ..
/* Copyright (C) 2016 Jeremiah Orians * Copyright (C) 2020 deesix <deesix@tuta.io> * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #include <stdlib.h> #include <stdio.h> #include <string.h> #define FALSE 0 #define TRUE 1 int in_set(int c, char* s); int match(char* a, char* b); void require(int bool, char* error); char* int2str(int x, int base, int signed_p); void reset_hold_string(); struct type { struct type* next; int size; int offset; int is_signed; struct type* indirect; struct type* members; struct type* type; char* name; }; struct token_list { struct token_list* next; union { struct token_list* locals; struct token_list* prev; }; char* s; union { struct type* type; char* filename; }; union { struct token_list* arguments; struct token_list* expansion; int depth; int linenumber; }; }; #include "cc_globals.h"
/* Copyright (C) 2016 Jeremiah Orians * Copyright (C) 2020 deesix <deesix@tuta.io> * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ /* What types we have */ struct type* global_types; struct type* prim_types; /* What we are currently working on */ struct token_list* global_token; /* Output reorder collections*/ struct token_list* output_list; struct token_list* strings_list; struct token_list* globals_list; /* Make our string collection more efficient */ char* hold_string; int string_index; int MAX_STRING; /* enable preprocessor-only mode */ int PREPROCESSOR_MODE; /* enable spawn behavior to be effective */ char* M2LIBC_PATH; char* Architecture; char* OperatingSystem; int WORDSIZE; int ENDIAN; char* BASEADDRESS; int STDIO_USED; char* TEMPDIR; /* So we don't shoot ourself in the face */ int FUZZING; int DIRTY_MODE; int DEBUG_LEVEL;
/* Copyright (C) 2016 Jeremiah Orians
 * Copyright (C) 2020 deesix <deesix@tuta.io>
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#include"cc.h"
#include <sys/utsname.h>

void init_macro_env(char* sym, char* value, char* source, int num);
char* env_lookup(char* variable);
void clear_string(char* s);

/* Query the running system via uname().
 * Returns a freshly allocated struct utsname that the caller must free. */
struct utsname* get_uname_data()
{
	struct utsname* unameData = calloc(1, sizeof(struct utsname));
	require(NULL != unameData, "unameData calloc failed\n");
	uname(unameData);
	if(4 <= DEBUG_LEVEL)
	{
		fputs("utsname details: ", stderr);
		fputs(unameData->sysname, stderr);
		fputc(' ', stderr);
		fputs(unameData->machine, stderr);
		fputc('\n', stderr);
	}
	return unameData;
}

/* Select the target architecture and operating system.
 *
 * The architecture comes from the Architecture global when it is already
 * set, otherwise from uname() with ARCHITECTURE_OVERRIDE taking precedence.
 * The operating system defaults to "Linux" unless already set, with
 * OS_OVERRIDE taking precedence.
 *
 * Sets WORDSIZE, ENDIAN, BASEADDRESS, Architecture and OperatingSystem and
 * registers the matching predefined macros. Exits on an unknown architecture.
 */
void setup_env()
{
	if(2 <= DEBUG_LEVEL) fputs("Starting setup_env\n", stderr);
	char* ARCH;
	if(NULL != Architecture)
	{
		ARCH = Architecture;
	}
	else
	{
		ARCH = NULL;
		struct utsname* unameData = get_uname_data();

		if(match("i386", unameData->machine) ||
		   match("i486", unameData->machine) ||
		   match("i586", unameData->machine) ||
		   match("i686", unameData->machine) ||
		   match("i686-pae", unameData->machine)) ARCH = "x86";
		else if(match("x86_64", unameData->machine)) ARCH = "amd64";
		else
		{
			/* unameData is released at the end of this block while ARCH is
			 * still needed below, so ARCH must own its own copy of the name */
			ARCH = calloc(strlen(unameData->machine) + 2, sizeof(char));
			require(NULL != ARCH, "Memory initialization of ARCH in setup_env failed\n");
			strcpy(ARCH, unameData->machine);
		}

		if(3 <= DEBUG_LEVEL)
		{
			fputs("Architecture selected: ", stderr);
			fputs(ARCH, stderr);
			fputc('\n', stderr);
		}

		/* Check for override */
		char* hold = env_lookup("ARCHITECTURE_OVERRIDE");
		if(NULL != hold)
		{
			ARCH = hold;
			if(3 <= DEBUG_LEVEL)
			{
				fputs("environmental override for ARCH: ", stderr);
				fputs(ARCH, stderr);
				fputc('\n', stderr);
			}
		}

		free(unameData);
	}

	/* Set desired architecture */
	WORDSIZE = 32;
	ENDIAN = FALSE;
	BASEADDRESS = "0x0";
	if(match("knight-native", ARCH))
	{
		if(4 <= DEBUG_LEVEL) fputs("Using knight-native architecture\n", stderr);
		ENDIAN = TRUE;
		Architecture = "knight-native";
	}
	else if(match("knight-posix", ARCH))
	{
		if(4 <= DEBUG_LEVEL) fputs("Using knight-posix architecture\n", stderr);
		ENDIAN = TRUE;
		Architecture = "knight-posix";
	}
	else if(match("x86", ARCH))
	{
		if(4 <= DEBUG_LEVEL) fputs("Using x86 architecture\n", stderr);
		BASEADDRESS = "0x8048000";
		Architecture = "x86";
		init_macro_env("__i386__", "1", "--architecture", 0);
	}
	else if(match("amd64", ARCH))
	{
		if(4 <= DEBUG_LEVEL) fputs("Using amd64 architecture\n", stderr);
		BASEADDRESS = "0x00600000";
		Architecture = "amd64";
		WORDSIZE = 64;
		init_macro_env("__x86_64__", "1", "--architecture", 0);
	}
	else if(match("armv7l", ARCH))
	{
		if(4 <= DEBUG_LEVEL) fputs("Using armv7l architecture\n", stderr);
		BASEADDRESS = "0x10000";
		Architecture = "armv7l";
		init_macro_env("__arm__", "1", "--architecture", 0);
	}
	else if(match("aarch64", ARCH))
	{
		if(4 <= DEBUG_LEVEL) fputs("Using aarch64 architecture\n", stderr);
		BASEADDRESS = "0x400000";
		Architecture = "aarch64";
		WORDSIZE = 64;
		init_macro_env("__aarch64__", "1", "--architecture", 0);
	}
	else if(match("riscv32", ARCH))
	{
		if(4 <= DEBUG_LEVEL) fputs("Using riscv32 architecture\n", stderr);
		BASEADDRESS = "0x600000";
		Architecture = "riscv32";
		init_macro_env("__riscv", "1", "--architecture", 0);
		init_macro_env("__riscv_xlen", "32", "--architecture", 1);
	}
	else if(match("riscv64", ARCH))
	{
		if(4 <= DEBUG_LEVEL) fputs("Using riscv64 architecture\n", stderr);
		BASEADDRESS = "0x600000";
		Architecture = "riscv64";
		WORDSIZE = 64;
		init_macro_env("__riscv", "1", "--architecture", 0);
		init_macro_env("__riscv_xlen", "64", "--architecture", 1);
	}
	else
	{
		fputs("Unknown architecture: ", stderr);
		fputs(ARCH, stderr);
		fputs(" know values are: knight-native, knight-posix, x86, amd64, armv7l, aarch64, riscv32 and riscv64\n", stderr);
		exit(EXIT_FAILURE);
	}

	/* Setup Operating System */
	if(NULL == OperatingSystem)
	{
		OperatingSystem = "Linux";
		if(3 <= DEBUG_LEVEL)
		{
			fputs("Operating System selected: ", stderr);
			fputs(OperatingSystem, stderr);
			fputc('\n', stderr);
		}

		/* Check for override */
		char* hold = env_lookup("OS_OVERRIDE");
		if(NULL != hold)
		{
			OperatingSystem = hold;
			if(3 <= DEBUG_LEVEL)
			{
				fputs("environmental override for OS: ", stderr);
				fputs(OperatingSystem, stderr);
				fputc('\n', stderr);
			}
		}
	}

	if(match("UEFI", OperatingSystem))
	{
		if(4 <= DEBUG_LEVEL) fputs("Using UEFI\n", stderr);
		BASEADDRESS = "0x0";
		OperatingSystem = "UEFI";
		init_macro_env("__uefi__", "1", "--os", 0);
	}

	if(2 <= DEBUG_LEVEL) fputs("setup_env successful\n", stderr);
}

struct Token
{
	/*
	 * For the token linked-list, this stores the token; for the env linked-list
	 * this stores the value of the variable.
	 */
	char* value;
	/*
	 * Used only for the env linked-list. It holds a string containing the
	 * name of the var.
	 */
	char* var;
	/*
	 * This struct stores a node of a singly linked list, store the pointer to
	 * the next node.
	 */
	struct Token* next;
};

/* Head of the env linked-list built by populate_env */
struct Token* env;

/* Count the entries of a NULL-terminated array of strings */
int array_length(char** array)
{
	int length = 0;

	while(array[length] != NULL)
	{
		length = length + 1;
	}

	return length;
}

/* Search for a variable in the token linked-list.
 * Returns the value of the first node whose var matches, or NULL. */
char* token_lookup(char* variable, struct Token* token)
{
	if(6 <= DEBUG_LEVEL)
	{
		fputs("in token_lookup\nLooking for: ", stderr);
		fputs(variable, stderr);
		fputc('\n', stderr);
	}

	/* Start at the head */
	struct Token* n = token;

	/* Loop over the linked-list */
	while(n != NULL)
	{
		if(15 <= DEBUG_LEVEL)
		{
			fputs(n->var, stderr);
			fputc('\n', stderr);
		}

		if(match(variable, n->var))
		{
			if(6 <= DEBUG_LEVEL) fputs("match found in token_lookup\n", stderr);
			/* We have found the correct node */
			return n->value; /* Done */
		}

		/* Nope, try the next */
		n = n->next;
	}

	/* We didn't find anything! */
	return NULL;
}

/* Search for a variable in the env linked-list */
char* env_lookup(char* variable)
{
	return token_lookup(variable, env);
}

/* Scratch buffer (4096 bytes, allocated by populate_env) and its fill level */
char* envp_hold;
int envp_index;

/* Empty the scratch buffer */
void reset_envp_hold()
{
	clear_string(envp_hold);
	envp_index = 0;
}

/* Append one byte to the scratch buffer, aborting once it is full */
void push_env_byte(int c)
{
	envp_hold[envp_index] = c;
	envp_index = envp_index + 1;
	require(4096 > envp_index, "Token exceeded 4096 char envp limit\n");
}

/* Split one "VAR=value" line into a new node placed in front of n.
 * A line without '=' yields a variable named after the whole line with an
 * empty value. Returns the new head of the list. */
struct Token* process_env_variable(char* envp_line, struct Token* n)
{
	struct Token* node = calloc(1, sizeof(struct Token));
	require(node != NULL, "Memory initialization of node failed\n");
	reset_envp_hold();
	int i = 0;

	/* Stop at the terminator as well, so a line lacking '=' cannot run away */
	while((envp_line[i] != '=') && (envp_line[i] != 0))
	{
		/* Copy over everything up to = to var */
		push_env_byte(envp_line[i]);
		i = i + 1;
	}

	node->var = calloc(i + 2, sizeof(char));
	require(node->var != NULL, "Memory initialization of n->var in population of env failed\n");
	strcpy(node->var, envp_hold);

	/* Skip over = (when there is one) */
	if('=' == envp_line[i]) i = i + 1;

	reset_envp_hold();
	while(envp_line[i] != 0)
	{
		/* Copy everything else to value */
		push_env_byte(envp_line[i]);
		i = i + 1;
	}

	/* Sometimes, we get lines like VAR=, indicating nothing is in the variable */
	if(0 == strlen(envp_hold))
	{
		node->value = "";
	}
	else
	{
		/* but looks like we got something so, lets use it */
		node->value = calloc(strlen(envp_hold) + 2, sizeof(char));
		require(node->value != NULL, "Memory initialization of n->value in population of env failed\n");
		strcpy(node->value, envp_hold);
	}

	node->next = n;
	return node;
}

/* Build the env linked-list from a NULL-terminated envp array.
 * A NULL or empty envp leaves env untouched. */
void populate_env(char** envp)
{
	if(2 <= DEBUG_LEVEL) fputs("populate_env started\n", stderr);
	/* You can't populate a NULL environment */
	if(NULL == envp)
	{
		if(3 <= DEBUG_LEVEL) fputs("NULL envp\n", stderr);
		return;
	}

	/* avoid empty arrays */
	int max = array_length(envp);

	if(0 == max)
	{
		if(3 <= DEBUG_LEVEL) fputs("Empty envp\n", stderr);
		return;
	}

	/* Initialize env and n */
	env = NULL;
	int i;
	envp_hold = calloc(4096, sizeof(char));
	require(envp_hold != NULL, "Memory initialization of envp_hold in population of env failed\n");
	char* envp_line = calloc(4096, sizeof(char));
	require(envp_line != NULL, "Memory initialization of envp_line in population of env failed\n");

	if(3 <= DEBUG_LEVEL) fputs("starting env loop\n", stderr);
	for(i = 0; i < max; i = i + 1)
	{
		/*
		 * envp is weird.
		 * When referencing envp[i]'s characters directly, they were all jumbled.
		 * So just copy envp[i] to envp_line, and work with that - that seems
		 * to fix it.
		 */
		clear_string(envp_line);
		require(4096 > strlen(envp[i]), "envp line exceeds 4096byte limit\n");
		strcpy(envp_line, envp[i]);

		if(9 <= DEBUG_LEVEL)
		{
			fputs("trying envp_line: ", stderr);
			fputs(envp_line, stderr);
			fputc('\n', stderr);
		}

		env = process_env_variable(envp_line, env);

		if(9 <= DEBUG_LEVEL)
		{
			fputs("got var of: ", stderr);
			fputs(env->var, stderr);
			fputs("\nAnd value of: ", stderr);
			fputs(env->value, stderr);
			fputc('\n', stderr);
		}
	}

	free(envp_line);
	free(envp_hold);
	if(3 <= DEBUG_LEVEL)
	{
		fputs("\n\nenv loop successful\n", stderr);
		fputs(int2str(i, 10, FALSE), stderr);
		fputs(" envp records processed\n\n", stderr);
	}

	require(NULL != env, "can't have an empty environment from the creation of a non-null environment\n");
	if(2 <= DEBUG_LEVEL) fputs("populate_env successful\n", stderr);
}
/* Copyright (C) 2016 Jeremiah Orians
 * Copyright (C) 2021 Andrius Štikonas <andrius@stikonas.eu>
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#include "cc.h"
char* env_lookup(char* variable);
char* int2str(int x, int base, int signed_p);

/* Record of a file name that has already been included */
struct visited
{
	struct visited* prev;
	char* name;
};

/* Globals */
FILE* input;
struct token_list* token;
int line;
char* file;
struct visited* vision;

/* TRUE when s was already handed to just_seen, FALSE otherwise */
int previously_seen(char* s)
{
	struct visited* v = vision;
	while(NULL != v)
	{
		if(match(v->name, s)) return TRUE;
		v = v->prev;
	}
	return FALSE;
}

/* Remember s (the pointer itself is kept, not a copy) as visited */
void just_seen(char* s)
{
	struct visited* hold = calloc(1, sizeof(struct visited));
	require(NULL != hold, "Exhausted memory while recording a visited file\n");
	hold->prev = vision;
	hold->name = s;
	vision = hold;
}

/* Read the next byte of input, counting newlines (10) in line */
int grab_byte()
{
	int c = fgetc(input);
	if(10 == c) line = line + 1;
	return c;
}

/* Append c to hold_string, aborting once MAX_STRING is reached */
void push_byte(int c)
{
	hold_string[string_index] = c;
	string_index = string_index + 1;
	require(MAX_STRING > string_index, "Token exceeded MAX_STRING char limit\nuse --max-string number to increase\n");
}

/* Store c and return the byte that follows it in the input */
int consume_byte(int c)
{
	push_byte(c);
	return grab_byte();
}

/* Collect a quoted string or character literal; c is the opening quote.
 * A backslash keeps the byte after it from ending the literal.
 * Returns the first byte after the closing quote. */
int preserve_string(int c)
{
	int frequent = c;
	int escape = FALSE;
	do
	{
		if(!escape && '\\' == c ) escape = TRUE;
		else escape = FALSE;
		c = consume_byte(c);
		require(EOF != c, "Unterminated string\n");
	} while(escape || (c != frequent));
	c = consume_byte(frequent);
	return c;
}

/* Copy source into target, stopping at its terminator or after max bytes.
 * No terminator is written; target is expected to be zeroed already. */
void copy_string(char* target, char* source, int max)
{
	int i = 0;
	while(0 != source[i])
	{
		target[i] = source[i];
		i = i + 1;
		if(i == max) break;
	}
}

/* Collect bytes for as long as they belong to the set S */
int preserve_keyword(int c, char* S)
{
	while(in_set(c, S))
	{
		c = consume_byte(c);
	}
	return c;
}

/* Zero s up to its first terminator */
void clear_string(char* s)
{
	int i = 0;
	while(0 != s[i])
	{
		s[i] = 0;
		i = i + 1;
		require(i < MAX_STRING, "string exceeded max string size while clearing string\n");
	}
}

/* Empty hold_string ready for the next token */
void reset_hold_string()
{
	clear_string(hold_string);
	string_index = 0;
}

/* Unlink token from its list and return the node that followed it */
/* note if this is the first token in the list, head needs fixing up */
struct token_list* eat_token(struct token_list* token)
{
	if(NULL != token->prev)
	{
		token->prev->next = token->next;
	}

	/* update backlinks */
	if(NULL != token->next)
	{
		token->next->prev = token->prev;
	}

	return token->next;
}

/* Make a token holding a copy of s (in size zeroed bytes) the new head of
 * the global token list, stamped with the current file and line */
void new_token(char* s, int size)
{
	struct token_list* current = calloc(1, sizeof(struct token_list));
	require(NULL != current, "Exhausted memory while getting token\n");

	/* More efficiently allocate memory for string */
	current->s = calloc(size, sizeof(char));
	require(NULL != current->s, "Exhausted memory while trying to copy a token\n");
	copy_string(current->s, s, MAX_STRING);

	/* Both links start out pointing at the previous head */
	current->prev = token;
	current->next = token;
	current->linenumber = line;
	current->filename = file;
	token = current;
}

/* Collect one token into hold_string, starting with byte c.
 * Returns the first byte that is not part of the token (EOF included). */
int get_token(int c)
{
	reset_hold_string();

	if(c == EOF)
	{
		return c;
	}
	else if((32 == c) || (9 == c) || (c == '\n'))
	{
		/* Each space, tab or newline is a token of its own */
		c = consume_byte(c);
	}
	else if('#' == c)
	{
		c = consume_byte(c);
		c = preserve_keyword(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_");
	}
	else if(in_set(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"))
	{
		c = preserve_keyword(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_:");
	}
	else if(in_set(c, "<=>|&!^%"))
	{
		c = preserve_keyword(c, "<=>|&!^%");
	}
	else if(in_set(c, "'\""))
	{
		c = preserve_string(c);
	}
	else if(c == '/')
	{
		c = consume_byte(c);
		if(c == '*')
		{
			/* Block comment: kept whole as a single token */
			c = consume_byte(c);
			while(c != '/')
			{
				while(c != '*')
				{
					c = consume_byte(c);
					require(EOF != c, "Hit EOF inside of block comment\n");
				}
				c = consume_byte(c);
				require(EOF != c, "Hit EOF inside of block comment\n");
			}
			c = consume_byte(c);
		}
		else if(c == '/')
		{
			/* Line comment: kept whole, newline included */
			while(c != '\n')
			{
				c = consume_byte(c);
				require(EOF != c, "Hit EOF inside of line comment\n");
			}
			c = consume_byte(c);
		}
		else if(c == '=')
		{
			c = consume_byte(c);
		}
	}
	else if(c == '*')
	{
		c = consume_byte(c);
		if(c == '=')
		{
			c = consume_byte(c);
		}
	}
	else if(c == '+')
	{
		c = consume_byte(c);
		if(c == '=')
		{
			c = consume_byte(c);
		}
		if(c == '+')
		{
			c = consume_byte(c);
		}
	}
	else if(c == '-')
	{
		c = consume_byte(c);
		if(c == '=')
		{
			c = consume_byte(c);
		}
		if(c == '>')
		{
			c = consume_byte(c);
		}
		if(c == '-')
		{
			c = consume_byte(c);
		}
	}
	else
	{
		c = consume_byte(c);
	}

	return c;
}

/* Reverse a token list in place (next links only); returns the new head */
struct token_list* reverse_list(struct token_list* head)
{
	struct token_list* root = NULL;
	struct token_list* next;
	while(NULL != head)
	{
		next = head->next;
		head->next = root;
		root = head;
		head = next;
	}
	return root;
}

/* Collect the file name following #include into hold_string.
 * The opening delimiter is kept as the first byte (" or <) and the closing
 * one is left out. Returns the closing delimiter. Exits when something
 * other than a quoted or bracketed name follows. */
int read_include(int c)
{
	reset_hold_string();
	int done = FALSE;
	int ch;

	while(!done)
	{
		if(c == EOF)
		{
			fputs("we don't support EOF as a filename in #include statements\n", stderr);
			exit(EXIT_FAILURE);
		}
		else if((32 == c) || (9 == c) || (c == '\n'))
		{
			c = grab_byte();
		}
		else if(('"' == c) || ('<' == c))
		{
			/* Scan up to the matching closer, then restore the opener */
			if('<' == c) c = '>';
			ch = c;
			do
			{
				c = consume_byte(c);
				require(EOF != c, "Unterminated filename in #include\n");
			} while(c != ch);
			if('>' == ch) hold_string[0] = '<';
			done = TRUE;
		}
		else
		{
			/* Nothing above consumes this byte, so looping again would never end */
			fputs("#include expects \"filename\" or <filename>\n", stderr);
			exit(EXIT_FAILURE);
		}
	}

	return c;
}

/* Emit a "// #FILENAME name line" marker token followed by a newline token */
void insert_file_header(char* name, int line)
{
	char* hold_line = int2str(line, 10, FALSE);
	reset_hold_string();
	strcat(hold_string, "// #FILENAME ");
	strcat(hold_string, name);
	strcat(hold_string, " ");
	strcat(hold_string, hold_line);
	new_token(hold_string, strlen(hold_string)+2);
	new_token("\n", 3);
}

struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename, int include);

/* Handle an #include whose directive token was just read.
 * ch is the byte following the directive; include_file says whether the
 * named file should really be read. Returns the byte to resume with. */
int include_file(int ch, int include_file)
{
	/* The old state to restore to */
	char* hold_filename = file;
	FILE* hold_input = input;
	int hold_number;

	/* The new file to load */
	char* new_filename;
	FILE* new_file;

	require(EOF != ch, "#include failed to receive filename\n");
	/* Remove the #include */
	token = token->next;

	/* Get new filename */
	read_include(ch);
	/* with just a little extra to put in the matching at the end */
	new_token(hold_string, string_index + 3);

	ch = '\n';
	new_filename = token->s;
	/* Remove name from stream */
	token = token->next;

	/* Try to open the file */
	if('<' == new_filename[0])
	{
		if(match("stdio.h", new_filename + 1)) STDIO_USED = TRUE;
		reset_hold_string();
		strcat(hold_string, M2LIBC_PATH);
		strcat(hold_string, "/");
		strcat(hold_string, new_filename + 1);
		strcat(new_filename, ">");
		if(match("Linux", OperatingSystem))
		{
			if(NULL == strstr(hold_string, "uefi"))
			{
				new_file = fopen(hold_string, "r");
			}
			else
			{
				puts("skipping:");
				puts(hold_string);
				return ch;
			}
		}
		else if(match("UEFI", OperatingSystem))
		{
			if(NULL == strstr(hold_string, "linux"))
			{
				new_file = fopen(hold_string, "r");
			}
			else
			{
				puts("skipping:");
				puts(hold_string);
				return ch;
			}
		}
		else
		{
			puts("unknown host");
			exit(EXIT_FAILURE);
		}
	}
	else
	{
		if(match("M2libc/bootstrappable.h", new_filename+1))
		{
			reset_hold_string();
			strcat(hold_string, M2LIBC_PATH);
			strcat(hold_string, "/bootstrappable.h");
			new_file = fopen(hold_string, "r");
		}
		else new_file = fopen(new_filename+1, "r");

		strcat(new_filename, "\"");
	}

	/* prevent multiple visits */
	if(previously_seen(new_filename))
	{
		/* the file was opened above; do not leave the handle dangling */
		if(NULL != new_file) fclose(new_file);
		return ch;
	}
	just_seen(new_filename);

	/* special case this compatibility crap */
	if(match("\"../gcc_req.h\"", new_filename) || match("\"gcc_req.h\"", new_filename))
	{
		if(NULL != new_file) fclose(new_file);
		return ch;
	}

	if(include_file)
	{
		fputs("reading file: ", stderr);
		fputs(new_filename, stderr);
		fputc('\n', stderr);
	}

	/* catch garbage input */
	if(NULL == new_file)
	{
		fputs("unable to read file: ", stderr);
		fputs(new_filename, stderr);
		fputs("\nAborting hard!\n", stderr);
		exit(EXIT_FAILURE);
	}

	/* protect our current line number */
	hold_number = line + 1;

	/* Read the new file */
	if(include_file) read_all_tokens(new_file, token, new_filename, include_file);

	/* Nothing reads from the included file past this point */
	fclose(new_file);

	/* put back old file info */
	insert_file_header(hold_filename, hold_number);

	/* resume reading old file */
	input = hold_input;
	line = hold_number;
	file = hold_filename;
	return ch;
}

/* Tokenize the whole of file a, adding to the list headed by current.
 * #include directives are followed as they are met. Returns the new head
 * of the token list (most recent token first). */
struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename, int include)
{
	token = current;
	insert_file_header(filename, 1);
	input = a;
	line = 1;
	file = filename;
	int ch = grab_byte();
	while(EOF != ch)
	{
		ch = get_token(ch);
		new_token(hold_string, string_index + 2);
		if(match("#include", token->s)) ch = include_file(ch, include);
	}

	return token;
}
/* Copyright (C) 2016 Jeremiah Orians
 * Copyright (C) 2020 deesix <deesix@tuta.io>
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#include"cc.h"
#include <unistd.h>
#include <sys/wait.h>
#define MAX_ARRAY 256

char* env_lookup(char* variable);

/* Function to find a character in a string.
 * Returns NULL only for an empty string; when the character is absent from
 * a non-empty string the result points at the terminator instead, which is
 * what lets find_executable treat the last PATH element like the others. */
char* find_char(char* string, char a)
{
	if(0 == string[0])
	{
		return NULL;
	}

	while(a != string[0])
	{
		string = string + 1;

		if(0 == string[0])
		{
			return string;
		}
	}

	return string;
}

/* Find the full path to an executable.
 * Names starting with '.' or '/' are returned unchanged; anything else is
 * looked for in each element of PATH in turn. Returns NULL for an empty
 * name or when no element of PATH holds a readable file of that name. */
char* find_executable(char* name)
{
	char* PATH = env_lookup("PATH");
	require(NULL != PATH, "No PATH found\nAborting\n");
	if(match("", name))
	{
		return NULL;
	}

	if(('.' == name[0]) || ('/' == name[0]))
	{
		/* assume names that start with . or / are relative or absolute */
		return name;
	}

	/* The copy below needs the terminator plus one more zero byte after it,
	 * because the walk steps one past the terminator of the last element */
	require(MAX_STRING > (strlen(PATH) + 1), "PATH is too long\n");

	char* trial = calloc(MAX_STRING, sizeof(char));
	require(trial != NULL, "Memory initialization of trial in find_executable failed\n");
	char* MPATH = calloc(MAX_STRING, sizeof(char)); /* Modified PATH */
	require(MPATH != NULL, "Memory initialization of MPATH in find_executable failed\n");
	strcpy(MPATH, PATH);
	FILE* t;
	char* next = find_char(MPATH, ':');
	int index;
	int offset;
	int mpath_length;
	int name_length;
	int trial_length;

	while(NULL != next)
	{
		/* Reset trial */
		trial_length = strlen(trial);
		for(index = 0; index < trial_length; index = index + 1)
		{
			trial[index] = 0;
		}

		/* Cut the current element off from the rest of the PATH */
		next[0] = 0;

		mpath_length = strlen(MPATH);
		name_length = strlen(name);
		/* directory + '/' + name + terminator has to fit before we write it */
		require(MAX_STRING > (mpath_length + name_length + 1), "COMMAND TOO LONG!\nABORTING HARD\n");

		/* prepend_string(MPATH, prepend_string("/", name)) */
		for(index = 0; index < mpath_length; index = index + 1)
		{
			trial[index] = MPATH[index];
		}
		trial[index] = '/';
		offset = strlen(trial);
		for(index = 0; index < name_length; index = index + 1)
		{
			trial[index + offset] = name[index];
		}

		/* Try the trial */
		t = fopen(trial, "r");
		if(NULL != t)
		{
			fclose(t);
			return trial;
		}

		MPATH = next + 1;
		next = find_char(MPATH, ':');
	}

	return NULL;
}

/* Echo the command line about to be run to stderr */
void sanity_command_check(char** array)
{
	int i = 0;
	char* s = array[0];
	while(NULL != s)
	{
		fputs(s, stderr);
		fputc(' ', stderr);
		i = i + 1;
		s = array[i];
	}
	fputc('\n', stderr);
}

/* Decode a wait status for program.
 * Returns the exit status for a normal exit or a stopped child and the
 * signal number for a signalled one; exits when the status fits neither. */
int what_exit(char* program, int status)
{
	/*
	 * If the low-order 8 bits of w_status are equal to 0x7F or zero, the child
	 * process has stopped. If the low-order 8 bits of w_status are non-zero and
	 * are not equal to 0x7F, the child process terminated due to a signal
	 * otherwise, the child process terminated due to an exit() call.
	 *
	 * In the event it was a signal that stopped the process the top 8 bits of
	 * w_status contain the signal that caused the process to stop.
	 *
	 * In the event it was terminated the bottom 7 bits of w_status contain the
	 * terminating error number for the process.
	 *
	 * If bit 0x80 of w_status is set, a core dump was produced.
	 */

	if(DEBUG_LEVEL > 6)
	{
		fputs("in what_exit with char* program of: ", stderr);
		fputs(program, stderr);
		fputs("\nAnd int status of: 0x", stderr);
		fputs(int2str(status, 16, FALSE), stderr);
		fputc('\n', stderr);
	}

	int WIFEXITED = !(status & 0x7F);
	int WEXITSTATUS = (status & 0xFF00) >> 8;
	int WTERMSIG = status & 0x7F;
	int WCOREDUMP = status & 0x80;
	int WIFSIGNALED = !((0x7F == WTERMSIG) || (0 == WTERMSIG));
	int WIFSTOPPED = ((0x7F == WTERMSIG) && (0 == WCOREDUMP));

	if(WIFEXITED)
	{
		if(DEBUG_LEVEL > 2)
		{
			fputc('\n', stderr);
			fputs(program, stderr);
			fputs(" normal termination, exit status = ", stderr);
			fputs(int2str(WEXITSTATUS, 10, TRUE), stderr);
			fputc('\n', stderr);
		}
		return WEXITSTATUS;
	}
	else if (WIFSIGNALED)
	{
		fputc('\n', stderr);
		fputs(program, stderr);
		fputs(" abnormal termination, signal number = ", stderr);
		fputs(int2str(WTERMSIG, 10, TRUE), stderr);
		fputc('\n', stderr);
		if(WCOREDUMP) fputs("core dumped\n", stderr);
		return WTERMSIG;
	}
	else if(WIFSTOPPED)
	{
		fputc('\n', stderr);
		fputs(program, stderr);
		fputs(" child stopped, signal number = ", stderr);
		fputs(int2str(WEXITSTATUS, 10, TRUE), stderr);
		fputc('\n', stderr);
		return WEXITSTATUS;
	}

	fputc('\n', stderr);
	fputs(program, stderr);
	fputs(" :: something crazy happened with execve\nI'm just gonna get the hell out of here\n", stderr);
	exit(EXIT_FAILURE);
}

/* Run name with the argument vector array and environment envp, wait for
 * it and exit with its result when that result is not zero. */
void _execute(char* name, char** array, char** envp)
{
	int status; /* i.e. return code */
	/* Get the full path to the executable */
	char* program = find_executable(name);

	/* Check we can find the executable */
	if(NULL == program)
	{
		fputs("WHILE EXECUTING ", stderr);
		fputs(name, stderr);
		fputs(" NOT FOUND!\nABORTING HARD\n", stderr);
		exit(EXIT_FAILURE);
	}

	sanity_command_check(array);

	int result;
#ifdef __uefi__
	result = spawn(program, array, envp);
#else
	int f = fork();

	/* Ensure fork succeeded */
	if(f == -1)
	{
		fputs("WHILE EXECUTING ", stderr);
		fputs(name, stderr);
		fputs(" fork() FAILED\nABORTING HARD\n", stderr);
		exit(EXIT_FAILURE);
	}
	else if(f == 0)
	{
		/* Child */
		/**************************************************************
		 * Fuzzing produces random stuff; we don't want it running    *
		 * dangerous commands. So we just don't execve.               *
		 **************************************************************/
		if(FALSE == FUZZING)
		{
			/* We are not fuzzing */
			/* execve() returns only on error */
			execve(program, array, envp);
			fputs("Unable to execute: ", stderr);
			fputs(program, stderr);
			fputs("\nPlease check file permissions and that it is a valid binary\n", stderr);
		}

		/* Prevent infinite loops */
		_exit(EXIT_FAILURE);
	}

	/* Otherwise we are the parent */
	/* And we should wait for it to complete */
	waitpid(f, &status, 0);

	result = what_exit(program, status);
#endif
	if(0 != result)
	{
		fputs("Subprocess: ", stderr);
		fputs(program, stderr);
		fputs(" error\nAborting for safety\n", stderr);
		exit(result);
	}
}

/* Store a private copy of string in array[index] */
void insert_array(char** array, int index, char* string)
{
	int size = strlen(string);
	array[index] = calloc(size+2, sizeof(char));
	require(NULL != array[index], "Memory initialization of argument in insert_array failed\n");
	strcpy(array[index], string);
}

/* Link input into output with hex2, using the (debug) header that matches
 * architecture and OperatingSystem */
void spawn_hex2(char* input, char* output, char* architecture, char** envp, int debug)
{
	char* hex2;
#ifdef __uefi__
	hex2 = "hex2.efi";
#else
	hex2 = "hex2";
#endif

	char* elf_header = calloc(MAX_STRING, sizeof(char));
	require(NULL != elf_header, "Memory initialization of elf_header in spawn_hex2 failed\n");
	elf_header = strcat(elf_header, M2LIBC_PATH);
	elf_header = strcat(elf_header, "/");
	elf_header = strcat(elf_header, architecture);
	if(match("UEFI", OperatingSystem)) elf_header = strcat(elf_header, "/uefi/PE32-");
	else elf_header = strcat(elf_header, "/ELF-");
	elf_header = strcat(elf_header, architecture);
	if(debug)
	{
		elf_header = strcat(elf_header, "-debug.hex2");
	}
	else
	{
		elf_header = strcat(elf_header, ".hex2");
	}

	fputs("# starting hex2 linking\n", stdout);
	char** array = calloc(MAX_ARRAY, sizeof(char*));
	require(NULL != array, "Memory initialization of array in spawn_hex2 failed\n");
	insert_array(array, 0, hex2);
	insert_array(array, 1, "--file");
	insert_array(array, 2, elf_header);
	insert_array(array, 3, "--file");
	insert_array(array, 4, input);
	insert_array(array, 5, "--output");
	insert_array(array, 6, output);
	insert_array(array, 7, "--architecture");
	insert_array(array, 8, architecture);
	insert_array(array, 9, "--base-address");
	insert_array(array, 10, BASEADDRESS);
	if(ENDIAN)
	{
		insert_array(array, 11, "--big-endian");
	}
	else
	{
		insert_array(array, 11, "--little-endian");
	}
	_execute(hex2, array, envp);
}

/* Assemble input (plus debug_file when debug_flag is set) into output with
 * M1, together with the architecture definitions and the chosen libc */
void spawn_M1(char* input, char* debug_file, char* output, char* architecture, char** envp, int debug_flag)
{
	char* M1;
#ifdef __uefi__
	M1 = "M1.efi";
#else
	M1 = "M1";
#endif

	fputs("# starting M1 assembly\n", stdout);
	char* definitions = calloc(MAX_STRING, sizeof(char));
	require(NULL != definitions, "Memory initialization of definitions in spawn_M1 failed\n");
	definitions = strcat(definitions, M2LIBC_PATH);
	definitions = strcat(definitions, "/");
	definitions = strcat(definitions, architecture);
	definitions = strcat(definitions, "/");
	definitions = strcat(definitions, architecture);
	definitions = strcat(definitions, "_defs.M1");

	char* libc = calloc(MAX_STRING, sizeof(char));
	require(NULL != libc, "Memory initialization of libc in spawn_M1 failed\n");
	libc = strcat(libc, M2LIBC_PATH);
	libc = strcat(libc, "/");
	libc = strcat(libc, architecture);

	if(match("UEFI", OperatingSystem))
	{
		libc = strcat(libc, "/uefi/libc-full.M1");
	}
	else if(STDIO_USED)
	{
		libc = strcat(libc, "/libc-full.M1");
	}
	else
	{
		libc = strcat(libc, "/libc-core.M1");
	}

	char** array = calloc(MAX_ARRAY, sizeof(char*));
	require(NULL != array, "Memory initialization of array in spawn_M1 failed\n");
	insert_array(array, 0, M1);
	insert_array(array, 1, "--file");
	insert_array(array, 2, definitions);
	insert_array(array, 3, "--file");
	insert_array(array, 4, libc);
	insert_array(array, 5, "--file");
	insert_array(array, 6, input);
	if(ENDIAN)
	{
		insert_array(array, 7, "--big-endian");
	}
	else
	{
		insert_array(array, 7, "--little-endian");
	}
	insert_array(array, 8, "--architecture");
	insert_array(array, 9, architecture);

	if(debug_flag)
	{
		insert_array(array, 10, "--file");
		insert_array(array, 11, debug_file);
		insert_array(array, 12, "--output");
		insert_array(array, 13, output);
	}
	else
	{
		insert_array(array, 10, "--output");
		insert_array(array, 11, output);
	}
	_execute(M1, array, envp);
}

/* Generate the debug footer for input into output with blood-elf;
 * large_flag adds --64 */
void spawn_blood_elf(char* input, char* output, char** envp, int large_flag)
{
	char* blood_elf;
#ifdef __uefi__
	blood_elf = "blood-elf.efi";
#else
	blood_elf = "blood-elf";
#endif

	fputs("# starting Blood-elf stub generation\n", stdout);
	char** array = calloc(MAX_ARRAY, sizeof(char*));
	require(NULL != array, "Memory initialization of array in spawn_blood_elf failed\n");
	insert_array(array, 0,blood_elf);
	insert_array(array, 1, "--file");
	insert_array(array, 2, input);
	if(ENDIAN)
	{
		insert_array(array, 3, "--big-endian");
	}
	else
	{
		insert_array(array, 3, "--little-endian");
	}
	insert_array(array, 4, "--output");
	insert_array(array, 5, output);
	if(large_flag) insert_array(array, 6, "--64");
	_execute(blood_elf, array, envp);
}

/* Compile input into output with M2-Planet; debug_flag adds --debug */
void spawn_M2(char* input, char* output, char* architecture, char** envp, int debug_flag)
{
	char* M2_Planet;
#ifdef __uefi__
	M2_Planet = "M2-Planet.efi";
#else
	M2_Planet = "M2-Planet";
#endif

	fputs("# starting M2-Planet build\n", stdout);
	char** array = calloc(MAX_ARRAY, sizeof(char*));
	require(NULL != array, "Memory initialization of array in spawn_M2 failed\n");
	insert_array(array, 0, M2_Planet);
	insert_array(array, 1, "--file");
	insert_array(array, 2, input);
	insert_array(array, 3, "--output");
	insert_array(array, 4, output);
	insert_array(array, 5, "--architecture");
	insert_array(array, 6, architecture);
	if(debug_flag) insert_array(array, 7, "--debug");
	_execute(M2_Planet, array, envp);
}

/* Join prefix and suffix in storage sized to hold both, ready for mkstemp */
char* spawn_temp_template(char* prefix, char* suffix)
{
	char* name = calloc(strlen(prefix) + strlen(suffix) + 2, sizeof(char));
	require(NULL != name, "Memory initialization of tempfile name failed\n");
	strcpy(name, prefix);
	strcat(name, suffix);
	return name;
}

/* Run the whole toolchain on preprocessed_file and write destination:
 * M2-Planet, then blood-elf (debug builds only), then M1, then hex2.
 * Intermediate files are created under prefix and removed afterwards
 * unless DIRTY_MODE is set. Debug output is disabled for UEFI. */
void spawn_processes(int debug_flag, char* prefix, char* preprocessed_file, char* destination, char** envp)
{
	int large_flag = FALSE;
	if(WORDSIZE > 32) large_flag = TRUE;

	if(match("UEFI", OperatingSystem))
	{
		debug_flag = FALSE;
	}

	/* Sized from prefix so a long temporary directory cannot overflow it */
	char* M2_output = spawn_temp_template(prefix, "/M2-Planet-XXXXXX");
	int i = mkstemp(M2_output);
	if(-1 != i)
	{
		close(i);
		spawn_M2(preprocessed_file, M2_output, Architecture, envp, debug_flag);
	}
	else
	{
		fputs("unable to get a tempfile for M2-Planet output\n", stderr);
		exit(EXIT_FAILURE);
	}

	char* blood_output = "";
	if(debug_flag)
	{
		blood_output = spawn_temp_template(prefix, "/blood-elf-XXXXXX");
		i = mkstemp(blood_output);
		if(-1 != i)
		{
			close(i);
			spawn_blood_elf(M2_output, blood_output, envp, large_flag);
		}
		else
		{
			fputs("unable to get a tempfile for blood-elf output\n", stderr);
			exit(EXIT_FAILURE);
		}
	}

	char* M1_output = spawn_temp_template(prefix, "/M1-macro-XXXXXX");
	i = mkstemp(M1_output);
	if(-1 != i)
	{
		close(i);
		spawn_M1(M2_output, blood_output, M1_output, Architecture, envp, debug_flag);
	}
	else
	{
		fputs("unable to get a tempfile for M1 output\n", stderr);
		exit(EXIT_FAILURE);
	}

	/* We no longer need the M2-Planet tempfile output */
	if(!DIRTY_MODE) remove(M2_output);
	/* Nor the blood-elf output anymore if it exists */
	if(!match("", blood_output))
	{
		if(!DIRTY_MODE) remove(blood_output);
	}

	/* Build the final binary */
	spawn_hex2(M1_output, destination, Architecture, envp, debug_flag);

	/* clean up after ourselves*/
	if(!DIRTY_MODE) remove(M1_output);
}
/* Copyright (C) 2016 Jeremiah Orians
 * Copyright (C) 2018 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
 * Copyright (C) 2020 deesix <deesix@tuta.io>
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "cc.h"
#include "gcc_req.h"
#include <stdint.h>

/* Imported functions */
char* int2str(int x, int base, int signed_p);

/*
 * Write a "filename:line:" prefix for TOKEN to stderr.
 * A NULL token produces an end-of-file notice instead.
 */
void line_error_token(struct token_list *token)
{
	if(NULL != token)
	{
		fputs(token->filename, stderr);
		fputs(":", stderr);
		fputs(int2str(token->linenumber, 10, TRUE), stderr);
		fputs(":", stderr);
		return;
	}

	fputs("EOF reached inside of line_error\n", stderr);
	fputs("problem at end of file\n", stderr);
}

/* Same prefix, taken from the current global_token */
void line_error()
{
	line_error_token(global_token);
}

/*
 * Consume the current global token, which must spell REQUIRED.
 * On a mismatch the location and MESSAGE go to stderr and the program
 * exits; reaching the end of the tokens before or after the consumed
 * one is rejected through require().
 */
void require_match(char* message, char* required)
{
	require(NULL != global_token, "EOF reached inside of require match\n");

	if(match(global_token->s, required))
	{
		global_token = global_token->next;
		require(NULL != global_token, "EOF after require match occurred\n");
		return;
	}

	line_error();
	fputs(message, stderr);
	exit(EXIT_FAILURE);
}

/* Write the text of every token from I to the end of the list into OUT */
void output_tokens(struct token_list *i, FILE* out)
{
	for(; NULL != i; i = i->next)
	{
		fputs(i->s, out);
	}
}
/* Copyright (C) 2021 Sanne Wouda
 * Copyright (C) 2021 Andrius Štikonas <andrius@stikonas.eu>
 * Copyright (C) 2022 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "cc.h"
#include "gcc_req.h"

void require(int bool, char* error);
int strtoint(char* a);
void line_error_token(struct token_list* list);
struct token_list* eat_token(struct token_list* head);
struct token_list* reverse_list(struct token_list* head);

/* One entry per open #if/#ifdef/#ifndef; the innermost one is on top */
struct conditional_inclusion
{
	struct conditional_inclusion* prev;
	int include; /* 1 == include, 0 == skip */
	int previous_condition_matched; /* 1 == all subsequent conditions treated as FALSE */
};

/* One entry per defined macro; the newest definition is at the head */
struct macro_list
{
	struct macro_list* next;
	char* symbol;
	struct token_list* expansion;
	struct token_list* arguments;
};

struct macro_list* macro_env;
struct conditional_inclusion* conditional_inclusion_top;

/* point where we are currently modifying the global_token list */
struct token_list* macro_token;

/*
 * Define macro SYM with the single-token expansion VALUE.
 * SOURCE and NUM are recorded as the filename and line number of that
 * expansion token.  The new macro becomes the head of macro_env.
 */
void init_macro_env(char* sym, char* value, char* source, int num)
{
	struct macro_list* hold = macro_env;
	macro_env = calloc(1, sizeof(struct macro_list));
	macro_env->symbol = sym;
	macro_env->next = hold;
	macro_env->expansion = calloc(1, sizeof(struct token_list));
	macro_env->expansion->s = value;
	macro_env->expansion->filename = source;
	macro_env->expansion->linenumber = num;
}

/*
 * Remove the token at macro_token from the stream and step to its
 * successor.  When EAT_WHITESPACE is set, the tokens starting with a
 * space that follow are removed as well.  global_token is kept in sync
 * when it pointed at the removed token.
 *
 * Callers check macro_token against NULL after calling this, so the
 * whitespace loop must not dereference a NULL successor.  The test is
 * written as a nested check rather than with && so it does not depend
 * on short-circuit evaluation.
 */
void _eat_current_token(int eat_whitespace)
{
	int update_global_token = FALSE;
	if (macro_token == global_token)
		update_global_token = TRUE;

	macro_token = eat_token(macro_token);

	if(eat_whitespace)
	{
		while (NULL != macro_token)
		{
			if(' ' != macro_token->s[0]) break;
			macro_token = eat_token(macro_token);
		}
	}

	if(update_global_token)
		global_token = macro_token;
}

/* Remove the current token and the whitespace after it */
void eat_current_token()
{
	_eat_current_token(TRUE);
}

/* Remove only the current token */
void eat_current_token_without_space()
{
	_eat_current_token(FALSE);
}

/*
 * Look TOKEN up by spelling in the ARGUMENTS list.
 * Returns the expansion stored on the matching argument, or NULL when
 * no argument has that name.  A NULL token is a fatal error.
 */
struct token_list* lookup_token(struct token_list* token, struct token_list* arguments)
{
	if(NULL == token)
	{
		fputs("null token received in token\n", stderr);
		exit(EXIT_FAILURE);
	}

	struct token_list* hold = arguments;

	while (NULL != hold)
	{
		if (match(token->s, hold->s))
		{
			/* found! */
			return hold->expansion;
		}

		hold = hold->next;
	}

	/* not found! */
	return NULL;
}

/* returns the first token inserted; inserts *before* point */
struct token_list* insert_tokens(struct token_list* point, struct token_list* token)
{
	struct token_list* copy;
	struct token_list* first = NULL;

	while (NULL != token)
	{
		/* the inserted tokens are fresh copies; the source list is untouched */
		copy = calloc(1, sizeof(struct token_list));
		copy->s = token->s;
		copy->filename = token->filename;
		copy->linenumber = token->linenumber;

		if(NULL == first)
		{
			first = copy;
		}

		copy->next = point;

		if (NULL != point)
		{
			copy->prev = point->prev;

			if(NULL != point->prev)
			{
				point->prev->next = copy;
			}

			point->prev = copy;
		}

		token = token->next;
	}

	return first;
}

/*
 * Duplicate the list starting at TOKEN (only the text of each token is
 * copied) and return the result of reverse_list on the copies, which
 * are built back to front.  An empty input yields NULL; previously the
 * uninitialized local was handed to reverse_list in that case.
 */
struct token_list* copy_list(struct token_list* token)
{
	struct token_list* copy = NULL;
	struct token_list* prev = NULL;

	while (NULL != token)
	{
		copy = calloc(1, sizeof(struct token_list));
		copy->s = token->s;
		copy->next = prev;
		copy->prev = prev;
		prev = copy;
		token = token->next;
	}

	/* nothing was copied, so there is nothing to reverse */
	if(NULL == copy) return NULL;

	copy = reverse_list(copy);

	return copy;
}

/*
 * Find the macro whose symbol matches TOKEN.
 * Returns NULL when no such macro is defined; a NULL token is fatal.
 */
struct macro_list* lookup_macro(struct token_list* token)
{
	if(NULL == token)
	{
		line_error_token(macro_token);
		fputs("null token received in lookup_macro\n", stderr);
		exit(EXIT_FAILURE);
	}

	struct macro_list* hold = macro_env;

	while (NULL != hold)
	{
		if (match(token->s, hold->symbol))
		{
			/* found! */
			return hold;
		}

		hold = hold->next;
	}

	/* not found! */
	return NULL;
}

/*
 * Drop the first macro whose symbol matches TOKEN from macro_env.
 * Doing nothing is fine when the macro is unknown or when no macro is
 * defined at all; a NULL token is fatal.
 */
void remove_macro(struct token_list* token)
{
	if(NULL == token)
	{
		line_error_token(macro_token);
		fputs("received a null in remove_macro\n", stderr);
		exit(EXIT_FAILURE);
	}

	struct macro_list* hold = macro_env;
	struct macro_list* temp;

	/* an empty environment has nothing to undefine */
	if(NULL == hold) return;

	/* Deal with the first element */
	if (match(token->s, hold->symbol))
	{
		macro_env = hold->next;
		free(hold);
		return;
	}

	/* Remove element from the middle of linked list */
	while (NULL != hold->next)
	{
		if (match(token->s, hold->next->symbol))
		{
			temp = hold->next;
			hold->next = hold->next->next;
			free(temp);
			return;
		}

		hold = hold->next;
	}

	/* nothing to undefine */
	return;
}

int macro_expression();

/*
 * Evaluate an identifier inside a macro expression: the integer value
 * of the first token of its expansion, or 0 when it is not defined.
 */
int macro_variable()
{
	int value = 0;
	struct macro_list* hold = lookup_macro(macro_token);
	if (NULL != hold)
	{
		if(NULL == hold->expansion)
		{
			line_error_token(macro_token);
			fputs("hold->expansion is a null\n", stderr);
			exit(EXIT_FAILURE);
		}
		value = strtoint(hold->expansion->s);
	}
	eat_current_token();
	return value;
}

/* Evaluate a numeric literal inside a macro expression */
int macro_number()
{
	int result = strtoint(macro_token->s);
	eat_current_token();
	return result;
}

/*
 * primary := '-' primary | '!' primary | '(' expression ')'
 *          | 'defined' name | 'defined' '(' name ')'
 *          | identifier | number
 * Anything else evaluates to 0 without consuming a token.
 */
int macro_primary_expr()
{
	int defined_has_paren = FALSE;
	int hold;
	require(NULL != macro_token, "got an EOF terminated macro primary expression\n");

	if('-' == macro_token->s[0])
	{
		eat_current_token();
		return -macro_primary_expr();
	}
	else if('!' == macro_token->s[0])
	{
		eat_current_token();
		return !macro_primary_expr();
	}
	else if('(' == macro_token->s[0])
	{
		eat_current_token();
		hold = macro_expression();
		require(')' == macro_token->s[0], "missing ) in macro expression\n");
		eat_current_token();
		return hold;
	}
	else if(match("defined", macro_token->s))
	{
		eat_current_token();

		require(NULL != macro_token, "got an EOF terminated macro defined expression\n");

		if('(' == macro_token->s[0])
		{
			defined_has_paren = TRUE;
			eat_current_token();
		}

		if (NULL != lookup_macro(macro_token))
		{
			hold = TRUE;
		}
		else
		{
			hold = FALSE;
		}
		eat_current_token();

		if(TRUE == defined_has_paren)
		{
			if(NULL == macro_token)
			{
				line_error_token(macro_token);
				fputs("unterminated define ( statement\n", stderr);
				exit(EXIT_FAILURE);
			}
			require(')' == macro_token->s[0], "missing close parenthesis for defined()\n");
			eat_current_token();
		}

		return hold;
	}
	else if(in_set(macro_token->s[0], "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"))
	{
		return macro_variable();
	}
	else if(in_set(macro_token->s[0], "0123456789"))
	{
		return macro_number();
	}
	else
	{
		return 0;    /* FIXME: error handling */
	}
}

/*
 * Arithmetic and shift operators: + - * / % >> <<
 *
 * NOTE(review): all of these share one precedence level and the
 * right-hand side is evaluated by recursion, so they group right to
 * left (10 - 3 - 2 evaluates as 10 - (3 - 2)).  This differs from C;
 * left unchanged here so existing inputs evaluate as before.
 */
int macro_additive_expr()
{
	int lhs = macro_primary_expr();
	int hold;

	require(NULL != macro_token, "got an EOF terminated macro additive expression\n");
	if(match("+", macro_token->s))
	{
		eat_current_token();
		return lhs + macro_additive_expr();
	}
	else if(match("-", macro_token->s))
	{
		eat_current_token();
		return lhs - macro_additive_expr();
	}
	else if(match("*", macro_token->s))
	{
		eat_current_token();
		return lhs * macro_additive_expr();
	}
	else if(match("/", macro_token->s))
	{
		eat_current_token();
		hold = macro_additive_expr();
		require(0 != hold, "divide by zero not valid even in C macros\n");
		return lhs / hold;
	}
	else if(match("%", macro_token->s))
	{
		eat_current_token();
		hold = macro_additive_expr();
		require(0 != hold, "modulus by zero not valid even in C macros\n");
		return lhs % hold;
	}
	else if(match(">>", macro_token->s))
	{
		eat_current_token();
		return lhs >> macro_additive_expr();
	}
	else if(match("<<", macro_token->s))
	{
		eat_current_token();
		return lhs << macro_additive_expr();
	}
	else
	{
		return lhs;
	}
}

/* Comparison operators: < <= >= > == != (right-recursive as well) */
int macro_relational_expr()
{
	int lhs = macro_additive_expr();

	if(match("<", macro_token->s))
	{
		eat_current_token();
		return lhs < macro_relational_expr();
	}
	else if(match("<=", macro_token->s))
	{
		eat_current_token();
		return lhs <= macro_relational_expr();
	}
	else if(match(">=", macro_token->s))
	{
		eat_current_token();
		return lhs >= macro_relational_expr();
	}
	else if(match(">", macro_token->s))
	{
		eat_current_token();
		return lhs > macro_relational_expr();
	}
	else if(match("==", macro_token->s))
	{
		eat_current_token();
		return lhs == macro_relational_expr();
	}
	else if(match("!=", macro_token->s))
	{
		eat_current_token();
		return lhs != macro_relational_expr();
	}
	else
	{
		return lhs;
	}
}

/*
 * Bitwise and logical operators: & && | || ^
 * The right-hand side is always evaluated before the operator is
 * applied, so its tokens are consumed even when the left side already
 * decides the result.
 */
int macro_bitwise_expr()
{
	int rhs;
	int lhs = macro_relational_expr();

	if(match("&", macro_token->s))
	{
		eat_current_token();
		return lhs & macro_bitwise_expr();
	}
	else if(match("&&", macro_token->s))
	{
		eat_current_token();
		rhs = macro_bitwise_expr();
		return lhs && rhs;
	}
	else if(match("|", macro_token->s))
	{
		eat_current_token();
		rhs = macro_bitwise_expr();
		return lhs | rhs;
	}
	else if(match("||", macro_token->s))
	{
		eat_current_token();
		rhs = macro_bitwise_expr();
		return lhs || rhs;
	}
	else if(match("^", macro_token->s))
	{
		eat_current_token();
		rhs = macro_bitwise_expr();
		return lhs ^ rhs;
	}
	else
	{
		return lhs;
	}
}

/* Entry point of the constant-expression evaluator used by #if/#elif */
int macro_expression()
{
	return macro_bitwise_expr();
}

/*
 * Process a #define line starting at the "#define" token.
 *
 * The macro name, an optional parenthesised argument list and the
 * replacement tokens up to the end of the line are removed from the
 * token stream.  The macro is added to macro_env only when the
 * directive is not inside a skipped conditional block; otherwise the
 * tokens are consumed and the definition is thrown away.  Comment
 * tokens are skipped, and a // comment ends the definition.
 * On return macro_token is at the newline that ends the directive.
 */
void handle_define()
{
	struct macro_list* hold;
	struct token_list* arg;
	struct token_list* expansion_end = NULL;

	/* don't use #define statements from non-included blocks */
	int conditional_define = TRUE;
	if(NULL != conditional_inclusion_top)
	{
		if(FALSE == conditional_inclusion_top->include)
		{
			conditional_define = FALSE;
		}
	}

	eat_current_token();

	require(NULL != macro_token, "got an EOF terminated #define\n");
	require('\n' != macro_token->s[0], "unexpected newline after #define\n");

	/* insert new macro */
	hold = calloc(1, sizeof(struct macro_list));
	hold->symbol = macro_token->s;
	hold->next = macro_env;
	/* provided it isn't in a non-included block */
	if(conditional_define) macro_env = hold;

	/* discard the macro name */
	eat_current_token_without_space();
	/* the name may have been the very last token */
	require(NULL != macro_token, "got an EOF terminated #define\n");

	/* Handle macro arguments */
	if(macro_token->s[0] == '(')
	{
		/* discard ( */
		eat_current_token();
		require(NULL != macro_token, "got an EOF terminated #define\n");
		if(macro_token->s[0] != ')')
		{
			arg = calloc(1, sizeof(struct token_list));
			arg->s = macro_token->s;
			hold->arguments = arg;
			eat_current_token();
			require(NULL != macro_token, "incomplete macro call\n");
			while(macro_token->s[0] == ',')
			{
				eat_current_token();
				require(NULL != macro_token, "incomplete macro call, got an EOF instead of an argument\n");
				/* arguments are pushed on the front and reversed below */
				arg = calloc(1, sizeof(struct token_list));
				arg->s = macro_token->s;
				arg->next = hold->arguments;
				hold->arguments = arg;
				eat_current_token();
				require(NULL != macro_token, "incomplete macro call\n");
			}
		}
		eat_current_token();

		/* Reverse argument list */
		hold->arguments = reverse_list(hold->arguments);

		require(NULL != macro_token, "got an EOF terminated #define\n");
	}
	else if(macro_token->s[0] == ' ')
	{
		eat_current_token();
	}

	while (TRUE)
	{
		require(NULL != macro_token, "got an EOF terminated #define\n");

		if ('\n' == macro_token->s[0])
		{
			if(NULL == expansion_end)
			{
				/* no replacement tokens were seen, so hold is still valid */
				hold->expansion = NULL;
				return;
			}
			expansion_end->next = NULL;
			return;
		}
		else if(('/' == macro_token->s[0]) && ('*' == macro_token->s[1]))
		{
			eat_current_token();
			continue;
		}
		else if(('/' == macro_token->s[0]) && ('/' == macro_token->s[1]))
		{
			/* the line comment becomes the newline that ends the directive */
			macro_token->s = "\n";
			if(NULL == expansion_end)
			{
				hold->expansion = NULL;
				return;
			}
			expansion_end->next = NULL;
			return;
		}

		/* definition already discarded: just consume the rest of the line */
		if(NULL == hold)
		{
			eat_current_token();
			continue;
		}

		expansion_end = macro_token;

		/* in the first iteration, we set the first token of the expansion, if it exists */
		if (NULL == hold->expansion)
		{
			hold->expansion = macro_token;
		}

		/* throw away if not used */
		if(!conditional_define && (NULL != hold))
		{
			free(hold);
			hold = NULL;
		}

		eat_current_token();
	}
}

/* Process "#undef NAME": both tokens are removed and NAME is undefined */
void handle_undef()
{
	eat_current_token();
	remove_macro(macro_token);
	eat_current_token();
}

/*
 * Process #error (WARNING_P == FALSE) or #warning (WARNING_P == TRUE).
 *
 * Outside of skipped blocks the message is printed to stderr, and
 * #error then terminates the program.  In every case the directive and
 * its message are removed from the token stream, leaving macro_token
 * at the newline (or NULL at end of input).
 *
 * The message is printed through a separate cursor: moving macro_token
 * itself past the text would leave the #warning message in the token
 * stream.
 */
void handle_error(int warning_p)
{
	struct token_list* message;

	/* don't use #error statements from non-included blocks */
	int conditional_error = TRUE;
	if(NULL != conditional_inclusion_top)
	{
		if(FALSE == conditional_inclusion_top->include)
		{
			conditional_error = FALSE;
		}
	}
	eat_current_token();
	/* provided it isn't in a non-included block */
	if(conditional_error)
	{
		line_error_token(macro_token);
		if(warning_p) fputs(" warning: #warning ", stderr);
		else fputs(" error: #error ", stderr);

		message = macro_token;
		while (NULL != message)
		{
			if ('\n' == message->s[0]) break;
			fputs(message->s, stderr);
			message = message->next;
		}
		fputs("\n", stderr);
		if(!warning_p) exit(EXIT_FAILURE);
	}

	/* discard the error */
	while (NULL != macro_token)
	{
		if ('\n' == macro_token->s[0])
		{
			return;
		}
		eat_current_token();
	}
}

/*
 * Dispatch on the directive at macro_token.
 *
 * #if, #ifdef and #ifndef push a conditional_inclusion entry; #elif and
 * #else update the top entry; #endif pops it.  #define, #undef, #error
 * and #warning go to their handlers.  #FILENAME lines and unknown
 * directives are removed up to the newline, the latter with a warning.
 */
void macro_directive()
{
	struct conditional_inclusion *t;
	int result;

	/* FIXME: whitespace is allowed between "#"" and "if" */
	if(match("#if", macro_token->s))
	{
		eat_current_token();
		/* evaluate constant integer expression */
		result = macro_expression();
		/* push conditional inclusion */
		t = calloc(1, sizeof(struct conditional_inclusion));
		t->prev = conditional_inclusion_top;
		conditional_inclusion_top = t;
		t->include = TRUE;

		if(FALSE == result)
		{
			t->include = FALSE;
		}

		t->previous_condition_matched = t->include;
	}
	else if(match("#ifdef", macro_token->s))
	{
		eat_current_token();
		require(NULL != macro_token, "got an EOF terminated macro defined expression\n");
		if (NULL != lookup_macro(macro_token))
		{
			result = TRUE;
		}
		else
		{
			result = FALSE;
		}
		eat_current_token();

		/* push conditional inclusion */
		t = calloc(1, sizeof(struct conditional_inclusion));
		t->prev = conditional_inclusion_top;
		conditional_inclusion_top = t;
		t->include = TRUE;

		if(FALSE == result)
		{
			t->include = FALSE;
		}

		t->previous_condition_matched = t->include;
	}
	else if(match("#ifndef", macro_token->s))
	{
		eat_current_token();
		require(NULL != macro_token, "got an EOF terminated macro defined expression\n");
		if (NULL != lookup_macro(macro_token))
		{
			result = FALSE;
		}
		else
		{
			result = TRUE;
		}
		eat_current_token();

		/* push conditional inclusion */
		t = calloc(1, sizeof(struct conditional_inclusion));
		t->prev = conditional_inclusion_top;
		conditional_inclusion_top = t;
		t->include = TRUE;

		if(FALSE == result)
		{
			t->include = FALSE;
		}

		t->previous_condition_matched = t->include;
	}
	else if(match("#elif", macro_token->s))
	{
		eat_current_token();
		result = macro_expression();
		require(NULL != conditional_inclusion_top, "#elif without leading #if\n");
		/* only the first branch whose condition holds is included */
		conditional_inclusion_top->include = result && !conditional_inclusion_top->previous_condition_matched;
		conditional_inclusion_top->previous_condition_matched =
		    conditional_inclusion_top->previous_condition_matched || conditional_inclusion_top->include;
	}
	else if(match("#else", macro_token->s))
	{
		eat_current_token();
		require(NULL != conditional_inclusion_top, "#else without leading #if\n");
		conditional_inclusion_top->include = !conditional_inclusion_top->previous_condition_matched;
	}
	else if(match("#endif", macro_token->s))
	{
		if(NULL == conditional_inclusion_top)
		{
			line_error_token(macro_token);
			fputs("unexpected #endif\n", stderr);
			exit(EXIT_FAILURE);
		}

		eat_current_token();
		/* pop conditional inclusion */
		t = conditional_inclusion_top;
		conditional_inclusion_top = conditional_inclusion_top->prev;
		free(t);
	}
	else if(match("#define", macro_token->s))
	{
		handle_define();
	}
	else if(match("#undef", macro_token->s))
	{
		handle_undef();
	}
	else if(match("#error", macro_token->s))
	{
		handle_error(FALSE);
	}
	else if(match("#warning", macro_token->s))
	{
		handle_error(TRUE);
	}
	else if(match("#FILENAME", macro_token->s))
	{
		while(TRUE)
		{
			if(NULL == macro_token)
			{
				return;
			}

			if('\n' == macro_token->s[0])
			{
				return;
			}

			eat_current_token();
		}
	}
	else
	{
		/* Put a big fat warning but see if we can just ignore */
		fputs(">>WARNING<<\n>>WARNING<<\n", stderr);
		line_error_token(macro_token);
		fputs("feature: ", stderr);
		fputs(macro_token->s, stderr);
		fputs(" unsupported in M2-Planet\nIgnoring line, may result in bugs\n>>WARNING<<\n>>WARNING<<\n\n", stderr);

		/* unhandled macro directive; let's eat until a newline; om nom nom */
		while(TRUE)
		{
			if(NULL == macro_token)
			{
				return;
			}

			if('\n' == macro_token->s[0])
			{
				return;
			}

			eat_current_token();
		}
	}
}

/*
 * Return a fresh copy of EXPANSION in which every token that names one
 * of ARGUMENTS is replaced by the tokens stored on that argument.
 * Returns NULL when the copy is empty; previously an uninitialized
 * pointer was walked in that case.
 */
struct token_list* expand_macro_functions(struct token_list* expansion, struct token_list* arguments)
{
	struct token_list* expanded_token;
	struct token_list* head;
	struct token_list* hold = NULL; /* Same as head unless head == NULL */
	head = copy_list(expansion);
	while(NULL != head)
	{
		expanded_token = lookup_token(head, arguments);
		hold = head;
		if(NULL != expanded_token)
		{
			insert_tokens(head, expanded_token);
			/* keep a handle on the list before the parameter name is removed */
			hold = head->prev;
			head = eat_token(head);
		}
		else
		{
			head = head->next;
		}
	}

	/* nothing was copied, so there is no list to rewind */
	if(NULL == hold) return NULL;

	/* rewind to the first token of the result */
	while(NULL != hold->prev) hold = hold->prev;
	return hold;
}

/*
 * Remove tokens up to (not including) the #endif that closes a nested
 * conditional inside a block that is being dropped.
 */
void eat_until_endif()
{
	/* the caller has just removed a token; make sure something is left */
	require(NULL != macro_token, "Unterminated #if block\n");

	/* This #if block is nested inside of an #if block that needs to be dropped, lose EVERYTHING */
	do
	{
		if(match("#if", macro_token->s) || match("#ifdef", macro_token->s) || match("#ifndef", macro_token->s))
		{
			eat_current_token();
			eat_until_endif();
		}

		eat_current_token();
		require(NULL != macro_token, "Unterminated #if block\n");
	} while(!match("#endif", macro_token->s));
}

/*
 * Remove tokens up to (not including) the #elif, #else or #endif that
 * ends the conditional branch currently being skipped.
 */
void eat_block()
{
	/* This conditional #if block is wrong, drop everything until the #elif/#else/#endif */
	do
	{
		if(match("#if", macro_token->s) || match("#ifdef", macro_token->s) || match("#ifndef", macro_token->s))
		{
			eat_current_token();
			eat_until_endif();
		}

		eat_current_token();
		require(NULL != macro_token, "Unterminated #if block\n");
	} while(!match("#elif", macro_token->s) && !match("#else", macro_token->s) && !match("#endif", macro_token->s));
}

/*
 * Expand TOKEN in place when it names a macro.
 *
 * Returns the token at which preprocessing continues: the successor of
 * TOKEN when it is not a macro (or is __M2__), the token that followed
 * a macro with an empty expansion, or the first token of the inserted
 * expansion.  For macros with arguments the call's argument tokens are
 * collected up to the closing parenthesis and substituted.
 */
struct token_list* maybe_expand(struct token_list* token)
{
	if(NULL == token)
	{
		line_error_token(macro_token);
		fputs("maybe_expand passed a null token\n", stderr);
		exit(EXIT_FAILURE);
	}

	struct macro_list* hold = lookup_macro(token);
	struct token_list* hold2;
	struct token_list* hold3;
	struct token_list* hold4;

	if(NULL == token->next)
	{
		line_error_token(macro_token);
		fputs("we can't expand a null token: ", stderr);
		fputs(token->s, stderr);
		fputc('\n', stderr);
		exit(EXIT_FAILURE);
	}

	if (NULL == hold)
	{
		return token->next;
	}

	if(match("__M2__", token->s)) return token->next;

	token = eat_token(token);

	if (NULL == hold->expansion)
	{
		/*
		 * token already is the successor of the removed macro name.
		 * Returning token->next skipped it unprocessed; when it was a
		 * newline, a directive on the next line went unrecognised.
		 */
		return token;
	}

	/* Match macro arguments with stored names */
	hold3 = hold->arguments;
	if(NULL != hold3)
	{
		if(token->s[0] == ' ')
		{
			token = eat_token(token);
		}
		require('(' == token->s[0], "missing open parenthesis for macro function\n");
		token = eat_token(token);
		require(NULL != token, "got an EOF terminated macro function\n");
		do
		{
			/* hold->arguments walks the parameter list; hold3 keeps its head */
			hold2 = calloc(1, sizeof(struct token_list));
			hold2->s = token->s;
			hold2->next = hold->arguments->expansion;
			hold->arguments->expansion = hold2;
			token = eat_token(token);
			require(NULL != token, "incomplete macro call\n");
			if(token->s[0] == ',')
			{
				hold->arguments->expansion = reverse_list(hold->arguments->expansion);
				hold->arguments = hold->arguments->next;
				require(NULL != hold->arguments, "too many arguments in macro call\n");
				token = eat_token(token);
				require(NULL != token, "incomplete macro call\n");
			}
		} while(token->s[0] != ')');
		hold->arguments->expansion = reverse_list(hold->arguments->expansion);
		hold->arguments = hold3;
		token = eat_token(token);
	}
	hold4 = expand_macro_functions(hold->expansion, hold->arguments);
	hold4 = insert_tokens(token, hold4);

	return hold4;
}

/*
 * Run the preprocessor over the global token list: directives at the
 * start of a line are executed, skipped conditional blocks are removed
 * and macro uses in the remaining text are expanded.
 */
void preprocess()
{
	int start_of_line = TRUE;
	macro_token = global_token;

	while(NULL != macro_token)
	{
		if(start_of_line && '#' == macro_token->s[0])
		{
			macro_directive();

			if(macro_token)
			{
				if('\n' != macro_token->s[0])
				{
					line_error_token(macro_token);
					fputs("newline expected at end of macro directive\n", stderr);
					fputs("found: '", stderr);
					fputs(macro_token->s, stderr);
					fputs("'\n", stderr);
					exit(EXIT_FAILURE);
				}
			}
		}
		else if('\n' == macro_token->s[0])
		{
			start_of_line = TRUE;
			macro_token = macro_token->next;
		}
		else
		{
			start_of_line = FALSE;
			if(NULL == conditional_inclusion_top)
			{
				macro_token = maybe_expand(macro_token);
			}
			else if(!conditional_inclusion_top->include)
			{
				/* rewrite the token stream to exclude the current token */
				eat_block();
				start_of_line = TRUE;
			}
			else
			{
				macro_token = maybe_expand(macro_token);
			}
		}
	}
}
/* Copyright (C) 2016, 2021 Jeremiah Orians
 * Copyright (C) 2020 deesix <deesix@tuta.io>
 * Copyright (C) 2020 Gabriel Wicki
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#include"cc.h"
#include <unistd.h>

/* The core functions */
void populate_env(char** envp);
void setup_env();
char* env_lookup(char* variable);
void initialize_types();
struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename, int include);
struct token_list* reverse_list(struct token_list* head);

void init_macro_env(char* sym, char* value, char* source, int num);
void preprocess();
void output_tokens(struct token_list *i, FILE* out);
int strtoint(char *a);
void spawn_processes(int debug_flag, char* prefix, char* preprocessed_file, char* destination, char** envp);

int follow_includes;

/*
 * First pass over the command line.  It handles the options that must
 * be known before any input file is read: --debug-mode, -A /
 * --architecture, --os / --operating-system, --max-string,
 * --no-includes, -I and -D.  Everything else is skipped here and dealt
 * with by main.
 *
 * For "-D NAME=VALUE" the argument string is split in place at the '='
 * so that argv[i+1] becomes NAME; without '=' the value is empty.
 */
void prechecks(int argc, char** argv)
{
	int env = 0;
	char* hold;
	int i = 1;
	while(i <= argc)
	{
		if(NULL == argv[i])
		{
			i += 1;
		}
		else if(match(argv[i], "--debug-mode"))
		{
			hold = argv[i+1];
			require(NULL != hold, "--debug-mode requires an argument\n");
			DEBUG_LEVEL = strtoint(hold);
			if(0 == DEBUG_LEVEL)
			{
				require(match("0", hold), "--debug-mode values must be numbers\n"
				                          "and level 0 needed to be expressed as 0\n");
			}
			fputs("DEBUG_LEVEL set to: ", stderr);
			fputs(hold, stderr);
			fputc('\n', stderr);
			i+= 2;
		}
		else if(match(argv[i], "-A") || match(argv[i], "--architecture"))
		{
			hold = argv[i+1];
			require(NULL != hold, "--architecture needs to be passed an architecture\n");
			Architecture = hold;
			i += 2;
		}
		else if(match(argv[i], "--os") || match(argv[i], "--operating-system"))
		{
			hold = argv[i+1];
			require(NULL != hold, "--operating-system needs to be passed an operating system\n");
			OperatingSystem = hold;
			i += 2;
		}
		else if(match(argv[i], "--max-string"))
		{
			hold = argv[i+1];
			require(NULL != hold, "--max-string requires a numeric argument\n");
			MAX_STRING = strtoint(hold);
			require(0 < MAX_STRING, "Not a valid string size\nAbort and fix your --max-string\n");
			i += 2;
		}
		else if(match(argv[i], "--no-includes"))
		{
			follow_includes = FALSE;
			i+= 1;
		}
		else if(match(argv[i], "-I"))
		{
			hold = argv[i+1];
			if(NULL == hold)
			{
				fputs("-I requires a PATH\n", stderr);
				exit(EXIT_FAILURE);
			}
			if(1 <= DEBUG_LEVEL)
			{
				fputs("M2LIBC_PATH set by -I to ", stderr);
				fputs(hold, stderr);
				fputc('\n', stderr);
			}
			M2LIBC_PATH = hold;
			i += 2;
		}
		else if(match(argv[i], "-D"))
		{
			hold = argv[i+1];
			if(NULL == hold)
			{
				fputs("-D requires an argument", stderr);
				exit(EXIT_FAILURE);
			}
			/* cut NAME=VALUE in two; hold ends up at VALUE (or at the final NUL) */
			while(0 != hold[0])
			{
				if('=' == hold[0])
				{
					hold[0] = 0;
					hold = hold + 1;
					break;
				}
				hold = hold + 1;
			}
			init_macro_env(argv[i+1], hold, "__ARGV__", env);
			env = env + 1;
			i += 2;
		}
		else
		{
			i += 1;
		}
	}
}

/*
 * Program entry: read every --file input (or standard input when none
 * was given), preprocess the tokens and then either write them out
 * (--dump-mode, --preprocess-only) or store them in a temporary file
 * and hand that to spawn_processes to build the final binary.
 *
 * The M2libc path is taken from -I first, then from the M2LIBC_PATH
 * environment variable, then defaults to "./M2libc"; before this fix
 * the environment/default lookup overwrote the -I value set by
 * prechecks.
 */
int main(int argc, char** argv, char** envp)
{
	/****************************************************************************
	 * Zero means no debugging messages and larger positive values means more   *
	 * chatty output. Level 15 means EVERYTHING but 7 should cover most magic   *
	 ****************************************************************************/
	DEBUG_LEVEL = 0;
	/* Setup __M2__ (It is very very special *DO NOT MESS WITH IT* ) */
	init_macro_env("__M2__", "__M2__", "__INTERNAL_M2__", 0);

	/* Our fun globals */
	FUZZING = FALSE;
	MAX_STRING = 65536;
	PREPROCESSOR_MODE = FALSE;
	STDIO_USED = FALSE;
	DIRTY_MODE = FALSE;
	Architecture = NULL;
	OperatingSystem = NULL;
	/* stays NULL unless prechecks sees -I */
	M2LIBC_PATH = NULL;

	/* Our fun locals */
	int debug_flag = TRUE;
	FILE* in = stdin;
	FILE* tempfile;
	char* destination_name = "a.out";
	FILE* destination_file = stdout;
	char* name;
	int DUMP_MODE = FALSE;
	follow_includes = TRUE;

	/* Try to get our needed updates */
	prechecks(argc, argv);

	/* Get the environmental bits */
	if(1 <= DEBUG_LEVEL) fputs("Starting to setup Environment\n", stderr);
	populate_env(envp);
	setup_env();
	if(1 <= DEBUG_LEVEL) fputs("Environment setup\n", stderr);

	/* an explicit -I wins over the environment and the default */
	if(NULL == M2LIBC_PATH)
	{
		M2LIBC_PATH = env_lookup("M2LIBC_PATH");
		if(NULL == M2LIBC_PATH) M2LIBC_PATH = "./M2libc";
		else if(1 <= DEBUG_LEVEL)
		{
			fputs("M2LIBC_PATH set by environment variable to ", stderr);
			fputs(M2LIBC_PATH, stderr);
			fputc('\n', stderr);
		}
	}

	TEMPDIR = env_lookup("TMPDIR");
	if(NULL == TEMPDIR) TEMPDIR = "/tmp";
	else if(1 <= DEBUG_LEVEL)
	{
		fputs("TEMPDIR set by environment variable to ", stderr);
		fputs(TEMPDIR, stderr);
		fputc('\n', stderr);
	}

	int i = 1;
	while(i <= argc)
	{
		if(NULL == argv[i])
		{
			i += 1;
		}
		else if(match(argv[i], "-E") || match(argv[i], "--preprocess-only"))
		{
			PREPROCESSOR_MODE = TRUE;
			i += 1;
		}
		else if(match(argv[i], "--dump-mode"))
		{
			DUMP_MODE = TRUE;
			i+= 1;
		}
		else if(match(argv[i], "--dirty-mode"))
		{
			DIRTY_MODE = TRUE;
			i+= 1;
		}
		else if(match(argv[i], "--no-includes"))
		{
			/* Handled by precheck*/
			i+= 1;
		}
		else if(match(argv[i], "--debug-mode"))
		{
			/* Handled by precheck */
			i+= 2;
		}
		else if(match(argv[i], "-A") || match(argv[i], "--architecture"))
		{
			/* Handled by precheck */
			i += 2;
		}
		else if(match(argv[i], "--os") || match(argv[i], "--operating-system"))
		{
			/* Handled by precheck */
			i += 2;
		}
		else if(match(argv[i], "-f") || match(argv[i], "--file"))
		{
			if(NULL == hold_string)
			{
				hold_string = calloc(MAX_STRING + 4, sizeof(char));
				require(NULL != hold_string, "Impossible Exhaustion has occured\n");
			}

			name = argv[i + 1];
			if(NULL == name)
			{
				fputs("did not receive a file name\n", stderr);
				exit(EXIT_FAILURE);
			}

			in = fopen(name, "r");
			if(NULL == in)
			{
				fputs("Unable to open for reading file: ", stderr);
				fputs(name, stderr);
				fputs("\n Aborting to avoid problems\n", stderr);
				exit(EXIT_FAILURE);
			}
			global_token = read_all_tokens(in, global_token, name, follow_includes);
			fclose(in);
			i += 2;
		}
		else if(match(argv[i], "-o") || match(argv[i], "--output"))
		{
			destination_name = argv[i + 1];
			require(NULL != destination_name, "--output option requires a filename to follow\n");
			destination_file = fopen(destination_name, "w");
			if(NULL == destination_file)
			{
				fputs("Unable to open for writing file: ", stderr);
				fputs(argv[i + 1], stderr);
				fputs("\n Aborting to avoid problems\n", stderr);
				exit(EXIT_FAILURE);
			}
			i += 2;
		}
		else if(match(argv[i], "--max-string"))
		{
			/* handled by precheck */
			i += 2;
		}
		else if(match(argv[i], "-I"))
		{
			/* Handled by precheck */
			i += 2;
		}
		else if(match(argv[i], "-D"))
		{
			/* Handled by precheck */
			i += 2;
		}
		else if(match(argv[i], "-h") || match(argv[i], "--help"))
		{
			fputs(" -f input file\n -o output file\n --help for this message\n --version for file version\n-E or --preprocess-only\n--max-string N (N is a number)\n--fuzz\n--no-debug\n", stdout);
			exit(EXIT_SUCCESS);
		}
		else if(match(argv[i], "-V") || match(argv[i], "--version"))
		{
			fputs("M2-Mesoplanet v1.11.0\n", stderr);
			exit(EXIT_SUCCESS);
		}
		else if(match(argv[i], "--fuzz"))
		{
			/* Set fuzzing */
			FUZZING = TRUE;
			i += 1;
		}
		else if(match(argv[i], "--no-debug"))
		{
			/* strip things down */
			debug_flag = FALSE;
			i += 1;
		}
		else if(match(argv[i], "--temp-directory"))
		{
			name = argv[i+1];
			if(NULL == name)
			{
				fputs("--temp-directory requires a PATH\n", stderr);
				exit(EXIT_FAILURE);
			}
			if(1 <= DEBUG_LEVEL)
			{
				fputs("TEMPDIR set by --temp-directory to ", stderr);
				fputs(name, stderr);
				fputc('\n', stderr);
			}
			TEMPDIR = name;
			i += 2;
		}
		else
		{
			if(5 <= DEBUG_LEVEL)
			{
				fputs("on index: ", stderr);
				fputs(int2str(i, 10, TRUE), stderr);
				fputc('\n', stderr);
			}
			fputs("UNKNOWN ARGUMENT: ", stdout);
			fputs(argv[i], stdout);
			fputc('\n', stdout);
			exit(EXIT_FAILURE);
		}
	}

	if(1 <= DEBUG_LEVEL) fputs("READ all files\n", stderr);

	/* Deal with special case of wanting to read from standard input */
	if(stdin == in)
	{
		/* same size as the buffer allocated for --file inputs */
		hold_string = calloc(MAX_STRING + 4, sizeof(char));
		require(NULL != hold_string, "Impossible Exhaustion has occured\n");
		global_token = read_all_tokens(in, global_token, "STDIN", follow_includes);
	}

	if(NULL == global_token)
	{
		fputs("Either no input files were given or they were empty\n", stderr);
		exit(EXIT_FAILURE);
	}

	if(1 <= DEBUG_LEVEL) fputs("Start to reverse list\n", stderr);
	global_token = reverse_list(global_token);
	if(1 <= DEBUG_LEVEL) fputs("List reversed\n", stderr);

	if(DUMP_MODE)
	{
		output_tokens(global_token, destination_file);
		exit(EXIT_SUCCESS);
	}

	preprocess();

	if(PREPROCESSOR_MODE)
	{
		fputs("/* M2-Mesoplanet Preprocessed source */\n", destination_file);
		output_tokens(global_token, destination_file);
		fclose(destination_file);
	}
	else
	{
		/* Ensure we can write to the temp directory */
		int permissions = access(TEMPDIR, 0);
		if(0 != permissions)
		{
			fputs("unable to access: ", stderr);
			fputs(TEMPDIR, stderr);
			fputs(" for use as a temp directory\nPlease use --temp-directory to set a directory you can use or set the TMPDIR variable\n", stderr);
			exit(EXIT_FAILURE);
		}

		/* size the template from TEMPDIR; it is user supplied and may be long */
		name = calloc(strlen(TEMPDIR) + strlen("/M2-Mesoplanet-XXXXXX") + 1, sizeof(char));
		require(NULL != name, "Impossible Exhaustion has occured\n");
		strcpy(name, TEMPDIR);
		strcat(name, "/M2-Mesoplanet-XXXXXX");
		i = mkstemp(name);
		/* a failed mkstemp leaves tempfile NULL and takes the error path below */
		tempfile = NULL;
		if(-1 != i) tempfile = fdopen(i, "w");
		if(NULL != tempfile)
		{
			/* Our preprocessed crap */
			output_tokens(global_token, tempfile);
			fclose(tempfile);

			/* Make me a real binary */
			spawn_processes(debug_flag, TEMPDIR, name, destination_name, envp);

			/* And clean up the donkey */
			if(!DIRTY_MODE) remove(name);
		}
		else
		{
			fputs("unable to get a tempfile for M2-Mesoplanet output\n", stderr);
			exit(EXIT_FAILURE);
		}
	}
	return EXIT_SUCCESS;
}
/* -*- c-file-style: "linux";indent-tabs-mode:t -*- */
/* Copyright (C) 2017 Jeremiah Orians
 * This file is part of mescc-tools.
 *
 * mescc-tools is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * mescc-tools is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with mescc-tools.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <stdio.h>
#include <stdlib.h>
#include <sys/utsname.h>
int match(char* a, char* b);

#define TRUE 1
//CONSTANT TRUE 1
#define FALSE 0
//CONSTANT FALSE 0

/*
 * Returns TRUE when machine is one of the 32-bit x86 names this tool
 * recognises (i386, i486, i586, i686, i686-pae), FALSE otherwise.
 */
int is_x86_32(char* machine)
{
	if(match("i386", machine)) return TRUE;
	if(match("i486", machine)) return TRUE;
	if(match("i586", machine)) return TRUE;
	if(match("i686", machine)) return TRUE;
	if(match("i686-pae", machine)) return TRUE;
	return FALSE;
}

/*
 * Returns TRUE when machine is one of the 64-bit names this tool
 * recognises (aarch64, amd64, ppc64le, riscv64, x86_64), FALSE otherwise.
 * Every name in this list is also reported as little-endian by --endian.
 */
int is_64bit(char* machine)
{
	if(match("aarch64", machine)) return TRUE;
	if(match("amd64", machine)) return TRUE;
	if(match("ppc64le", machine)) return TRUE;
	if(match("riscv64", machine)) return TRUE;
	if(match("x86_64", machine)) return TRUE;
	return FALSE;
}

/*
 * Standard C main program
 *
 * Prints one fact about the host (as reported by uname) and exits.
 * Without a query option it prints the architecture name, normalised
 * (i?86 -> x86, x86_64 -> amd64) unless --exact was given.
 * --override STRING replaces whatever would have been printed.
 * Options are processed left to right; the first query option
 * (--os, --blood, --endian, --hex2, --version, --help) prints and exits.
 */
int main(int argc, char **argv)
{
	int exact = FALSE;
	int override = FALSE;
	char* override_string = NULL;
	int option_index = 1;

	struct utsname* unameData = calloc(1, sizeof(struct utsname));
	if(NULL == unameData)
	{
		fputs("unable to allocate memory for uname data\n", stderr);
		exit(EXIT_FAILURE);
	}
	/* The struct is zero-filled, so a failing uname leaves empty strings
	 * behind; the result is deliberately not treated as fatal because
	 * --override does not need it. */
	uname(unameData);

	/* argv[argc] is NULL, which the first branch skips over */
	while(option_index <= argc)
	{
		if(NULL == argv[option_index])
		{
			option_index = option_index + 1;
		}
		else if(match(argv[option_index], "--exact"))
		{
			exact = TRUE;
			option_index = option_index + 1;
		}
		else if(match(argv[option_index], "--override"))
		{
			override = TRUE;
			if((option_index + 1) < argc)
			{
				override_string = argv[option_index + 1];
				option_index = option_index + 2;
			}
			else
			{
				fputs("--override requires an actual override string\n", stderr);
				exit(EXIT_FAILURE);
			}
		}
		else if(match(argv[option_index], "--os") || match(argv[option_index], "--OS"))
		{
			if(override) fputs(override_string, stdout);
			else fputs(unameData->sysname, stdout);
			fputc('\n', stdout);
			exit(EXIT_SUCCESS);
		}
		else if(match(argv[option_index], "--blood"))
		{
			/* Machines outside the 64-bit list get an empty line */
			if(override) fputs(override_string, stdout);
			else if(is_64bit(unameData->machine)) fputs("--64", stdout);
			fputc('\n', stdout);
			exit(EXIT_SUCCESS);
		}
		else if(match(argv[option_index], "--endian"))
		{
			if(override) fputs(override_string, stdout);
			else if(is_64bit(unameData->machine) || is_x86_32(unameData->machine)) fputs("--little-endian", stdout);
			else fputs("--big-endian", stdout);
			fputc('\n', stdout);
			exit(EXIT_SUCCESS);
		}
		else if(match(argv[option_index], "--hex2"))
		{
			/* Base address string printed for each known machine */
			if(override) fputs(override_string, stdout);
			else if(match("aarch64", unameData->machine)) fputs("0x400000", stdout);
			else if(match("armv7l", unameData->machine)) fputs("0x10000", stdout);
			else if(match("amd64", unameData->machine) || match("x86_64", unameData->machine)) fputs("0x600000", stdout);
			else if(match("ppc64le", unameData->machine)) fputs("0x10000", stdout);
			else if(match("riscv64", unameData->machine)) fputs("0x600000", stdout);
			else if(is_x86_32(unameData->machine)) fputs("0x08048000", stdout);
			else fputs("0x0", stdout);
			fputc('\n', stdout);
			exit(EXIT_SUCCESS);
		}
		else if(match(argv[option_index], "-V") || match(argv[option_index], "--version"))
		{
			fputs("get_machine 1.5.0\n", stdout);
			exit(EXIT_SUCCESS);
		}
		else if(match(argv[option_index], "-h") || match(argv[option_index], "--help"))
		{
			fputs("If you want exact architecture use --exact\n", stderr);
			fputs("If you want to know the Operating system use --os\n", stderr);
			fputs("If you wish to override the output to anything you want use --override\n", stderr);
			exit(EXIT_SUCCESS);
		}
		else
		{
			fputs("Unknown option\n", stderr);
			exit(EXIT_FAILURE);
		}
	}

	if(override) fputs(override_string, stdout);
	else if(!exact)
	{
		/* Normalise the common x86 spellings */
		if(is_x86_32(unameData->machine)) fputs("x86", stdout);
		else if(match("x86_64", unameData->machine)) fputs("amd64", stdout);
		else fputs(unameData->machine, stdout);
	}
	else fputs(unameData->machine, stdout);
	fputs("\n", stdout);
	return EXIT_SUCCESS;
}
#!/usr/bin/env bash ## Copyright (C) 2017 Jeremiah Orians ## This file is part of mescc-tools. ## ## mescc-tools is free software: you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation, either version 3 of the License, or ## (at your option) any later version. ## ## mescc-tools is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with mescc-tools. If not, see <http://www.gnu.org/licenses/>. ## You need to set the following environmental variables to build the programs: ## ARCH="${ARCH:-x86}" ## M2LIBC="${M2libc:-./M2libc}" ## TOOLS="${TOOLS:-../bin}" ## BINDIR="${BINDIR:-../bin}" set -ex M2LIBC_PATH=${M2LIBC} PATH=${BINDIR} alias CC="${TOOLS}/M2-Mesoplanet${EXE_SUFFIX} --operating-system ${OPERATING_SYSTEM} --architecture ${ARCH} -f" CC sha256sum.c -o ${BINDIR}/sha256sum${EXE_SUFFIX} CC match.c -o ${BINDIR}/match${EXE_SUFFIX} CC mkdir.c -o ${BINDIR}/mkdir${EXE_SUFFIX} CC untar.c -o ${BINDIR}/untar${EXE_SUFFIX} CC ungz.c -o ${BINDIR}/ungz${EXE_SUFFIX} CC unbz2.c -o ${BINDIR}/unbz2${EXE_SUFFIX} CC unxz.c -o ${BINDIR}/unxz${EXE_SUFFIX} CC catm.c -o ${BINDIR}/catm${EXE_SUFFIX} CC cp.c -o ${BINDIR}/cp${EXE_SUFFIX} CC chmod.c -o ${BINDIR}/chmod${EXE_SUFFIX} CC rm.c -o ${BINDIR}/rm${EXE_SUFFIX} CC replace.c -o ${BINDIR}/replace${EXE_SUFFIX} CC wrap.c -o ${BINDIR}/wrap${EXE_SUFFIX}
/* Copyright (C) 2021 Bastian Bittorf <bb@npl.de> * Copyright (C) 2021 Alain Mosnier <alain@wanamoon.net> * Copyright (C) 2017-2021 Jan Venekamp * Copyright (C) 2021 Jeremiah Orians * This file is part of mescc-tools * * mescc-tools is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * mescc-tools is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with mescc-tools. If not, see <http://www.gnu.org/licenses/>. */ #include <string.h> #include <stdio.h> #include <stdlib.h> #include "M2libc/bootstrappable.h" #define CHUNK_SIZE 64 #define TOTAL_LEN_LEN 8 int mask; /* * Initialize array of round constants: * (first 32 bits of the fractional parts of the cube roots of the first 64 primes 2..311): */ unsigned* init_k() { unsigned* k = calloc(65, sizeof(unsigned)); k[0] = 0x428a2f98; k[1] = 0x71374491; k[2] = 0xb5c0fbcf; k[3] = 0xe9b5dba5; k[4] = 0x3956c25b; k[5] = 0x59f111f1; k[6] = 0x923f82a4; k[7] = 0xab1c5ed5; k[8] = 0xd807aa98; k[9] = 0x12835b01; k[10] = 0x243185be; k[11] = 0x550c7dc3; k[12] = 0x72be5d74; k[13] = 0x80deb1fe; k[14] = 0x9bdc06a7; k[15] = 0xc19bf174; k[16] = 0xe49b69c1; k[17] = 0xefbe4786; k[18] = 0x0fc19dc6; k[19] = 0x240ca1cc; k[20] = 0x2de92c6f; k[21] = 0x4a7484aa; k[22] = 0x5cb0a9dc; k[23] = 0x76f988da; k[24] = 0x983e5152; k[25] = 0xa831c66d; k[26] = 0xb00327c8; k[27] = 0xbf597fc7; k[28] = 0xc6e00bf3; k[29] = 0xd5a79147; k[30] = 0x06ca6351; k[31] = 0x14292967; k[32] = 0x27b70a85; k[33] = 0x2e1b2138; k[34] = 0x4d2c6dfc; k[35] = 0x53380d13; k[36] = 0x650a7354; k[37] = 0x766a0abb; k[38] = 0x81c2c92e; k[39] = 0x92722c85; k[40] = 
0xa2bfe8a1; k[41] = 0xa81a664b; k[42] = 0xc24b8b70; k[43] = 0xc76c51a3; k[44] = 0xd192e819; k[45] = 0xd6990624; k[46] = 0xf40e3585; k[47] = 0x106aa070; k[48] = 0x19a4c116; k[49] = 0x1e376c08; k[50] = 0x2748774c; k[51] = 0x34b0bcb5; k[52] = 0x391c0cb3; k[53] = 0x4ed8aa4a; k[54] = 0x5b9cca4f; k[55] = 0x682e6ff3; k[56] = 0x748f82ee; k[57] = 0x78a5636f; k[58] = 0x84c87814; k[59] = 0x8cc70208; k[60] = 0x90befffa; k[61] = 0xa4506ceb; k[62] = 0xbef9a3f7; k[63] = 0xc67178f2; return k; } unsigned* init_h() { unsigned* h = calloc(9, sizeof(unsigned)); h[0] = 0x6a09e667; h[1] = 0xbb67ae85; h[2] = 0x3c6ef372; h[3] = 0xa54ff53a; h[4] = 0x510e527f; h[5] = 0x9b05688c; h[6] = 0x1f83d9ab; h[7] = 0x5be0cd19; return h; } struct buffer_state { char* p; size_t len; size_t total_len; int single_one_delivered; /* bool */ int total_len_delivered; /* bool */ }; unsigned right_rot(unsigned value, unsigned count) { /* * Defined behaviour in standard C for all count where 0 < count < 32, * which is what we need here. */ value &= mask; int hold1 = (value >> count) & mask; int hold2 = (value << (32 - count)) & mask; int hold = (hold1 | hold2) & mask; return hold; } void init_buf_state(struct buffer_state * state, char* input, size_t len) { state->p = input; state->len = len; state->total_len = len; state->single_one_delivered = 0; state->total_len_delivered = 0; } /* Return value: bool */ int calc_chunk(char* chunk, struct buffer_state * state) { size_t space_in_chunk; if(state->total_len_delivered) { return 0; } if(state->len >= CHUNK_SIZE) { memcpy(chunk, state->p, CHUNK_SIZE); state->p += CHUNK_SIZE; state->len -= CHUNK_SIZE; return 1; } memcpy(chunk, state->p, state->len); chunk += state->len; space_in_chunk = CHUNK_SIZE - state->len; state->p += state->len; state->len = 0; /* If we are here, space_in_chunk is one at minimum. 
*/ if(!state->single_one_delivered) { chunk[0] = 0x80; chunk += 1; space_in_chunk -= 1; state->single_one_delivered = 1; } /* * Now: * - either there is enough space left for the total length, and we can conclude, * - or there is too little space left, and we have to pad the rest of this chunk with zeroes. * In the latter case, we will conclude at the next invocation of this function. */ if(space_in_chunk >= TOTAL_LEN_LEN) { size_t left = space_in_chunk - TOTAL_LEN_LEN; size_t len = state->total_len; int i; memset(chunk, 0x00, left); chunk += left; /* Storing of len * 8 as a big endian 64-bit without overflow. */ chunk[7] = (len << 3); len >>= 5; for(i = 6; i >= 0; i -= 1) { chunk[i] = len; len >>= 8; } state->total_len_delivered = 1; } else { memset(chunk, 0x00, space_in_chunk); } return 1; } /* * Limitations: * - Since input is a pointer in RAM, the data to hash should be in RAM, which could be a problem * for large data sizes. * - SHA algorithms theoretically operate on bit strings. However, this implementation has no support * for bit string lengths that are not multiples of eight, and it really operates on arrays of bytes. * In particular, the len parameter is a number of bytes. */ void calc_sha_256(char* hash, char* input, size_t len) { /* * Note 1: All integers (expect indexes) are 32-bit unsigned integers and addition is calculated modulo 2^32. 
* Note 2: For each round, there is one round constant k[i] and one entry in the message schedule array w[i], 0 = i = 63 * Note 3: The compression function uses 8 working variables, a through h * Note 4: Big-endian convention is used when expressing the constants in this pseudocode, * and when parsing message block data from bytes to words, for example, * the first word of the input message "abc" after padding is 0x61626380 */ /* * Initialize hash values: * (first 32 bits of the fractional parts of the square roots of the first 8 primes 2..19): */ unsigned* k = init_k(); unsigned* h = init_h(); unsigned i; unsigned j; unsigned hold1; unsigned hold2; /* 512-bit chunks is what we will operate on. */ char* chunk = calloc(65, sizeof(char)); struct buffer_state* state = calloc(1, sizeof(struct buffer_state)); init_buf_state(state, input, len); unsigned* ah = calloc(9, sizeof(unsigned)); char *p; unsigned* w = calloc(17, sizeof(unsigned)); unsigned s0; unsigned s1; unsigned ch; unsigned temp1; unsigned temp2; unsigned maj; while(calc_chunk(chunk, state)) { p = chunk; /* Initialize working variables to current hash value: */ for(i = 0; i < 8; i += 1) { ah[i] = h[i]; } /* Compression function main loop: */ for(i = 0; i < 4; i += 1) { /* * The w-array is really w[64], but since we only need * 16 of them at a time, we save stack by calculating * 16 at a time. * * This optimization was not there initially and the * rest of the comments about w[64] are kept in their * initial state. 
*/ /* * create a 64-entry message schedule array w[0..63] of 32-bit words * (The initial values in w[0..63] don't matter, so many implementations zero them here) * copy chunk into first 16 words w[0..15] of the message schedule array */ for(j = 0; j < 16; j += 1) { if(i == 0) { w[j] = ((p[0] & 0xFF) << 24) | ((p[1] & 0xFF) << 16) | ((p[2] & 0xFF) << 8) | (p[3] & 0xFF); p += 4; } else { /* Extend the first 16 words into the remaining 48 words w[16..63] of the message schedule array: */ hold1 = (j + 1) & 0xf; hold2 = w[hold1]; s0 = right_rot(hold2, 7) ^ right_rot(hold2, 18) ^ ((hold2 & mask) >> 3); hold1 = (j + 14) & 0xf; hold2 = w[hold1]; s1 = right_rot(hold2, 17) ^ right_rot(hold2, 19) ^ ((hold2 & mask) >> 10); w[j] += s0 + w[(j + 9) & 0xf] + s1; } s1 = right_rot(ah[4], 6) ^ right_rot(ah[4], 11) ^ right_rot(ah[4], 25); ch = (ah[4] & ah[5]) ^ (~ah[4] & ah[6]); temp1 = ah[7] + s1 + ch + k[i << 4 | j] + w[j]; s0 = right_rot(ah[0], 2) ^ right_rot(ah[0], 13) ^ right_rot(ah[0], 22); maj = (ah[0] & ah[1]) ^ (ah[0] & ah[2]) ^ (ah[1] & ah[2]); temp2 = s0 + maj; ah[7] = ah[6]; ah[6] = ah[5]; ah[5] = ah[4]; ah[4] = ah[3] + temp1; ah[3] = ah[2]; ah[2] = ah[1]; ah[1] = ah[0]; ah[0] = temp1 + temp2; } } /* Add the compressed chunk to the current hash value: */ for(i = 0; i < 8; i += 1) { h[i] += ah[i]; } } /* Produce the final hash value (big-endian): */ i = 0; for(j = 0; i < 8; i += 1) { hash[j] = ((h[i] >> 24) & 0xFF); j += 1; hash[j] = ((h[i] >> 16) & 0xFF); j += 1; hash[j] = ((h[i] >> 8) & 0xFF); j += 1; hash[j] = (h[i] & 0xFF); j += 1; } } struct list { int found; char* name; FILE* f; size_t size; char* buffer; char* hash; struct list* next; }; void bad_checkfile(char* filename) { fputs(filename, stdout); puts(": no properly formatted SHA256 checksum lines found"); } int hex2int(char c, char* filename) { if((c >= '0') && (c <= '9')) return (c - 48); else if((c >= 'a') && (c <= 'f')) return (c - 87); else if ((c >= 'F') && (c <= 'F')) return (c - 55); 
bad_checkfile(filename); exit(EXIT_FAILURE); } char* hash_to_string(char* a) { char* table = "0123456789abcdef"; char* r = calloc(66, sizeof(char)); int i; int j = 0; int c; for(i = 0; i < 32; i += 1) { c = a[i] & 0xFF; r[j] = table[(c >> 4)]; j += 1; r[j] = table[(c & 0xF)]; j += 1; } return r; } int check_file(char* b, char* filename) { int r = TRUE; size_t i; int hold1; int hold2; FILE* f; char* name = calloc(4097, sizeof(char)); char* hash = calloc(33, sizeof(char)); char* hash2 = calloc(33, sizeof(char)); size_t size; char* buffer; go_again: for(i = 0; i < 32; i += 1) { hold1 = hex2int(b[0], filename); hold2 = hex2int(b[1], filename); hash[i] = (hold1 << 4) + hold2; b += 2; } if((' ' != b[0]) || (' ' != b[1])) { bad_checkfile(filename); exit(EXIT_FAILURE); } b += 2; for(i = 0; i < 4096; i += 1) { if('\n' == b[0]) { name[i] = 0; b += 1; break; } name[i] = b[0]; b += 1; } f = fopen(name, "r"); if(NULL == f) { fputs(name, stdout); puts(": No such file or directory"); exit(EXIT_FAILURE); } else { fseek(f, 0, SEEK_END); size = ftell(f); rewind(f); buffer = calloc(size + 1, sizeof(char)); fread(buffer, sizeof(char), size, f); calc_sha_256(hash2, buffer, size); if(match(hash_to_string(hash), hash_to_string(hash2))) { fputs(name, stdout); puts(": OK"); } else { fputs(name, stdout); fputs(": FAILED\nWanted: ", stdout); fputs(hash_to_string(hash), stdout); fputs("\nReceived: ", stdout); puts(hash_to_string(hash2)); r = FALSE; } } if(0 == b[0]) return r; goto go_again; } /* reverse the linked list */ void reverse(struct list** head) { struct list* prev = NULL; struct list* current = *head; struct list* next = NULL; while (current != NULL) { next = current->next; current->next = prev; prev = current; current = next; } *head = prev; } int main(int argc, char **argv) { struct list* l = NULL; struct list* t = NULL; size_t read; int check = FALSE; int r = TRUE; char* output_file = ""; FILE* output = stdout; mask = (0x7FFFFFFF << 1) | 0x1; int i = 1; while(i <= argc) { if(NULL 
== argv[i]) { i += 1; } else if(match(argv[i], "-c") || match(argv[i], "--check")) { check = TRUE; i += 1; } else if (match(argv[i], "-o") || match(argv[i], "--output")) { output_file = argv[i + 1]; i += 2; if (output != stdout) { fclose(output); } output = fopen(output_file, "w"); require(output != NULL, "Output file cannot be opened!\n"); } else if(match(argv[i], "-h") || match(argv[i], "--help")) { puts("Usage: sha256sum <file> [--check]"); exit(EXIT_SUCCESS); } else { t = calloc(1, sizeof(struct list)); t->hash = calloc(33, sizeof(char)); t->name = argv[i]; t->f = fopen(t->name, "r"); if(NULL != t->f) { t->found = TRUE; fseek(t->f, 0, SEEK_END); t->size = ftell(t->f); rewind(t->f); t->buffer = calloc(t->size + 1, sizeof(char)); read = fread(t->buffer, sizeof(char), t->size, t->f); } t->next = l; l = t; i += 1; } } reverse(&l); if(check) { while(NULL != l) { if(l->found) { if(!check_file(l->buffer, l->name)) r = FALSE; } else { fputs(l->name, stdout); puts(": No such file or directory"); exit(EXIT_FAILURE); } l = l->next; } } else { while(NULL != l) { if(l->found) { calc_sha_256(l->hash, l->buffer, l->size); fputs(hash_to_string(l->hash), output); fputs(" ", output); fputs(l->name, output); fputc('\n', output); } else { fputs(l->name, output); fputs(": No such file or directory\n", output); exit(EXIT_FAILURE); } l = l->next; } } if (output != stdout) { fclose(output); } if(r) return 0; else return 1; }
/* Copyright (C) 2016 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _STRING_H
#define _STRING_H

/* When __M2__ is defined the implementation file is pulled in directly;
 * otherwise only the prototypes below are declared. */
#ifdef __M2__
#include <string.c>
#else
#include <stddef.h>

/* Copying and concatenating strings */
char *strcpy(char *dest, const char *src);
char *strncpy(char *dest, const char *src, size_t count);
char *strcat(char *dest, const char *src);
char *strncat(char *dest, const char *src, size_t count);

/* Measuring strings */
size_t strlen(const char *str);
size_t strnlen_s(const char *str, size_t strsz);

/* Comparing strings */
int strcmp(const char *lhs, const char *rhs);
int strncmp(const char *lhs, const char *rhs, size_t count);

/* Searching inside strings */
char *strchr(const char *str, int ch);
char *strrchr(const char *str, int ch);
size_t strspn(const char *dest, const char *src);
size_t strcspn(const char *dest, const char *src);
char *strpbrk(const char *dest, const char *breakset);

/* Operating on raw memory */
void *memset(void *dest, int ch, size_t count);
void *memcpy(void *dest, const void *src, size_t count);
void *memmove(void *dest, const void *src, size_t count);
int memcmp(const void *lhs, const void *rhs, size_t count);
void *memchr(const void *ptr, int ch, size_t count);

/* Substring search */
char *strstr(const char *haystack, const char *needle);

#endif
#endif
/* Copyright (C) 2020 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _SYS_STAT_H
#define _SYS_STAT_H

#ifdef __M2__

/* Pick the implementation file for the target; the first matching
 * condition wins, so the UEFI check has to stay ahead of the
 * architecture checks. */
#if __uefi__
#include <uefi/sys/stat.c>
#elif __i386__
#include <x86/linux/sys/stat.c>
#elif __x86_64__
#include <amd64/linux/sys/stat.c>
#elif __arm__
#include <armv7l/linux/sys/stat.c>
#elif __aarch64__
#include <aarch64/linux/sys/stat.c>
#elif __riscv && __riscv_xlen==32
#include <riscv32/linux/sys/stat.c>
#elif __riscv && __riscv_xlen==64
#include <riscv64/linux/sys/stat.c>
#else
#error arch not supported
#endif

#else
#include <sys/types.h>

/* Set-id bits */
#define S_ISUID 04000
#define S_ISGID 02000

/* Owner permission bits */
#define S_IRWXU 00700
#define S_IRUSR 00400
#define S_IWUSR 00200
#define S_IXUSR 00100

/* Group permission bits */
#define S_IRWXG 00070
#define S_IRGRP 00040
#define S_IWGRP 00020
#define S_IXGRP 00010

/* Permission bits for everyone else */
#define S_IRWXO 00007
#define S_IROTH 00004
#define S_IWOTH 00002
#define S_IXOTH 00001

/* Changing the mode of an existing file */
int chmod(char *pathname, int mode);
int fchmod(int a, mode_t b);

/* Creating directories and special files */
int mkdir(const char *a, mode_t b);
int mknod(const char *a, mode_t b, dev_t c);

/* File mode creation mask */
mode_t umask(mode_t m);

#endif
#endif
/* Copyright (C) 2020 Jeremiah Orians * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _SYS_STAT_C #define _SYS_STAT_C #include <uefi/uefi.c> #include <sys/types.h> #define S_IRWXU 00700 #define S_IXUSR 00100 #define S_IWUSR 00200 #define S_IRUSR 00400 #define S_ISUID 04000 #define S_ISGID 02000 #define S_IXGRP 00010 #define S_IXOTH 00001 #define S_IRGRP 00040 #define S_IROTH 00004 #define S_IWGRP 00020 #define S_IWOTH 00002 #define S_IRWXG 00070 #define S_IRWXO 00007 int chmod(char *pathname, int mode) { return 0; } int fchmod(int a, mode_t b) { return 0; } int __open(struct efi_file_protocol* _rootdir, char* name, long mode, long attributes); int mkdir(char const* name, mode_t _mode) { struct efi_file_protocol* new_directory; long mode = EFI_FILE_MODE_CREATE | EFI_FILE_MODE_WRITE | EFI_FILE_MODE_READ; long attributes = EFI_FILE_DIRECTORY; long new_directory = __open(_rootdir, name, mode, attributes); if(new_directory != -1) { _close(new_directory); return 0; } return -1; } int mknod(char const* a, mode_t b, dev_t c) { return -1; } mode_t umask(mode_t m) { return 0; } #endif
/* Copyright (C) 2022 Andrius Štikonas
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * UEFI support layer: constants, firmware table layouts, trampolines
 * that call firmware function pointers, and path/argument/environment
 * helpers built on top of them.
 */

#define __uefi__ 1

#ifndef _UEFI_C
#define _UEFI_C

#include <ctype.h>
#include <uefi/string_p.h>

#define PAGE_SIZE 4096
#define USER_STACK_SIZE 8388608
#define EFI_OPEN_PROTOCOL_BY_HANDLE_PROTOCOL 1

/* Open-mode bits handed to the file protocol */
#define EFI_FILE_MODE_READ 1
#define EFI_FILE_MODE_WRITE 2
#define EFI_FILE_MODE_CREATE (1 << 63)
/* File attribute bits */
#define EFI_FILE_READ_ONLY 1
#define EFI_FILE_DIRECTORY 0x10
#define EFI_LOADER_DATA 2
#define EFI_VARIABLE_BOOTSERVICE_ACCESS 2

/* Status codes.
 * NOTE(review): the error values expand without surrounding parentheses,
 * so an expression such as `status == EFI_NOT_FOUND` parses as
 * `(status == (1 << 31)) | 14` -- confirm how callers compare them.
 * NOTE(review): EFI_NOT_FOUND sets bit 31 while its siblings set bit 63
 * -- confirm this is intentional. */
#define EFI_SUCCESS 0
#define EFI_LOAD_ERROR (1 << 63) | 1
#define EFI_INVALID_PARAMETER (1 << 63) | 2
#define EFI_UNSUPPORTED (1 << 63) | 3
#define EFI_BUFFER_TOO_SMALL (1 << 63) | 5
#define EFI_NOT_FOUND (1 << 31) | 14

#define __PATH_MAX 4096
#define __ENV_NAME_MAX 4096

#define HARDWARE_DEVICE_PATH 1
#define MEMORY_MAPPED 3
#define END_HARDWARE_DEVICE_PATH 0x7F
#define END_ENTIRE_DEVICE_PATH 0xFF

#define TPL_APPLICATION 4
#define TPL_CALLBACK 8
#define TPL_NOTIFY 16
#define TPL_HIGH_LEVEL 31

/* Process-wide state; assigned outside the part of the file shown here
 * except for _argc/_argv, which _process_load_options fills in. */
void* _image_handle;
void* _root_device;
void* __user_stack;

int _argc;
char** _argv;
char** _envp;

/* Prefixes used by _relative_path_to_absolute */
char* _cwd;
char* _root;

/* Table of function pointers; member order defines the layout */
struct efi_simple_text_output_protocol
{
	void* reset;
	void* output_string;
	void* test_string;
	void* query_mode;
	void* set_mode;
	void* set_attribute;
	void* clear_screen;
	void* set_cursor;
	void* enable_cursor;
	void* mode;
};

/* Header placed at the start of each firmware table below.
 * Two members carry a pair of values each, as their names say. */
struct efi_table_header
{
	unsigned signature;
	unsigned revision_and_header_size;
	unsigned crc32_and_reserved;
};

struct efi_boot_table
{
	struct efi_table_header header;

	/* Task Priority Services */
	void* raise_tpl;
	void* restore_tpl;

	/* Memory Services */
	void* allocate_pages;
	void* free_pages;
	void* get_memory_map;
	void* allocate_pool;
	void* free_pool;

	/* Event & Timer Services */
	void* create_event;
	void* set_timer;
	void* wait_for_event;
	void* signal_event;
	void* close_event;
	void* check_event;

	/* Protocol Handler Services */
	void* install_protocol_interface;
	void* reinstall_protocol_interface;
	void* uninstall_protocol_interface;
	void* handle_protocol;
	void* reserved;
	void* register_protocol_notify;
	void* locate_handle;
	void* locate_device_path;
	void* install_configuration_table;

	/* Image Services */
	void* load_image;
	void* start_image;
	void* exit;
	void* unload_image;
	void* exit_boot_services;

	/* Miscellaneous Services */
	void* get_next_monotonic_count;
	void* stall;
	void* set_watchdog_timer;

	/* DriverSupport Services */
	void* connect_controller;
	void* disconnect_controller;

	/* Open and Close Protocol Services */
	void* open_protocol;
	void* close_protocol;
	void* open_protocol_information;

	/* Library Services */
	void* protocols_per_handle;
	void* locate_handle_buffer;
	void* locate_protocol;
	void* install_multiple_protocol_interfaces;
	void* uninstall_multiple_protocol_interfaces;

	/* 32-bit CRC Services */
	/* NOTE(review): the UEFI specification lists CalculateCrc32 in this
	 * group ahead of CopyMem; there is no member for it here, so confirm
	 * the offsets of the three members below before using them. */
	void* copy_mem;
	void* set_mem;
	void* create_event_ex;
};

struct efi_runtime_table
{
	struct efi_table_header header;

	/* Time Services */
	void* get_time;
	void* set_time;
	void* get_wakeup_time;
	void* set_wakeup_time;

	/* Virtual Memory Services */
	void* set_virtual_address_map;
	void* convert_pointer;

	/* Variable Services */
	void* get_variable;
	void* get_next_variable_name;
	void* set_variable;

	/* Miscellaneous Services */
	void* get_next_high_monotonic_count;
	void* reset_system;

	/* UEFI 2.0 Capsule Services */
	void* update_capsule;
	void* query_capsule_capabilities;

	/* Miscellaneous UEFI 2.0 Services */
	void* query_variable_info;
};

struct efi_system_table
{
	struct efi_table_header header;

	char* firmware_vendor;
	unsigned firmware_revision;
	void* console_in_handle;
	void* con_in;
	void* console_out_handle;
	struct efi_simple_text_output_protocol* con_out;
	void *standard_error_handle;
	struct efi_simple_text_output_protocol* std_err;
	struct efi_runtime_table* runtime_services;
	struct efi_boot_table* boot_services;
	unsigned number_table_entries;
	void *configuration_table;
};
/* Firmware system table; every boot/runtime service wrapper below goes through it */
struct efi_system_table* _system;

struct efi_guid
{
	uint32_t data1;
	uint16_t data2;
	uint16_t data3;
	uint8_t data4[8];
};
/* GUID objects; their values are not assigned in the part of the file shown here */
struct efi_guid EFI_LOADED_IMAGE_PROTOCOL_GUID;
struct efi_guid EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_GUID;
struct efi_guid EFI_FILE_INFO_GUID;
struct efi_guid EFI_SHELL_VARIABLE_GUID;

struct efi_loaded_image_protocol
{
	unsigned revision;
	void* parent;
	void* system;

	void* device;
	void* filepath;
	void* reserved;

	/* Image's load options */
	unsigned load_options_size;
	void* load_options;

	/* Location of the image in memory */
	void* image_base;
	unsigned image_size;
	unsigned image_code_type;
	unsigned image_data_type;
	void* unload;
};

struct efi_loaded_image_protocol* _image;

struct efi_simple_file_system_protocol
{
	unsigned revision;
	void* open_volume;
};

/* Function-pointer table of an open file or directory */
struct efi_file_protocol
{
	unsigned revision;
	void* open;
	void* close;
	void* delete;
	void* read;
	void* write;
	void* get_position;
	void* set_position;
	void* get_info;
	void* set_info;
	void* flush;
	void* open_ex;
	void* read_ex;
	void* write_ex;
	void* flush_ex;
};

/* Directory handle used as the starting point when opening paths (see mkdir callers) */
struct efi_file_protocol* _rootdir;

struct efi_time
{
	uint16_t year;
	uint8_t month;
	uint8_t day;
	uint8_t hour;
	uint8_t minute;
	uint8_t second;
	uint8_t pad1;
	uint32_t nanosecond;
	uint16_t time_zone;
	uint8_t daylight;
	uint8_t pad2;
};

struct efi_file_info
{
	unsigned size;
	unsigned file_size;
	unsigned physical_size;
	struct efi_time create_time;
	struct efi_time last_access_time;
	/* (sic) member name is spelled "modifiction" */
	struct efi_time modifiction_time;
	unsigned attribute;
	char file_name[__PATH_MAX];
};

struct efi_device_path_protocol
{
	uint8_t type;
	uint8_t subtype;
	uint16_t length;
	uint32_t memory_type;
	unsigned start_address;
	unsigned end_address;
};

/*
 * __uefi_N: trampolines that call the function pointer passed as the
 * last argument with the N preceding arguments.
 *
 * Each one loads its arguments from fixed [rbp - 8*k] slots into
 * rcx, rdx, r8, r9 (further arguments are pushed), saves rsp, aligns
 * the stack to 16 bytes, reserves 32 bytes, calls through rax and then
 * reloads the saved rsp. This looks like the Microsoft x64 calling
 * convention used by UEFI -- confirm against the UEFI specification.
 * Only x86_64 is implemented; any other architecture stops at #error.
 * The functions contain no return statement: whatever the callee left
 * in rax is presumably the value the caller sees -- confirm with the
 * M2-Planet code generator.
 */

/* NOTE(review): declared with two void* parameters, but the asm reads one
 * argument plus the function pointer and the callers in this file pass
 * exactly that -- confirm the extra parameter is harmless. */
unsigned __uefi_1(void*, void*, FUNCTION f)
{
#ifdef __x86_64__
	asm("lea_rcx,[rbp+DWORD] %-8"
	    "mov_rcx,[rcx]"
	    "lea_rax,[rbp+DWORD] %-16"
	    "mov_rax,[rax]"
	    "push_rsp"
	    "push_[rsp]"
	    "and_rsp, %-16"
	    "sub_rsp, %32"
	    "call_rax"
	    "mov_rsp,[rsp+BYTE] %40");
#else
#error unsupported arch
#endif
}

/* Two arguments in rcx and rdx; function pointer from the third slot */
unsigned __uefi_2(void*, void*, FUNCTION f)
{
#ifdef __x86_64__
	asm("lea_rcx,[rbp+DWORD] %-8"
	    "mov_rcx,[rcx]"
	    "lea_rdx,[rbp+DWORD] %-16"
	    "mov_rdx,[rdx]"
	    "lea_rax,[rbp+DWORD] %-24"
	    "mov_rax,[rax]"
	    "push_rsp"
	    "push_[rsp]"
	    "and_rsp, %-16"
	    "sub_rsp, %32"
	    "call_rax"
	    "mov_rsp,[rsp+BYTE] %40");
#else
#error unsupported arch
#endif
}

/* Three arguments in rcx, rdx and r8; function pointer from the fourth slot */
unsigned __uefi_3(void*, void*, void*, FUNCTION f)
{
#ifdef __x86_64__
	asm("lea_rcx,[rbp+DWORD] %-8"
	    "mov_rcx,[rcx]"
	    "lea_rdx,[rbp+DWORD] %-16"
	    "mov_rdx,[rdx]"
	    "lea_r8,[rbp+DWORD] %-24"
	    "mov_r8,[r8]"
	    "lea_rax,[rbp+DWORD] %-32"
	    "mov_rax,[rax]"
	    "push_rsp"
	    "push_[rsp]"
	    "and_rsp, %-16"
	    "sub_rsp, %32"
	    "call_rax"
	    "mov_rsp,[rsp+BYTE] %40");
#else
#error unsupported arch
#endif
}

/* Four arguments in rcx, rdx, r8 and r9; function pointer from the fifth slot */
unsigned __uefi_4(void*, void*, void*, void*, FUNCTION f)
{
#ifdef __x86_64__
	asm("lea_rcx,[rbp+DWORD] %-8"
	    "mov_rcx,[rcx]"
	    "lea_rdx,[rbp+DWORD] %-16"
	    "mov_rdx,[rdx]"
	    "lea_r8,[rbp+DWORD] %-24"
	    "mov_r8,[r8]"
	    "lea_r9,[rbp+DWORD] %-32"
	    "mov_r9,[r9]"
	    "lea_rax,[rbp+DWORD] %-40"
	    "mov_rax,[rax]"
	    "push_rsp"
	    "push_[rsp]"
	    "and_rsp, %-16"
	    "sub_rsp, %32"
	    "call_rax"
	    "mov_rsp,[rsp+BYTE] %40");
#else
#error unsupported arch
#endif
}

/* Five arguments: four in registers, the fifth pushed after one extra
 * push of rax (presumably padding to keep the stack 16-byte aligned --
 * confirm); hence the larger restore offset. */
unsigned __uefi_5(void*, void*, void*, void*, void*, FUNCTION f)
{
#ifdef __x86_64__
	asm("lea_rcx,[rbp+DWORD] %-8"
	    "mov_rcx,[rcx]"
	    "lea_rdx,[rbp+DWORD] %-16"
	    "mov_rdx,[rdx]"
	    "lea_r8,[rbp+DWORD] %-24"
	    "mov_r8,[r8]"
	    "lea_r9,[rbp+DWORD] %-32"
	    "mov_r9,[r9]"
	    "push_rsp"
	    "push_[rsp]"
	    "and_rsp, %-16"
	    "push_rax"
	    "lea_rax,[rbp+DWORD] %-40"
	    "mov_rax,[rax]"
	    "push_rax"
	    "lea_rax,[rbp+DWORD] %-48"
	    "mov_rax,[rax]"
	    "sub_rsp, %32"
	    "call_rax"
	    "mov_rsp,[rsp+BYTE] %56");
#else
#error unsupported arch
#endif
}

/* Six arguments: four in registers, then the sixth and the fifth are
 * pushed in that order before the function pointer is loaded. */
unsigned __uefi_6(void*, void*, void*, void*, void*, void*, FUNCTION f)
{
#ifdef __x86_64__
	asm("lea_rcx,[rbp+DWORD] %-8"
	    "mov_rcx,[rcx]"
	    "lea_rdx,[rbp+DWORD] %-16"
	    "mov_rdx,[rdx]"
	    "lea_r8,[rbp+DWORD] %-24"
	    "mov_r8,[r8]"
	    "lea_r9,[rbp+DWORD] %-32"
	    "mov_r9,[r9]"
	    "push_rsp"
	    "push_[rsp]"
	    "and_rsp, %-16"
	    "lea_rax,[rbp+DWORD] %-48"
	    "mov_rax,[rax]"
	    "push_rax"
	    "lea_rax,[rbp+DWORD] %-40"
	    "mov_rax,[rax]"
	    "push_rax"
	    "lea_rax,[rbp+DWORD] %-56"
	    "mov_rax,[rax]"
	    "sub_rsp, %32"
	    "call_rax"
	    "mov_rsp,[rsp+BYTE] %56");
#else
#error unsupported arch
#endif
}

/* Forwards to boot_services->allocate_pool and returns its status */
unsigned _allocate_pool(unsigned memory_type, unsigned size, void* pool)
{
	return __uefi_3(memory_type, size, pool, _system->boot_services->allocate_pool);
}

/* Forwards to boot_services->free_pool.
 * NOTE(review): declared void yet written as `return <expr>;` -- accepted
 * by this dialect, but standard C compilers reject or warn about it. */
void _free_pool(void* memory)
{
	return __uefi_1(memory, _system->boot_services->free_pool);
}

/* Forwards to boot_services->open_protocol and returns its status.
 * The trailing open_protocol parameter is not used by the body. */
unsigned _open_protocol(void* handle, struct efi_guid* protocol, void* agent_handle, void** interface, void* controller_handle, long attributes, FUNCTION open_protocol)
{
	return __uefi_6(handle, protocol, agent_handle, interface, controller_handle, attributes, _system->boot_services->open_protocol);
}

/* Forwards to boot_services->close_protocol and returns its status */
unsigned _close_protocol(void* handle, struct efi_guid* protocol, void* agent_handle, void* controller_handle)
{
	return __uefi_4(handle, protocol, agent_handle, controller_handle, _system->boot_services->close_protocol);
}

/* Calls rootfs->open_volume(rootfs, rootdir) and returns its status */
unsigned _open_volume(struct efi_simple_file_system_protocol* rootfs, struct efi_file_protocol** rootdir)
{
	return __uefi_2(rootfs, rootdir, rootfs->open_volume);
}

/* Calls file->close(file) and returns its status */
unsigned _close(struct efi_file_protocol* file)
{
	return __uefi_1(file, file->close);
}

/* Forwards to runtime_services->get_next_variable_name and returns its status */
unsigned _get_next_variable_name(unsigned* size, char* name, struct efi_guid* vendor_guid)
{
	return __uefi_3(size, name, vendor_guid, _system->runtime_services->get_next_variable_name);
}

/* Forwards to runtime_services->get_variable and returns its status */
unsigned _get_variable(char* name, struct efi_guid* vendor_guid, uint32_t* attributes, unsigned* data_size, void* data)
{
	return __uefi_5(name, vendor_guid, attributes, data_size, data, _system->runtime_services->get_variable);
}

char* _string2wide(char* narrow_string);
size_t strlen(char const* str);
void free(void* ptr);
/*
 * Stores the narrow string data under the variable name in the
 * EFI_SHELL_VARIABLE_GUID namespace with the BOOTSERVICE_ACCESS
 * attribute. Both strings are widened first; the size passed is two
 * bytes per character of data, i.e. without a terminator.
 * Returns the status of runtime_services->set_variable.
 */
unsigned _set_variable(char* name, void* data)
{
	char* wide_name = _string2wide(name);
	char* wide_data = _string2wide(data);
	unsigned data_size = strlen(data) * 2;
	uint32_t attributes = EFI_VARIABLE_BOOTSERVICE_ACCESS;
	unsigned rval = __uefi_5(wide_name, &EFI_SHELL_VARIABLE_GUID, attributes, data_size, wide_data, _system->runtime_services->set_variable);
	free(wide_name);
	free(wide_data);
	return rval;
}

/* Jumps to the label FUNCTION__exit, which is defined outside the part
 * of the file shown here; value is not referenced in the body. */
void exit(unsigned value)
{
	goto FUNCTION__exit;
}

char* strcat(char* dest, char const* src);
char* strcpy(char* dest, char const* src);
size_t strlen(char const* str);
void* calloc(int count, int size);
/*
 * Returns a newly allocated __PATH_MAX-byte buffer holding
 * narrow_string prefixed with _cwd (plus a '/' when _cwd does not
 * already end in '/' or '\\') when it is relative, or with _root when
 * it begins with '/' or '\\'. The caller frees the result.
 * NOTE(review): the strcat calls are not bounded by __PATH_MAX.
 */
char* _relative_path_to_absolute(char* narrow_string)
{
	char* absolute_path = calloc(__PATH_MAX, 1);
	if(narrow_string[0] != '/' && narrow_string[0] != '\\')
	{
		strcat(absolute_path, _cwd);
		if(_cwd[strlen(_cwd) - 1] != '/' && _cwd[strlen(_cwd) - 1] != '\\')
		{
			strcat(absolute_path, "/");
		}
	}
	else
	{
		strcat(absolute_path, _root);
	}
	strcat(absolute_path, narrow_string);

	return absolute_path;
}

/*
 * Makes narrow_string absolute, rewrites it in place with every '/'
 * replaced by '\\' and each "/./" collapsed to a single separator, and
 * returns the result as a newly allocated wide string (caller frees).
 */
char* _posix_path_to_uefi(char* narrow_string)
{
	char* absolute_path = _relative_path_to_absolute(narrow_string);

	unsigned length = strlen(absolute_path);
	/* in reads and out writes the same buffer; out never overtakes in */
	unsigned in = 0;
	unsigned out = 0;
	while(in < length)
	{
		if(absolute_path[in] == '/')
		{
			absolute_path[out] = '\\';
			// Deal with /./ in paths.
			if((in < (length - 1)) && (absolute_path[in + 1] == '.') && (absolute_path[in + 2] == '/'))
			{
				in += 2;
			}
		}
		else
		{
			absolute_path[out] = absolute_path[in];
		}
		in += 1;
		out += 1;
	}
	absolute_path[out] = 0;

	char* wide_string = _string2wide(absolute_path);
	free(absolute_path);
	return wide_string;
}

/*
 * Returns a newly allocated copy of narrow_string in which every
 * character occupies two bytes: the character at the even offset and a
 * zero (from calloc) at the odd offset, followed by a two-byte zero
 * terminator. The caller frees the result.
 */
char* _string2wide(char* narrow_string)
{
	unsigned length = strlen(narrow_string);
	char* wide_string = calloc(length + 1, 2);
	unsigned i;
	for(i = 0; i < length; i += 1)
	{
		wide_string[2 * i] = narrow_string[i];
	}
	return wide_string;
}

int isspace(char _c);

/*
 * Splits load_options in place at whitespace and publishes the pieces
 * through _argc and _argv (_argv gets one extra zeroed slot at the end).
 * Whitespace following each word is overwritten with NUL.
 * NOTE(review): both passes step one character at a time, so
 * load_options is presumably already a narrow string -- confirm with
 * the caller. Both do/while loops also advance before testing, so an
 * empty string would be read past its terminator.
 */
void _process_load_options(char* load_options)
{
	/* Determine argc */
	_argc = 1; /* command name */
	char *i = load_options;
	unsigned was_space = 0;
	do
	{
		if(isspace(i[0]))
		{
			/* A run of whitespace counts as one separator */
			if(!was_space)
			{
				_argc += 1;
				was_space = 1;
			}
		}
		else
		{
			was_space = 0;
		}
		i += 1;
	} while(i[0] != 0);

	/* Collect argv */
	_argv = calloc(_argc + 1, sizeof(char*));
	i = load_options;
	unsigned j;
	for(j = 0; j < _argc; j += 1)
	{
		_argv[j] = i;
		/* Skip to the end of the current word */
		do
		{
			i += 1;
		} while(!isspace(i[0]) && i[0] != 0);
		i[0] = 0;
		/* Skip to the start of the next word */
		do
		{
			i += 1;
		} while(isspace(i[0]));
	}
}

/* Function to find the length of a char**; an array of strings */
unsigned _array_length(char** array)
{
	unsigned length = 0;

	while(array[length] != NULL)
	{
		length += 1;
	}

	return length;
}

size_t wcstombs(char* dest, char* src, size_t n);

/*
 * Reads the variable name (a wide string occupying size bytes) from
 * the vendor_guid namespace and returns a newly allocated narrow
 * string of the form "NAME=VALUE". The caller frees the result.
 * The status of both _get_variable calls is ignored.
 * NOTE(review): data_size is not initialised before the first, sizing
 * call -- confirm the firmware does not depend on its incoming value.
 */
char* _get_environmental_variable(struct efi_guid* vendor_guid, char* name, unsigned size)
{
	unsigned data_size;
	char* data;
	char* variable_data;
	char* envp_line = NULL;

	/* Call with data=NULL to obtain data size that we need to allocate */
	_get_variable(name, vendor_guid, NULL, &data_size, NULL);
	data = calloc(data_size + 1, 1);
	_get_variable(name, vendor_guid, NULL, &data_size, data);

	/* Narrow the value: two bytes per character in, one byte out */
	variable_data = calloc((data_size / 2) + 1, 1);
	wcstombs(variable_data, data, (data_size / 2) + 1);

	envp_line = calloc((size / 2) + (data_size / 2) + 1, 1);
	wcstombs(envp_line, name, size / 2);
	strcat(envp_line, "=");
	strcat(envp_line, variable_data);
	free(data);
	free(variable_data);

	return envp_line;
}

int
memcmp(void const* lhs, void const* rhs, size_t count); char** _get_environmental_variables(char** envp) { EFI_SHELL_VARIABLE_GUID.data1 = 0x158def5a; EFI_SHELL_VARIABLE_GUID.data2 = 0xf656; EFI_SHELL_VARIABLE_GUID.data3 = 0x419c; EFI_SHELL_VARIABLE_GUID.data4[0] = 0xb0; EFI_SHELL_VARIABLE_GUID.data4[1] = 0x27; EFI_SHELL_VARIABLE_GUID.data4[2] = 0x7a; EFI_SHELL_VARIABLE_GUID.data4[3] = 0x31; EFI_SHELL_VARIABLE_GUID.data4[4] = 0x92; EFI_SHELL_VARIABLE_GUID.data4[5] = 0xc0; EFI_SHELL_VARIABLE_GUID.data4[6] = 0x79; EFI_SHELL_VARIABLE_GUID.data4[7] = 0xd2; unsigned size = __ENV_NAME_MAX; unsigned rval; unsigned envc = 0; char* name = calloc(size, 1); struct efi_guid vendor_guid; /* First count the number of environmental variables */ do { size = __ENV_NAME_MAX; rval = _get_next_variable_name(&size, name, &vendor_guid); if(rval == EFI_SUCCESS) { if(memcmp(&vendor_guid, &EFI_SHELL_VARIABLE_GUID, sizeof(struct efi_guid)) == 0) { envc += 1; } } } while(rval == EFI_SUCCESS); /* Now redo the search but this time populate envp array */ envp = calloc(sizeof(char*), envc + 1); name[0] = 0; name[1] = 0; unsigned j = 0; do { size = __ENV_NAME_MAX; rval = _get_next_variable_name(&size, name, &vendor_guid); if(rval == EFI_SUCCESS) { if(memcmp(&vendor_guid, &EFI_SHELL_VARIABLE_GUID, sizeof(struct efi_guid)) == 0) { envp[j] = _get_environmental_variable(&vendor_guid, name, size); j += 1; } } } while(rval == EFI_SUCCESS); envp[j] = 0; free(name); return envp; } void _wipe_environment() { char** envp = _get_environmental_variables(envp); unsigned i = 0; unsigned j; char* name; while(envp[i] != 0) { j = 0; name = envp[i]; while(envp[i][j] != '=') { j += 1; } envp[i][j] = 0; _set_variable(name, ""); i += 1; } free(envp); } int strcmp(char const* lhs, char const* rhs); char* strchr(char const* str, int ch); void _setup_current_working_directory(char** envp) { _cwd = calloc(__PATH_MAX, 1); _root = calloc(__PATH_MAX, 1); unsigned i = 0; unsigned j; unsigned k; char* value; char* match; 
while(envp[i] != 0) { j = 0; while(envp[i][j] != '=') { j += 1; } envp[i][j] = 0; if(strcmp(envp[i], "root") == 0) { value = envp[i] + j + 1; match = strchr(value, ':'); /* strip uefi device, e.g. fs0: */ if(match != NULL) { value = match + 1; } strcpy(_root, value); k = 0; while(_root[k] != '\0') { if(_root[k] == '\\') { _root[k] = '/'; } k += 1; } } else if(strcmp(envp[i], "cwd") == 0) { value = envp[i] + j + 1; match = strchr(value, ':'); /* strip uefi device, e.g. fs0: */ if(match != NULL) { value = match + 1; } strcpy(_cwd, value); k = 0; while(_cwd[k] != '\0') { if(_cwd[k] == '\\') { _cwd[k] = '/'; } k += 1; } } envp[i][j] = '='; i += 1; } if(strcmp(_cwd, "") == 0) { strcpy(_cwd, "/"); } } void* malloc(unsigned size); void __init_io(); void _init() { /* Allocate user stack, UEFI stack is not big enough for compilers */ __user_stack = malloc(USER_STACK_SIZE) + USER_STACK_SIZE; /* Process command line arguments */ EFI_LOADED_IMAGE_PROTOCOL_GUID.data1 = 0x5b1b31a1; EFI_LOADED_IMAGE_PROTOCOL_GUID.data2 = 0x9562; EFI_LOADED_IMAGE_PROTOCOL_GUID.data3 = 0x11d2; EFI_LOADED_IMAGE_PROTOCOL_GUID.data4[0] = 0x8e; EFI_LOADED_IMAGE_PROTOCOL_GUID.data4[1] = 0x3f; EFI_LOADED_IMAGE_PROTOCOL_GUID.data4[2] = 0; EFI_LOADED_IMAGE_PROTOCOL_GUID.data4[3] = 0xa0; EFI_LOADED_IMAGE_PROTOCOL_GUID.data4[4] = 0xc9; EFI_LOADED_IMAGE_PROTOCOL_GUID.data4[5] = 0x69; EFI_LOADED_IMAGE_PROTOCOL_GUID.data4[6] = 0x72; EFI_LOADED_IMAGE_PROTOCOL_GUID.data4[7] = 0x3b; __init_io(); _open_protocol(_image_handle, &EFI_LOADED_IMAGE_PROTOCOL_GUID, &_image, _image_handle, 0, EFI_OPEN_PROTOCOL_BY_HANDLE_PROTOCOL); char* load_options = calloc(_image->load_options_size, 1); wcstombs(load_options, _image->load_options, _image->load_options_size); _process_load_options(load_options); EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_GUID.data1 = 0x964E5B22; EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_GUID.data2 = 0x6459; EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_GUID.data3 = 0x11d2; EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_GUID.data4[0] = 0x8e; 
EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_GUID.data4[1] = 0x39; EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_GUID.data4[2] = 0; EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_GUID.data4[3] = 0xa0; EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_GUID.data4[4] = 0xc9; EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_GUID.data4[5] = 0x69; EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_GUID.data4[6] = 0x72; EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_GUID.data4[7] = 0x3b; _root_device = _image->device; struct efi_simple_file_system_protocol* rootfs; _open_protocol(_root_device, &EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_GUID, &rootfs, _image_handle, 0, EFI_OPEN_PROTOCOL_BY_HANDLE_PROTOCOL); _open_volume(rootfs, &_rootdir); EFI_FILE_INFO_GUID.data1 = 0x09576e92; EFI_FILE_INFO_GUID.data2 = 0x6d3f; EFI_FILE_INFO_GUID.data3 = 0x11d2; EFI_FILE_INFO_GUID.data4[0] = 0x8e; EFI_FILE_INFO_GUID.data4[1] = 0x39; EFI_FILE_INFO_GUID.data4[2] = 0; EFI_FILE_INFO_GUID.data4[3] = 0xa0; EFI_FILE_INFO_GUID.data4[4] = 0xc9; EFI_FILE_INFO_GUID.data4[5] = 0x69; EFI_FILE_INFO_GUID.data4[6] = 0x72; EFI_FILE_INFO_GUID.data4[7] = 0x3b; _envp = _get_environmental_variables(_envp); _setup_current_working_directory(_envp); } void __kill_io(); void* _malloc_release_all(FUNCTION _free); void _cleanup() { __kill_io(); _close(_rootdir); _close_protocol(_root_device, &EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_GUID, _image_handle, 0); _close_protocol(_image_handle, &EFI_LOADED_IMAGE_PROTOCOL_GUID, _image_handle, 0); _malloc_release_all(_free_pool); } void* _malloc_uefi(unsigned size) { void* memory_block; if(_allocate_pool(EFI_LOADER_DATA, size, &memory_block) != EFI_SUCCESS) { return 0; } return memory_block; } #endif
/* Copyright (C) 2022 Andrius Štikonas
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

/* Include guard: the contents below are processed once per translation unit. */
#ifndef _CTYPE_H
#define _CTYPE_H

#ifdef __M2__
/* When __M2__ is defined the implementation file itself is included. */
#include <ctype.c>
#else
/* Nothing is declared on this branch. */
#endif

#endif
/* Copyright (C) 2022 Andrius Štikonas
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * isspace - test whether _c is a whitespace character.
 *
 * Returns 1 for ' ' and for the five consecutive codes '\t', '\n', '\v',
 * '\f' and '\r'; returns 0 for everything else.
 *
 * The range is checked on both ends.  The earlier form `_c - '\t' < 5`
 * is a signed comparison in ISO C, so every value below '\t' (NUL
 * included) was classified as whitespace.  Only comparisons that yield
 * 0 or 1 are combined, so the result does not depend on short-circuit
 * evaluation of && and ||.
 */
int isspace(char _c)
{
	return (_c == ' ') || ((_c >= '\t') && (_c <= '\r'));
}
/* Copyright (C) 2016 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#include <stddef.h>

/* Copy src, terminator included, into dest.  Returns dest. */
char* strcpy(char* dest, char const* src)
{
	int i = 0;

	while (0 != src[i])
	{
		dest[i] = src[i];
		i = i + 1;
	}
	dest[i] = 0;

	return dest;
}

/*
 * Copy at most count characters of src into dest and zero-fill whatever
 * is left of the count bytes.  Exactly count bytes are written; dest is
 * NOT terminated when src holds count or more characters.  Returns dest.
 */
char* strncpy(char* dest, char const* src, size_t count)
{
	if(0 == count) return dest;
	size_t i = 0;

	while(0 != src[i])
	{
		dest[i] = src[i];
		i = i + 1;
		if(count == i) return dest;
	}

	/* Pad up to, but not including, dest[count]: that byte is outside the buffer */
	while(i < count)
	{
		dest[i] = 0;
		i = i + 1;
	}

	return dest;
}

/* Append src to the end of the string in dest.  Returns dest. */
char* strcat(char* dest, char const* src)
{
	int i = 0;
	int j = 0;

	/* Find the terminator of dest */
	while(0 != dest[i]) i = i + 1;

	while(0 != src[j])
	{
		dest[i] = src[j];
		i = i + 1;
		j = j + 1;
	}
	dest[i] = 0;

	return dest;
}

/*
 * Append at most count characters of src to the string in dest and
 * terminate the result.  Returns dest.
 */
char* strncat(char* dest, char const* src, size_t count)
{
	size_t i = 0;
	size_t j = 0;

	while(0 != dest[i]) i = i + 1;

	while(0 != src[j])
	{
		if(count == j)
		{
			dest[i] = 0;
			return dest;
		}
		dest[i] = src[j];
		i = i + 1;
		j = j + 1;
	}
	dest[i] = 0;

	return dest;
}

/* Number of characters in str before the terminator. */
size_t strlen(char const* str )
{
	size_t i = 0;
	while(0 != str[i]) i = i + 1;
	return i;
}

/*
 * Length of str, never looking at more than strsz characters.
 * Returns strsz when no terminator is found in that range and 0 when
 * str is NULL.  The bound is tested before each read so str[strsz] is
 * never touched.
 */
size_t strnlen_s(char const* str, size_t strsz )
{
	if(NULL == str) return 0;

	size_t i = 0;
	while(i < strsz)
	{
		if(0 == str[i]) return i;
		i = i + 1;
	}
	return i;
}

/*
 * Compare two strings.  Returns 0 when equal, otherwise the difference
 * of the first pair of characters that differ (lhs minus rhs).
 */
int strcmp(char const* lhs, char const* rhs )
{
	int i = 0;
	while(0 != lhs[i])
	{
		if(lhs[i] != rhs[i]) return lhs[i] - rhs[i];
		i = i + 1;
	}

	return lhs[i] - rhs[i];
}

/*
 * Compare at most count characters of two strings.  Returns 0 when the
 * compared ranges are equal, otherwise the difference of the first
 * differing pair (lhs minus rhs).
 */
int strncmp(char const* lhs, char const* rhs, size_t count)
{
	size_t i = 0;
	while(count > i)
	{
		/* Compare first: a lhs that ends early must still differ from a longer rhs */
		if(lhs[i] != rhs[i]) return lhs[i] - rhs[i];
		if(0 == lhs[i]) break;
		i = i + 1;
	}

	return 0;
}

/*
 * Find the first occurrence of ch in str.  Returns a pointer to it or
 * NULL when absent.
 *
 * NOTE: unlike ISO C, looking for '\0' yields NULL rather than a pointer
 * to the terminator.  That behaviour is kept unchanged for existing
 * callers.
 */
char* strchr(char const* str, int ch)
{
	char* p = str;
	while(ch != p[0])
	{
		if(0 == p[0]) return NULL;
		p = p + 1;
	}
	if(0 == p[0]) return NULL;

	return p;
}

/*
 * Find the last occurrence of ch in str, scanning backwards from the
 * terminator.  Returns a pointer to it or NULL when absent.
 */
char* strrchr(char const* str, int ch)
{
	char* p = str;
	int i = 0;

	while(0 != p[i]) i = i + 1;

	while(ch != p[i])
	{
		if(0 == i) return NULL;
		i = i - 1;
	}

	return (p + i);
}

/* Length of the leading part of dest made only of characters found in src. */
size_t strspn(char const* dest, char const* src)
{
	size_t i = 0;
	while(0 != dest[i])
	{
		if(NULL == strchr(src, dest[i])) return i;
		i = i + 1;
	}
	return i;
}

/*
 * Length of the leading part of dest made only of characters NOT found
 * in src.  The end of dest is tested explicitly: strchr above reports
 * NULL for '\0', so relying on it alone would scan past the terminator.
 */
size_t strcspn(char const* dest, char const* src)
{
	size_t i = 0;
	while(0 != dest[i])
	{
		if(NULL != strchr(src, dest[i])) return i;
		i = i + 1;
	}
	return i;
}

/*
 * Find the first character of dest that also occurs in breakset.
 * Returns a pointer to it, or NULL when there is none.
 */
char* strpbrk(char const* dest, char const* breakset)
{
	char* p = dest;
	while(0 != p[0])
	{
		if(NULL != strchr(breakset, p[0])) return p;
		p = p + 1;
	}
	return NULL;
}

/* Fill count bytes at dest with ch.  A NULL dest is returned untouched. */
void* memset(void* dest, int ch, size_t count)
{
	if(NULL == dest) return dest;

	size_t i = 0;
	char* s = dest;
	while(i < count)
	{
		s[i] = ch;
		i = i + 1;
	}

	return dest;
}

/*
 * Copy count bytes from src to dest, lowest address first.
 * Returns dest, or NULL when src is NULL; a NULL dest is returned as is.
 */
void* memcpy(void* dest, void const* src, size_t count)
{
	if(NULL == dest) return dest;
	if(NULL == src) return NULL;

	char* s1 = dest;
	char const* s2 = src;
	size_t i = 0;

	while(i < count)
	{
		s1[i] = s2[i];
		i = i + 1;
	}

	return dest;
}

/*
 * Copy count bytes from src to dest where the two ranges may overlap.
 * A destination below the source is copied forwards through memcpy,
 * anything else is copied from the last byte down.
 */
void* memmove(void* dest, void const* src, size_t count)
{
	if (dest < src) return memcpy (dest, src, count);

	char *p = dest;
	char const *q = src;

	/* Decrement inside the loop: count is unsigned, so a `>= 0` test can never fail */
	while (count > 0)
	{
		count = count - 1;
		p[count] = q[count];
	}

	return dest;
}

/*
 * Compare count bytes.  Returns 0 when equal (or when count is 0),
 * otherwise the difference of the first differing pair (lhs minus rhs).
 */
int memcmp(void const* lhs, void const* rhs, size_t count)
{
	if(0 == count) return 0;

	size_t i = 0;
	/* Stop the scan on the last index so the final subtraction covers it */
	count = count - 1;
	char const* s1 = lhs;
	char const* s2 = rhs;

	while(i < count)
	{
		if(s1[i] != s2[i]) break;
		i = i + 1;
	}

	return (s1[i] - s2[i]);
}
/* Copyright (C) 2020 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _SYS_STAT_C
#define _SYS_STAT_C

#include <sys/types.h>

/* File mode bit masks, written as octal literals */
#define S_IRWXU 00700
#define S_IXUSR 00100
#define S_IWUSR 00200
#define S_IRUSR 00400

#define S_ISUID 04000
#define S_ISGID 02000
#define S_IXGRP 00010
#define S_IXOTH 00001
#define S_IRGRP 00040
#define S_IROTH 00004
#define S_IWGRP 00020
#define S_IWOTH 00002
#define S_IRWXG 00070
#define S_IRWXO 00007

/*
 * The wrappers below are bare inline assembly using 32-bit x86 register
 * names.  Each one loads its arguments from the stack into ebx, ecx and
 * edx, places a call number in eax and executes `int 0x80`.  None has a C
 * return statement; the result is presumably whatever the trap leaves in
 * eax (confirm against the compiler's calling convention).
 * Judging by the offsets, the first C parameter is the one farthest from
 * esp (TODO confirm).
 */

/* chmod: ebx <- [esp+8], ecx <- [esp+4], eax <- 15. */
int chmod(char *pathname, int mode)
{
	asm("lea_ebx,[esp+DWORD] %8"
	    "mov_ebx,[ebx]"
	    "lea_ecx,[esp+DWORD] %4"
	    "mov_ecx,[ecx]"
	    "mov_eax, %15"
	    "int !0x80");
}

/* fchmod: ebx <- [esp+8], ecx <- [esp+4], eax <- 94. */
int fchmod(int a, mode_t b)
{
	asm("lea_ebx,[esp+DWORD] %8"
	    "mov_ebx,[ebx]"
	    "lea_ecx,[esp+DWORD] %4"
	    "mov_ecx,[ecx]"
	    "mov_eax, %94"
	    "int !0x80");
}

/* mkdir: ebx <- [esp+8], ecx <- [esp+4], eax <- 39. */
int mkdir(char const* a, mode_t b)
{
	asm("lea_ebx,[esp+DWORD] %8"
	    "mov_ebx,[ebx]"
	    "lea_ecx,[esp+DWORD] %4"
	    "mov_ecx,[ecx]"
	    "mov_eax, %39"
	    "int !0x80");
}

/* mknod: ebx <- [esp+12], ecx <- [esp+8], edx <- [esp+4], eax <- 14. */
int mknod(char const* a, mode_t b, dev_t c)
{
	asm("lea_ebx,[esp+DWORD] %12"
	    "mov_ebx,[ebx]"
	    "lea_ecx,[esp+DWORD] %8"
	    "mov_ecx,[ecx]"
	    "lea_edx,[esp+DWORD] %4"
	    "mov_edx,[edx]"
	    "mov_eax, %14"
	    "int !0x80");
}

/* umask: ebx <- [esp+4], eax <- 60. */
mode_t umask(mode_t m)
{
	asm("lea_ebx,[esp+DWORD] %4"
	    "mov_ebx,[ebx]"
	    "mov_eax, %60"
	    "int !0x80");
}

#endif
/* Copyright (C) 2020 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _SYS_STAT_C
#define _SYS_STAT_C

#include <sys/types.h>

/* File mode bit masks, written as octal literals */
#define S_IRWXU 00700
#define S_IXUSR 00100
#define S_IWUSR 00200
#define S_IRUSR 00400

#define S_ISUID 04000
#define S_ISGID 02000
#define S_IXGRP 00010
#define S_IXOTH 00001
#define S_IRGRP 00040
#define S_IROTH 00004
#define S_IWGRP 00020
#define S_IWOTH 00002
#define S_IRWXG 00070
#define S_IRWXO 00007

/*
 * The wrappers below are bare inline assembly built from assembler macros
 * that are not defined in this file.  Each one puts a call number in R7,
 * fills R0.. from slots addressed relative to R12 and ends with
 * SYSCALL_ALWAYS.  None has a C return statement.
 * NOTE(review): the `!N Rx SUB R12 ARITH_ALWAYS` / `!0 Rx LOAD32 Rx MEMORY`
 * pair appears to compute R12 - N and then load the word stored there;
 * confirm against the macro definitions.
 */

/* chmod: R7 <- 15, R1 <- slot 8, R0 <- slot 4. */
int chmod(char *pathname, int mode)
{
	asm("!15 R7 LOADI8_ALWAYS"
	    "!8 R1 SUB R12 ARITH_ALWAYS"
	    "!0 R1 LOAD32 R1 MEMORY"
	    "!4 R0 SUB R12 ARITH_ALWAYS"
	    "!0 R0 LOAD32 R0 MEMORY"
	    "SYSCALL_ALWAYS");
}

/* fchmod: R7 <- 94, R1 <- slot 8, R0 <- slot 4. */
int fchmod(int a, mode_t b)
{
	asm("!94 R7 LOADI8_ALWAYS"
	    "!8 R1 SUB R12 ARITH_ALWAYS"
	    "!0 R1 LOAD32 R1 MEMORY"
	    "!4 R0 SUB R12 ARITH_ALWAYS"
	    "!0 R0 LOAD32 R0 MEMORY"
	    "SYSCALL_ALWAYS");
}

/* mkdir: R7 <- 39, R1 <- slot 8, R0 <- slot 4. */
int mkdir(char const* a, mode_t b)
{
	asm("!39 R7 LOADI8_ALWAYS"
	    "!8 R1 SUB R12 ARITH_ALWAYS"
	    "!0 R1 LOAD32 R1 MEMORY"
	    "!4 R0 SUB R12 ARITH_ALWAYS"
	    "!0 R0 LOAD32 R0 MEMORY"
	    "SYSCALL_ALWAYS");
}

/* mknod: R7 <- 14, R2 <- slot 12, R1 <- slot 8, R0 <- slot 4. */
int mknod(char const* a, mode_t b, dev_t c)
{
	asm("!14 R7 LOADI8_ALWAYS"
	    "!12 R2 SUB R12 ARITH_ALWAYS"
	    "!0 R2 LOAD32 R2 MEMORY"
	    "!8 R1 SUB R12 ARITH_ALWAYS"
	    "!0 R1 LOAD32 R1 MEMORY"
	    "!4 R0 SUB R12 ARITH_ALWAYS"
	    "!0 R0 LOAD32 R0 MEMORY"
	    "SYSCALL_ALWAYS");
}

/* umask: R7 <- 60, R0 <- slot 4. */
mode_t umask(mode_t m)
{
	asm("!60 R7 LOADI8_ALWAYS"
	    "!4 R0 SUB R12 ARITH_ALWAYS"
	    "!0 R0 LOAD32 R0 MEMORY"
	    "SYSCALL_ALWAYS");
}

#endif
/* Copyright (C) 2020 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _SYS_STAT_C
#define _SYS_STAT_C

#include <sys/types.h>

/* File mode bit masks, written as octal literals */
#define S_IRWXU 00700
#define S_IXUSR 00100
#define S_IWUSR 00200
#define S_IRUSR 00400

#define S_ISUID 04000
#define S_ISGID 02000
#define S_IXGRP 00010
#define S_IXOTH 00001
#define S_IRGRP 00040
#define S_IROTH 00004
#define S_IWGRP 00020
#define S_IWOTH 00002
#define S_IRWXG 00070
#define S_IRWXO 00007

/*
 * The wrappers below are bare inline assembly built from assembler macros
 * that are not defined in this file.  Arguments are fetched through X0
 * (SET_X0_FROM_BP, SUB_X0_n, DEREF_X0) and moved into their final
 * register; the first C parameter is taken from BP-8, the second from
 * BP-16, the third from BP-24.  None has a C return statement.
 */

/*
 * chmod: implemented with the fchmodat call.
 * X0 <- AT_FDCWD, X1 <- pathname, X2 <- mode, X3 <- 0.
 */
int chmod(char *pathname, int mode)
{
	asm("SET_X0_FROM_BP" "SUB_X0_16" "DEREF_X0"
	    "SET_X2_FROM_X0"
	    "SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0"
	    "SET_X1_FROM_X0"
	    "SET_X0_TO_0"
	    "SET_X3_FROM_X0"
	    "SET_X0_TO_FCNTL_H_AT_FDCWD"
	    "SET_X8_TO_SYS_FCHMODAT"
	    "SYSCALL");
}

/*
 * fchmod: change the mode of the open descriptor a to b.
 * X0 <- a, X1 <- b.
 *
 * fchmod takes (fd, mode) and has no directory-descriptor argument, so
 * the fchmodat register layout (AT_FDCWD in X0, arguments shifted into
 * X1/X2) must not be used here: it made the call operate on AT_FDCWD
 * instead of on a.
 */
int fchmod(int a, mode_t b)
{
	asm("SET_X0_FROM_BP" "SUB_X0_16" "DEREF_X0"
	    "SET_X1_FROM_X0"
	    "SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0"
	    "SET_X8_TO_SYS_FCHMOD"
	    "SYSCALL");
}

/*
 * mkdir: implemented with the mkdirat call.
 * X0 <- AT_FDCWD, X1 <- a, X2 <- b (X3 is also cleared).
 */
int mkdir(char const* a, mode_t b)
{
	asm("SET_X0_FROM_BP" "SUB_X0_16" "DEREF_X0"
	    "SET_X2_FROM_X0"
	    "SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0"
	    "SET_X1_FROM_X0"
	    "SET_X0_TO_0"
	    "SET_X3_FROM_X0"
	    "SET_X0_TO_FCNTL_H_AT_FDCWD"
	    "SET_X8_TO_SYS_MKDIRAT"
	    "SYSCALL");
}

/*
 * mknod: X3 <- -1, X2 <- c, X1 <- b, X0 <- a.
 * NOTE(review): unlike the other path-based wrappers no AT_FDCWD is
 * loaded; whether that is right depends on what SET_X8_TO_SYS_MKNOD
 * selects - confirm against the macro definition.
 */
int mknod(char const* a, mode_t b, dev_t c)
{
	asm("SET_X0_TO_MINUS_1"
	    "SET_X3_FROM_X0"
	    "SET_X0_FROM_BP" "SUB_X0_24" "DEREF_X0"
	    "SET_X2_FROM_X0"
	    "SET_X0_FROM_BP" "SUB_X0_16" "DEREF_X0"
	    "SET_X1_FROM_X0"
	    "SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0"
	    "SET_X8_TO_SYS_MKNOD"
	    "SYSCALL");
}

/* umask: X0 <- m. */
mode_t umask(mode_t m)
{
	asm("SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0"
	    "SET_X8_TO_SYS_UMASK"
	    "SYSCALL");
}

#endif
/* Copyright (C) 2020 Jeremiah Orians
 * Copyright (C) 2021 Andrius Štikonas
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _SYS_STAT_C
#define _SYS_STAT_C

#include <sys/types.h>

/* File mode bit masks, written as octal literals */
#define S_IRWXU 00700
#define S_IXUSR 00100
#define S_IWUSR 00200
#define S_IRUSR 00400

#define S_ISUID 04000
#define S_ISGID 02000
#define S_IXGRP 00010
#define S_IXOTH 00001
#define S_IRGRP 00040
#define S_IROTH 00004
#define S_IWGRP 00020
#define S_IWOTH 00002
#define S_IRWXG 00070
#define S_IRWXO 00007

/*
 * The wrappers below are bare inline assembly.  Arguments are loaded with
 * `lw` from fp-4 (first C parameter), fp-8 (second) and fp-12 (third),
 * the call number goes into a7 and `ecall` enters the kernel.  None has a
 * C return statement.
 */

/* chmod: a0 <- AT_FDCWD, a1 <- pathname, a2 <- mode, call 53. */
int chmod(char *pathname, int mode)
{
	asm("rd_a0 !-100 addi" /* AT_FDCWD */
	    "rd_a1 rs1_fp !-4 lw"
	    "rd_a2 rs1_fp !-8 lw"
	    "rd_a7 !53 addi"
	    "ecall");
}

/*
 * fchmod: change the mode of the open descriptor a to b.
 * a0 <- a, a1 <- b, call 52.
 *
 * fchmod takes (fd, mode) and has no directory-descriptor argument, so
 * AT_FDCWD must not be placed in a0: doing so made the call operate on
 * descriptor -100 with a as the mode.
 */
int fchmod(int a, mode_t b)
{
	asm("rd_a0 rs1_fp !-4 lw"
	    "rd_a1 rs1_fp !-8 lw"
	    "rd_a7 !52 addi"
	    "ecall");
}

/* mkdir: a0 <- AT_FDCWD, a1 <- a, a2 <- b, call 34. */
int mkdir(char const* a, mode_t b)
{
	asm("rd_a0 !-100 addi" /* AT_FDCWD */
	    "rd_a1 rs1_fp !-4 lw"
	    "rd_a2 rs1_fp !-8 lw"
	    "rd_a7 !34 addi"
	    "ecall");
}

/* mknod: a0 <- AT_FDCWD, a1 <- a, a2 <- b, a3 <- c, call 33. */
int mknod(char const* a, mode_t b, dev_t c)
{
	asm("rd_a0 !-100 addi" /* AT_FDCWD */
	    "rd_a1 rs1_fp !-4 lw"
	    "rd_a2 rs1_fp !-8 lw"
	    "rd_a3 rs1_fp !-12 lw"
	    "rd_a7 !33 addi"
	    "ecall");
}

/* umask: a0 <- m, call 166. */
mode_t umask(mode_t m)
{
	asm("rd_a0 rs1_fp !-4 lw"
	    "rd_a7 !166 addi"
	    "ecall");
}

#endif
/* Copyright (C) 2020 Jeremiah Orians
 * Copyright (C) 2021 Andrius Štikonas
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _SYS_STAT_C
#define _SYS_STAT_C

#include <sys/types.h>

/* File mode bit masks, written as octal literals */
#define S_IRWXU 00700
#define S_IXUSR 00100
#define S_IWUSR 00200
#define S_IRUSR 00400

#define S_ISUID 04000
#define S_ISGID 02000
#define S_IXGRP 00010
#define S_IXOTH 00001
#define S_IRGRP 00040
#define S_IROTH 00004
#define S_IWGRP 00020
#define S_IWOTH 00002
#define S_IRWXG 00070
#define S_IRWXO 00007

/*
 * The wrappers below are bare inline assembly.  Arguments are loaded with
 * `ld` from fp-8 (first C parameter), fp-16 (second) and fp-24 (third),
 * the call number goes into a7 and `ecall` enters the kernel.  None has a
 * C return statement.
 */

/* chmod: a0 <- AT_FDCWD, a1 <- pathname, a2 <- mode, call 53. */
int chmod(char *pathname, int mode)
{
	asm("rd_a0 !-100 addi" /* AT_FDCWD */
	    "rd_a1 rs1_fp !-8 ld"
	    "rd_a2 rs1_fp !-16 ld"
	    "rd_a7 !53 addi"
	    "ecall");
}

/*
 * fchmod: change the mode of the open descriptor a to b.
 * a0 <- a, a1 <- b, call 52.
 *
 * fchmod takes (fd, mode) and has no directory-descriptor argument, so
 * AT_FDCWD must not be placed in a0: doing so made the call operate on
 * descriptor -100 with a as the mode.
 */
int fchmod(int a, mode_t b)
{
	asm("rd_a0 rs1_fp !-8 ld"
	    "rd_a1 rs1_fp !-16 ld"
	    "rd_a7 !52 addi"
	    "ecall");
}

/* mkdir: a0 <- AT_FDCWD, a1 <- a, a2 <- b, call 34. */
int mkdir(char const* a, mode_t b)
{
	asm("rd_a0 !-100 addi" /* AT_FDCWD */
	    "rd_a1 rs1_fp !-8 ld"
	    "rd_a2 rs1_fp !-16 ld"
	    "rd_a7 !34 addi"
	    "ecall");
}

/* mknod: a0 <- AT_FDCWD, a1 <- a, a2 <- b, a3 <- c, call 33. */
int mknod(char const* a, mode_t b, dev_t c)
{
	asm("rd_a0 !-100 addi" /* AT_FDCWD */
	    "rd_a1 rs1_fp !-8 ld"
	    "rd_a2 rs1_fp !-16 ld"
	    "rd_a3 rs1_fp !-24 ld"
	    "rd_a7 !33 addi"
	    "ecall");
}

/* umask: a0 <- m, call 166. */
mode_t umask(mode_t m)
{
	asm("rd_a0 rs1_fp !-8 ld"
	    "rd_a7 !166 addi"
	    "ecall");
}

#endif
/* Copyright (C) 2016 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

/* Include guard: the contents below are processed once per translation unit. */
#ifndef _FCNTL_H
#define _FCNTL_H

#ifdef __M2__
/* When __M2__ is defined the implementation file itself is included. */
#include <sys/types.h>
#include <stddef.h>
#include <fcntl.c>
#else
/* Otherwise only the constants and the open() prototype are provided. */

/* open() flag values (the multi-digit ones are octal literals) */
#define O_RDONLY 0
#define O_WRONLY 1
#define O_RDWR 2
#define O_CREAT 00100
#define O_EXCL 00200
#define O_TRUNC 001000
#define O_APPEND 002000

/* File mode bit masks (octal literals) */
#define S_IXUSR 00100
#define S_IWUSR 00200
#define S_IRUSR 00400
#define S_IRWXU 00700

extern int open(char* name, int flag, int mode);

/* Descriptor numbers of the three standard streams */
#define STDIN_FILENO 0
#define STDOUT_FILENO 1
#define STDERR_FILENO 2
#endif
#endif
/* Copyright (C) 2016 Jeremiah Orians * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __FCNTL_C #define __FCNTL_C #define O_RDONLY 0 #define O_WRONLY 1 #define O_RDWR 2 #define O_CREAT 00100 #define O_EXCL 00200 #define O_TRUNC 001000 #define O_APPEND 002000 #define S_IXUSR 00100 #define S_IWUSR 00200 #define S_IRUSR 00400 #define S_IRWXU 00700 #include <uefi/uefi.c> void free(void* l); int __open(struct efi_file_protocol* _rootdir, char* name, long mode, long attributes) { struct efi_file_protocol* new_handle; char* wide_name = _posix_path_to_uefi(name); unsigned rval = __uefi_5(_rootdir, &new_handle, wide_name, mode, attributes, _rootdir->open); free(wide_name); if(rval != EFI_SUCCESS) { return -1; } return new_handle; } void _set_file_size(struct efi_file_protocol* f, unsigned new_size) { /* Preallocate some extra space for file_name */ size_t file_info_size = sizeof(struct efi_file_info); struct efi_file_info* file_info = calloc(1, file_info_size); unsigned r = __uefi_4(f, &EFI_FILE_INFO_GUID, &file_info_size, file_info, f->get_info); if(r != EFI_SUCCESS) { free(file_info); return; } file_info->file_size = new_size; __uefi_4(f, &EFI_FILE_INFO_GUID, file_info_size, file_info, f->set_info); free(file_info); } int _open(char* name, int flag, int mode) { long mode = 0; long attributes = 0; if ((flag == (O_WRONLY | O_CREAT | O_TRUNC)) || (flag == (O_RDWR | 
O_CREAT | O_EXCL))) { mode = EFI_FILE_MODE_CREATE | EFI_FILE_MODE_WRITE | EFI_FILE_MODE_READ; } else { /* Everything else is a read */ mode = EFI_FILE_MODE_READ; attributes = EFI_FILE_READ_ONLY; } int handle = __open(_rootdir, name, mode, attributes); if (flag & O_TRUNC) { _set_file_size(handle, 0); } return handle; } #define STDIN_FILENO 0 #define STDOUT_FILENO 1 #define STDERR_FILENO 2 #endif
/* Copyright (C) 2016 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef __FCNTL_C
#define __FCNTL_C

/* open() flag values (the multi-digit ones are octal literals) */
#define O_RDONLY 0
#define O_WRONLY 1
#define O_RDWR 2
#define O_CREAT 00100
#define O_EXCL 00200
#define O_TRUNC 001000
#define O_APPEND 002000

/* File mode bit masks (octal literals) */
#define S_IXUSR 00100
#define S_IWUSR 00200
#define S_IRUSR 00400
#define S_IRWXU 00700

/*
 * _open - bare inline-assembly wrapper, 32-bit x86 register names.
 * ebx <- [esp+12], ecx <- [esp+8], edx <- [esp+4], eax <- 5, then
 * `int 0x80`.  There is no C return statement; the result is presumably
 * whatever the trap leaves in eax (confirm against the compiler's calling
 * convention).  Judging by the offsets, name is the slot farthest from
 * esp (TODO confirm).
 */
int _open(char* name, int flag, int mode)
{
	asm("lea_ebx,[esp+DWORD] %12"
	    "mov_ebx,[ebx]"
	    "lea_ecx,[esp+DWORD] %8"
	    "mov_ecx,[ecx]"
	    "lea_edx,[esp+DWORD] %4"
	    "mov_edx,[edx]"
	    "mov_eax, %5"
	    "int !0x80");
}

/* Descriptor numbers of the three standard streams */
#define STDIN_FILENO 0
#define STDOUT_FILENO 1
#define STDERR_FILENO 2
#endif
/* Copyright (C) 2016 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef __FCNTL_C
#define __FCNTL_C

/* open() flag values (the multi-digit ones are octal literals) */
#define O_RDONLY 0
#define O_WRONLY 1
#define O_RDWR 2
#define O_CREAT 00100
#define O_EXCL 00200
#define O_TRUNC 001000
#define O_APPEND 002000

/* File mode bit masks (octal literals) */
#define S_IXUSR 00100
#define S_IWUSR 00200
#define S_IRUSR 00400
#define S_IRWXU 00700

/*
 * _open - bare inline-assembly wrapper built from assembler macros that
 * are not defined in this file.
 * R0 <- slot 4, R1 <- slot 8, R2 <- slot 12 (all relative to R12),
 * R7 <- 5, then SYSCALL_ALWAYS.  There is no C return statement.
 * NOTE(review): the `!N Rx SUB R12 ARITH_ALWAYS` / `!0 Rx LOAD32 Rx MEMORY`
 * pair appears to compute R12 - N and load the word stored there; confirm
 * against the macro definitions.
 */
int _open(char* name, int flag, int mode)
{
	asm("!4 R0 SUB R12 ARITH_ALWAYS"
	    "!0 R0 LOAD32 R0 MEMORY"
	    "!8 R1 SUB R12 ARITH_ALWAYS"
	    "!0 R1 LOAD32 R1 MEMORY"
	    "!12 R2 SUB R12 ARITH_ALWAYS"
	    "!0 R2 LOAD32 R2 MEMORY"
	    "!5 R7 LOADI8_ALWAYS"
	    "SYSCALL_ALWAYS");
}

/* Descriptor numbers of the three standard streams */
#define STDIN_FILENO 0
#define STDOUT_FILENO 1
#define STDERR_FILENO 2
#endif
/* Copyright (C) 2016 Jeremiah Orians
 * Copyright (C) 2020 deesix <deesix@tuta.io>
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef __FCNTL_C
#define __FCNTL_C

/* open() flag values (the multi-digit ones are octal literals) */
#define O_RDONLY 0
#define O_WRONLY 1
#define O_RDWR 2
#define O_CREAT 00100
#define O_EXCL 00200
#define O_TRUNC 001000
#define O_APPEND 002000

/* File mode bit masks (octal literals) */
#define S_IXUSR 00100
#define S_IWUSR 00200
#define S_IRUSR 00400
#define S_IRWXU 00700

/*
 * _open - bare inline-assembly wrapper built from assembler macros that
 * are not defined in this file; implemented with the openat call.
 * Each argument is fetched through X0 (SET_X0_FROM_BP, SUB_X0_n,
 * DEREF_X0) and then moved:
 *   X3 <- slot BP-24, X2 <- slot BP-16, X1 <- slot BP-8,
 *   X0 <- AT_FDCWD, X8 <- SYS_OPENAT.
 * There is no C return statement.
 */
int _open(char* name, int flag, int mode)
{
	asm("SET_X0_FROM_BP" "SUB_X0_24" "DEREF_X0"
	    "SET_X3_FROM_X0"
	    "SET_X0_FROM_BP" "SUB_X0_16" "DEREF_X0"
	    "SET_X2_FROM_X0"
	    "SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0"
	    "SET_X1_FROM_X0"
	    "SET_X0_TO_FCNTL_H_AT_FDCWD"
	    "SET_X8_TO_SYS_OPENAT"
	    "SYSCALL");
}

/* Descriptor numbers of the three standard streams */
#define STDIN_FILENO 0
#define STDOUT_FILENO 1
#define STDERR_FILENO 2
#endif
/* Copyright (C) 2016 Jeremiah Orians
 * Copyright (C) 2021 Andrius Štikonas
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef __FCNTL_C
#define __FCNTL_C

/* open() flag values (the multi-digit ones are octal literals) */
#define O_RDONLY 0
#define O_WRONLY 1
#define O_RDWR 2
#define O_CREAT 00100
#define O_EXCL 00200
#define O_TRUNC 001000
#define O_APPEND 002000

/* File mode bit masks (octal literals) */
#define S_IXUSR 00100
#define S_IWUSR 00200
#define S_IRUSR 00400
#define S_IRWXU 00700

/*
 * _open - bare inline-assembly wrapper.
 * a0 <- -100 (AT_FDCWD), a1 <- word at fp-4, a2 <- word at fp-8,
 * a3 <- word at fp-12, a7 <- 56, then `ecall`.
 * There is no C return statement.
 */
int _open(char* name, int flag, int mode)
{
	asm("rd_a0 !-100 addi" /* AT_FDCWD */
	    "rd_a1 rs1_fp !-4 lw"
	    "rd_a2 rs1_fp !-8 lw"
	    "rd_a3 rs1_fp !-12 lw"
	    "rd_a7 !56 addi"
	    "ecall");
}

/* Descriptor numbers of the three standard streams */
#define STDIN_FILENO 0
#define STDOUT_FILENO 1
#define STDERR_FILENO 2
#endif
/* Copyright (C) 2016 Jeremiah Orians
 * Copyright (C) 2021 Andrius Štikonas
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef __FCNTL_C
#define __FCNTL_C

/* open() flag values (the multi-digit ones are octal literals) */
#define O_RDONLY 0
#define O_WRONLY 1
#define O_RDWR 2
#define O_CREAT 00100
#define O_EXCL 00200
#define O_TRUNC 001000
#define O_APPEND 002000

/* File mode bit masks (octal literals) */
#define S_IXUSR 00100
#define S_IWUSR 00200
#define S_IRUSR 00400
#define S_IRWXU 00700

/*
 * _open - bare inline-assembly wrapper.
 * a0 <- -100 (AT_FDCWD), a1 <- doubleword at fp-8, a2 <- doubleword at
 * fp-16, a3 <- doubleword at fp-24, a7 <- 56, then `ecall`.
 * There is no C return statement.
 */
int _open(char* name, int flag, int mode)
{
	asm("rd_a0 !-100 addi" /* AT_FDCWD */
	    "rd_a1 rs1_fp !-8 ld"
	    "rd_a2 rs1_fp !-16 ld"
	    "rd_a3 rs1_fp !-24 ld"
	    "rd_a7 !56 addi"
	    "ecall");
}

/* Descriptor numbers of the three standard streams */
#define STDIN_FILENO 0
#define STDOUT_FILENO 1
#define STDERR_FILENO 2
#endif
/* Copyright (C) 2016 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef __FCNTL_C
#define __FCNTL_C

/* Access modes accepted in the flag argument of _open */
#define O_RDONLY 0
#define O_WRONLY 1
#define O_RDWR 2

/* Creation/status flags (octal), OR-ed into the flag argument */
#define O_CREAT 00100
#define O_EXCL 00200
#define O_TRUNC 001000
#define O_APPEND 002000

/* Owner permission bits (octal) for the mode argument */
#define S_IXUSR 00100
#define S_IWUSR 00200
#define S_IRUSR 00400
#define S_IRWXU 00700

/* Raw open wrapper.
 *
 * Loads name, flag and mode from offsets 0, 4 and 8 relative to R14 into
 * R0..R2 and then issues SYS_OPEN.
 * There is no explicit return statement; presumably the value SYS_OPEN
 * leaves in R0 is what the caller sees -- TODO confirm.
 */
int _open(char* name, int flag, int mode)
{
	asm("LOAD R0 R14 0"
	    "LOAD R1 R14 4"
	    "LOAD R2 R14 8"
	    "SYS_OPEN");
}

/* Descriptors of the standard streams */
#define STDIN_FILENO 0
#define STDOUT_FILENO 1
#define STDERR_FILENO 2
#endif
/* Copyright (C) 2016 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

/* Include guard: this file defines a function, so pulling it in twice
 * would define _open twice. */
#ifndef __FCNTL_C
#define __FCNTL_C

/* Access modes accepted in the flag argument of _open */
#define O_RDONLY 0
#define O_WRONLY 1
#define O_RDWR 2

/* Creation/status flags (octal) */
#define O_CREAT 00100
#define O_EXCL 00200
#define O_TRUNC 001000
#define O_APPEND 002000

/* Owner permission bits (octal) */
#define S_IXUSR 00100
#define S_IWUSR 00200
#define S_IRUSR 00400
#define S_IRWXU 00700

/* Open wrapper for the bare-metal target.
 *
 * Only two fixed handles exist here: when both flag and mode are non-zero
 * the request is treated as "open for writing" (FOPEN_WRITE, handle 0x1101);
 * every other combination is "open for reading" (FOPEN_READ, handle 0x1100).
 * In both cases R0 is loaded from offset 0 relative to R14 before the
 * FOPEN_* instruction.
 * NOTE(review): name is presumably what sits at R14+0 -- confirm against
 * the calling convention.
 *
 * Returns the fixed handle value; no failure is reported.
 */
int _open(char* name, int flag, int mode)
{
	if((0 != flag) && (0 != mode))
	{
		asm("LOAD R0 R14 0"
		    "FOPEN_WRITE");
		return 0x1101;
	}
	else
	{
		asm("LOAD R0 R14 0"
		    "FOPEN_READ");
		return 0x1100;
	}
}

/* The standard streams map onto the same fixed handles _open returns */
#define STDIN_FILENO 0x1100
#define STDOUT_FILENO 0x1101
#define STDERR_FILENO 0

#endif
/* Copyright (C) 2020 Jeremiah Orians * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _UNISTD_H #define _UNISTD_H #include <sys/utsname.h> #ifdef __M2__ #if __uefi__ #include <uefi/unistd.c> #elif __i386__ #include <x86/linux/unistd.c> #elif __x86_64__ #include <amd64/linux/unistd.c> #elif __arm__ #include <armv7l/linux/unistd.c> #elif __aarch64__ #include <aarch64/linux/unistd.c> #elif __riscv && __riscv_xlen==32 #include <riscv32/linux/unistd.c> #elif __riscv && __riscv_xlen==64 #include <riscv64/linux/unistd.c> #else #error arch not supported #endif #else #define NULL 0 #define __PATH_MAX 4096 void* malloc(unsigned size); int access(char* pathname, int mode); int chdir(char* path); int fchdir(int fd); void _exit(int value); int fork(); int waitpid (int pid, int* status_ptr, int options); int execve(char* file_name, char** argv, char** envp); int read(int fd, char* buf, unsigned count); int write(int fd, char* buf, unsigned count); int lseek(int fd, int offset, int whence); int close(int fd); int unlink (char *filename); int _getcwd(char* buf, int size); char* getcwd(char* buf, unsigned size); char* getwd(char* buf); char* get_current_dir_name(); int brk(void *addr); int uname(struct utsname* unameData); int unshare(int flags); int geteuid(); int getegid(); int chroot(char const *path); int mount(char const *source, char const *target, char const 
*filesystemtype, SCM mountflags, void const *data); #endif #endif
/* Copyright (C) 2022 Andrius Å tikonas * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _UNISTD_C #define _UNISTD_C #include <uefi/uefi.c> #include <sys/utsname.h> #include <stdio.h> #define NULL 0 #define EOF 0xFFFFFFFF /* For lseek */ #define SEEK_SET 0 #define SEEK_CUR 1 #define SEEK_END 2 void* malloc(unsigned size); size_t strlen(char const* str); char* strncpy(char* dest, char const* src, size_t count); char* strncat(char* dest, char const* src, size_t count); void* memcpy(void* dest, void const* src, size_t count); int open(char* name, int flag, int mode); int close(int fd); int access(char* pathname, int mode) { int fd = open(pathname, 0, 0); if (fd == -1) { return -1; } close(fd); return 0; } int chdir(char* path) { if (access(path, 0) == -1) { return -1; } char* absolute_path = _relative_path_to_absolute(path); strncpy(_cwd, absolute_path, __PATH_MAX); if(_cwd[strlen(_cwd) - 1] != '\\') { strncat(_cwd, "/", __PATH_MAX); } free(absolute_path); return 0; } int fchdir(int fd) { /* TODO: not yet implemented. 
*/ return -1; } int _get_file_size(struct efi_file_protocol* f) { /* Preallocate some extra space for file_name */ size_t file_info_size = sizeof(struct efi_file_info); struct efi_file_info* file_info = calloc(1, file_info_size); unsigned rval = __uefi_4(f, &EFI_FILE_INFO_GUID, &file_info_size, file_info, f->get_info); if(rval != EFI_SUCCESS) { return -1; } int file_size = file_info->file_size; free(file_info); return file_size; } void _set_environment(char** envp) { unsigned i; unsigned j; unsigned length = _array_length(envp); char* name; char* value; for(i = 0; i < length; i += 1) { j = 0; name = envp[i]; while(envp[i][j] != '=') { j += 1; } envp[i][j] = 0; value = envp[i] + j + 1; _set_variable(name, value); envp[i][j] = '='; } } FILE* fopen(char const* filename, char const* mode); size_t fread(void* buffer, size_t size, size_t count, FILE* stream); int fclose(FILE* stream); int spawn(char* file_name, char** argv, char** envp) { FILE* fcmd = fopen(file_name, "r"); if(fcmd == NULL) return -1; long program_size = _get_file_size(fcmd->fd); void* executable = malloc(program_size); size_t count = fread(executable, 1, program_size, fcmd); if(count < program_size) { free(executable); fclose(fcmd); return -1; } fclose(fcmd); struct efi_device_path_protocol* device_path = calloc(2, sizeof(struct efi_device_path_protocol)); device_path->type = HARDWARE_DEVICE_PATH; device_path->subtype = MEMORY_MAPPED; device_path->length = sizeof(struct efi_device_path_protocol); device_path->memory_type = EFI_LOADER_DATA; device_path->start_address = executable; device_path->end_address = executable + program_size; device_path[1].type = END_HARDWARE_DEVICE_PATH; device_path[1].subtype = END_ENTIRE_DEVICE_PATH; device_path[1].length = 4; void* child_ih; unsigned rval = __uefi_6(0, _image_handle, device_path, executable, program_size, &child_ih, _system->boot_services->load_image); free(device_path); free(executable); if(rval != EFI_SUCCESS) return -1; struct efi_loaded_image_protocol* 
child_image; rval = _open_protocol(child_ih, &EFI_LOADED_IMAGE_PROTOCOL_GUID, &child_image, child_ih, 0, EFI_OPEN_PROTOCOL_BY_HANDLE_PROTOCOL); if(rval != EFI_SUCCESS) return -1; /* Concatenate char** argv array */ unsigned arg_length = -1 ; unsigned i = 0; while(argv[i] != NULL) { arg_length += strlen(argv[i]) + 1; i += 1; } char* load_options = calloc(arg_length + 1, 1); strcpy(load_options, argv[0]); i = 1; while(argv[i] != NULL) { strcat(load_options, " "); strcat(load_options, argv[i]); i += 1; } char* uefi_path = _string2wide(load_options); child_image->load_options = uefi_path; child_image->load_options_size = 2 * arg_length; free(load_options); child_image->device = _image->device; rval = _close_protocol(child_ih, &EFI_LOADED_IMAGE_PROTOCOL_GUID, child_ih, 0); if(rval != EFI_SUCCESS) return -1; /* Setup environment for child process */ _set_environment(envp); _set_variable("cwd", _cwd); _set_variable("root", _root); /* Run command */ rval = __uefi_3(child_ih, 0, 0, _system->boot_services->start_image); free(uefi_path); /* Restore initial environment * For simplicity we just delete all variables and restore them from _envp. * This assumes that _envp is not modified by application, e.g. kaem. 
*/ _wipe_environment(); _set_environment(_envp); return rval; } int fork() { return -1; } int waitpid (int pid, int* status_ptr, int options) { return -1; } int execve(char* file_name, char** argv, char** envp) { return -1; } int read(int fd, char* buf, unsigned count) { struct efi_file_protocol* f = fd; __uefi_3(fd, &count, buf, f->read); return count; } int write(int fd, char* buf, unsigned count) { struct efi_file_protocol* f = fd; unsigned i; char c = 0; /* In UEFI StdErr might not be printing stuff to console, so just use stdout */ if(f == STDOUT_FILENO || f == STDERR_FILENO) { for(i = 0; i < count; i += 1) { c = buf[i]; __uefi_2(_system->con_out, &c, _system->con_out->output_string); if('\n' == c) { c = '\r'; __uefi_2(_system->con_out, &c, _system->con_out->output_string); } } return i; } /* Otherwise write to file */ __uefi_3(f, &count, buf, f->write); return count; } int lseek(int fd, int offset, int whence) { struct efi_file_protocol* f = fd; if(whence == SEEK_SET) { } else if(whence == SEEK_CUR) { unsigned position; __uefi_2(f, &position, f->get_position); offset += position; } else if(whence == SEEK_END) { offset += _get_file_size(fd); } else { return -1; } unsigned rval = __uefi_2(f, offset, f->set_position); if(rval == EFI_SUCCESS) { return offset; } return -1; } int close(int fd) { struct efi_file_protocol* f = fd; unsigned rval = __uefi_1(f, f->close); if(rval != EFI_SUCCESS) { return -1; } return rval; } int unlink(char* filename) { FILE* f = fopen(filename, "w"); struct efi_file_protocol* fd = f->fd; __uefi_1(fd, fd->delete); } char* getcwd(char* buf, unsigned size) { size_t length = strlen(_cwd); if(length >= size) return NULL; strcpy(buf, _cwd); return buf; } char* getwd(char* buf) { return getcwd(buf, __PATH_MAX); } char* get_current_dir_name() { return getcwd(malloc(__PATH_MAX), __PATH_MAX); } int brk(void *addr) { return -1; } int uname(struct utsname* unameData) { memcpy(unameData->sysname, "UEFI", 5); memcpy(unameData->release, "1.0", 4); 
memcpy(unameData->version, "1.0", 4); #ifdef __x86_64__ memcpy(unameData->machine, "x86_64", 7); #else #error unsupported arch #endif } int unshare(int flags) { if (flags != 0) { return -1; // Any unshare operation is invalid } return 0; } int geteuid(int flags) { return 0; } int getegid(int flags) { return 0; } int chroot(char const *path) { char *newroot = _relative_path_to_absolute(path); free(_root); _root = newroot; if(_root[strlen(_root) - 1] != '\\') { strncat(_root, "/", __PATH_MAX); } return 0; } int mount(char const *source, char const *target, char const *filesystemtype, SCM mountflags, void const *data) { return -1; } #endif
/* Copyright (C) 2020 Jeremiah Orians * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _UNISTD_C #define _UNISTD_C #include <sys/utsname.h> #define NULL 0 #define __PATH_MAX 4096 void* malloc(unsigned size); int access(char* pathname, int mode) { asm("lea_ebx,[esp+DWORD] %8" "mov_ebx,[ebx]" "lea_ecx,[esp+DWORD] %4" "mov_ecx,[ecx]" "mov_eax, %33" "int !0x80"); } int chdir(char* path) { asm("lea_ebx,[esp+DWORD] %4" "mov_ebx,[ebx]" "mov_eax, %12" "int !0x80"); } int fchdir(int fd) { asm("lea_ebx,[esp+DWORD] %4" "mov_ebx,[ebx]" "mov_eax, %133" "int !0x80"); } /* Defined in the libc */ void _exit(int value); int fork() { asm("mov_eax, %2" "mov_ebx, %0" "int !0x80"); } int waitpid (int pid, int* status_ptr, int options) { asm("lea_ebx,[esp+DWORD] %12" "mov_ebx,[ebx]" "lea_ecx,[esp+DWORD] %8" "mov_ecx,[ecx]" "lea_edx,[esp+DWORD] %4" "mov_edx,[edx]" "mov_eax, %7" "int !0x80"); } int execve(char* file_name, char** argv, char** envp) { asm("lea_ebx,[esp+DWORD] %12" "mov_ebx,[ebx]" "lea_ecx,[esp+DWORD] %8" "mov_ecx,[ecx]" "lea_edx,[esp+DWORD] %4" "mov_edx,[edx]" "mov_eax, %11" "int !0x80"); } int read(int fd, char* buf, unsigned count) { asm("lea_ebx,[esp+DWORD] %12" "mov_ebx,[ebx]" "lea_ecx,[esp+DWORD] %8" "mov_ecx,[ecx]" "lea_edx,[esp+DWORD] %4" "mov_edx,[edx]" "mov_eax, %3" "int !0x80"); } int write(int fd, char* buf, unsigned count) { asm("lea_ebx,[esp+DWORD] 
%12" "mov_ebx,[ebx]" "lea_ecx,[esp+DWORD] %8" "mov_ecx,[ecx]" "lea_edx,[esp+DWORD] %4" "mov_edx,[edx]" "mov_eax, %4" "int !0x80"); } int lseek(int fd, int offset, int whence) { asm("lea_ebx,[esp+DWORD] %12" "mov_ebx,[ebx]" "lea_ecx,[esp+DWORD] %8" "mov_ecx,[ecx]" "lea_edx,[esp+DWORD] %4" "mov_edx,[edx]" "mov_eax, %19" "int !0x80"); } int close(int fd) { asm("lea_ebx,[esp+DWORD] %4" "mov_ebx,[ebx]" "mov_eax, %6" "int !0x80"); } int unlink (char *filename) { asm("lea_ebx,[esp+DWORD] %4" "mov_ebx,[ebx]" "mov_eax, %10" "int !0x80"); } int _getcwd(char* buf, int size) { asm("lea_ebx,[esp+DWORD] %8" "mov_ebx,[ebx]" "lea_ecx,[esp+DWORD] %4" "mov_ecx,[ecx]" "mov_eax, %183" "int !0x80"); } char* getcwd(char* buf, unsigned size) { int c = _getcwd(buf, size); if(0 == c) return NULL; return buf; } char* getwd(char* buf) { return getcwd(buf, __PATH_MAX); } char* get_current_dir_name() { return getcwd(malloc(__PATH_MAX), __PATH_MAX); } int brk(void *addr) { asm("mov_eax,[esp+DWORD] %4" "push_eax" "mov_eax, %45" "pop_ebx" "int !0x80"); } int uname(struct utsname* unameData) { asm("lea_ebx,[esp+DWORD] %4" "mov_ebx,[ebx]" "mov_eax, %109" "int !0x80"); } int unshare(int flags) { asm("lea_ebx,[esp+DWORD] %4" "mov_ebx,[ebx]" "mov_eax, %310" "int !0x80"); } int geteuid() { asm("mov_eax, %201" "int !0x80"); } int getegid() { asm("mov_eax, %202" "int !0x80"); } int mount(char *source, char *target, char *filesystemtype, SCM mountflags, void *data) { asm("lea_ebx,[esp+DWORD] %20" "mov_ebx,[ebx]" "lea_ecx,[esp+DWORD] %16" "mov_ecx,[ecx]" "lea_edx,[esp+DWORD] %12" "mov_edx,[edx]" "lea_esi,[esp+DWORD] %8" "mov_esi,[esi]" "lea_edi,[esp+DWORD] %4" "mov_edi,[edi]" "mov_eax, %21" "int !0x80"); } int chroot(char *path) { asm("lea_ebx,[esp+DWORD] %4" "mov_ebx,[ebx]" "mov_eax, %61" "int !0x80"); } #endif
/* Copyright (C) 2020 Jeremiah Orians * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _UNISTD_C #define _UNISTD_C #include <sys/utsname.h> #define NULL 0 #define __PATH_MAX 4096 void* malloc(unsigned size); int access(char* pathname, int mode) { asm("!4 R0 SUB R12 ARITH_ALWAYS" "!0 R0 LOAD32 R0 MEMORY" "!8 R1 SUB R12 ARITH_ALWAYS" "!0 R1 LOAD32 R1 MEMORY" "!33 R7 LOADI8_ALWAYS" "SYSCALL_ALWAYS"); } int chdir(char* path) { asm("!4 R0 SUB R12 ARITH_ALWAYS" "!0 R0 LOAD32 R0 MEMORY" "!12 R7 LOADI8_ALWAYS" "SYSCALL_ALWAYS"); } int fchdir(int fd) { asm("!4 R0 SUB R12 ARITH_ALWAYS" "!0 R0 LOAD32 R0 MEMORY" "!133 R7 LOADI8_ALWAYS" "SYSCALL_ALWAYS"); } void _exit(int value); int fork() { asm("!2 R7 LOADI8_ALWAYS" "SYSCALL_ALWAYS"); } int waitpid (int pid, int* status_ptr, int options) { asm("!114 R7 LOADI8_ALWAYS" "!12 R2 SUB R12 ARITH_ALWAYS" "!0 R2 LOAD32 R2 MEMORY" "!8 R1 SUB R12 ARITH_ALWAYS" "!0 R1 LOAD32 R1 MEMORY" "!4 R0 SUB R12 ARITH_ALWAYS" "!0 R0 LOAD32 R0 MEMORY" "SYSCALL_ALWAYS"); } int execve(char* file_name, char** argv, char** envp) { asm("!11 R7 LOADI8_ALWAYS" "!12 R2 SUB R12 ARITH_ALWAYS" "!0 R2 LOAD32 R2 MEMORY" "!8 R1 SUB R12 ARITH_ALWAYS" "!0 R1 LOAD32 R1 MEMORY" "!4 R0 SUB R12 ARITH_ALWAYS" "!0 R0 LOAD32 R0 MEMORY" "SYSCALL_ALWAYS"); } int read(int fd, char* buf, unsigned count) { asm("!3 R7 LOADI8_ALWAYS" "!12 R2 SUB R12 ARITH_ALWAYS" "!0 R2 
LOAD32 R2 MEMORY" "!8 R1 SUB R12 ARITH_ALWAYS" "!0 R1 LOAD32 R1 MEMORY" "!4 R0 SUB R12 ARITH_ALWAYS" "!0 R0 LOAD32 R0 MEMORY" "SYSCALL_ALWAYS"); } int write(int fd, char* buf, unsigned count) { asm("!4 R7 LOADI8_ALWAYS" "!12 R2 SUB R12 ARITH_ALWAYS" "!0 R2 LOAD32 R2 MEMORY" "!8 R1 SUB R12 ARITH_ALWAYS" "!0 R1 LOAD32 R1 MEMORY" "!4 R0 SUB R12 ARITH_ALWAYS" "!0 R0 LOAD32 R0 MEMORY" "SYSCALL_ALWAYS"); } int lseek(int fd, int offset, int whence) { asm("!19 R7 LOADI8_ALWAYS" "!12 R2 SUB R12 ARITH_ALWAYS" "!0 R2 LOAD32 R2 MEMORY" "!8 R1 SUB R12 ARITH_ALWAYS" "!0 R1 LOAD32 R1 MEMORY" "!4 R0 SUB R12 ARITH_ALWAYS" "!0 R0 LOAD32 R0 MEMORY" "SYSCALL_ALWAYS"); } int close(int fd) { asm("!4 R0 SUB R12 ARITH_ALWAYS" "!6 R7 LOADI8_ALWAYS" "SYSCALL_ALWAYS"); } int unlink (char* filename) { asm("!4 R0 SUB R12 ARITH_ALWAYS" "!10 R7 LOADI8_ALWAYS" "SYSCALL_ALWAYS"); } int _getcwd(char* buf, int size) { asm("!4 R0 SUB R12 ARITH_ALWAYS" "!0 R0 LOAD32 R0 MEMORY" "!8 R1 SUB R12 ARITH_ALWAYS" "!0 R1 LOAD32 R1 MEMORY" "!183 R7 LOADI8_ALWAYS" "SYSCALL_ALWAYS"); } char* getcwd(char* buf, unsigned size) { int c = _getcwd(buf, size); if(0 == c) return NULL; return buf; } char* getwd(char* buf) { return getcwd(buf, __PATH_MAX); } char* get_current_dir_name() { return getcwd(malloc(__PATH_MAX), __PATH_MAX); } int brk(void *addr) { asm("!4 R0 SUB R12 ARITH_ALWAYS" "!0 R0 LOAD32 R0 MEMORY" "!45 R7 LOADI8_ALWAYS" "SYSCALL_ALWAYS"); } int uname(struct utsname* unameData) { asm("!122 R7 LOADI8_ALWAYS" "!4 R0 SUB R12 ARITH_ALWAYS" "!0 R0 LOAD32 R0 MEMORY" "SYSCALL_ALWAYS"); } int unshare(int flags) { asm("!4 R0 SUB R12 ARITH_ALWAYS" "!0 R0 LOAD32 R0 MEMORY" /* because 337 can't fit in 1 byte */ "!0 R7 LOAD32 R15 MEMORY" "~0 JUMP_ALWAYS" "%337" "SYSCALL_ALWAYS"); } int geteuid() { asm("!201 R7 LOADI8_ALWAYS" "SYSCALL_ALWAYS"); } int getegid() { asm("!202 R7 LOADI8_ALWAYS" "SYSCALL_ALWAYS"); } int chroot(char const *path) { asm("!4 R0 SUB R12 ARITH_ALWAYS" "!61 R7 LOADI8_ALWAYS" "SYSCALL_ALWAYS"); } int 
mount(char const *source, char const *target, char const *filesystemtype, SCM mountflags, void const *data) { asm("!4 R0 SUB R12 ARITH_ALWAYS" "!0 R0 LOAD32 R0 MEMORY" "!8 R1 SUB R12 ARITH_ALWAYS" "!0 R1 LOAD32 R1 MEMORY" "!12 R2 SUB R12 ARITH_ALWAYS" "!0 R2 LOAD32 R2 MEMORY" "!16 R3 SUB R12 ARITH_ALWAYS" "!0 R3 LOAD32 R3 MEMORY" "!20 R4 SUB R12 ARITH_ALWAYS" "!0 R4 LOAD32 R4 MEMORY" "!31 R7 LOADI8_ALWAYS" "SYSCALL_ALWAYS"); } #endif
/* Copyright (C) 2020 Jeremiah Orians * Copyright (C) 2020 deesix <deesix@tuta.io> * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _UNISTD_C #define _UNISTD_C #include <sys/utsname.h> #define NULL 0 #define __PATH_MAX 4096 void* malloc(unsigned size); int access(char* pathname, int mode) { asm("SET_X0_FROM_BP" "SUB_X0_16" "DEREF_X0" "SET_X2_FROM_X0" "SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0" "SET_X1_FROM_X0" "SET_X0_TO_0" "SET_X3_FROM_X0" "SET_X0_TO_FCNTL_H_AT_FDCWD" "SET_X8_TO_SYS_FACCESSAT" "SYSCALL"); } int chdir(char* path) { asm("SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0" "SET_X8_TO_SYS_CHDIR" "SYSCALL"); } int fchdir(int fd) { asm("SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0" "SET_X8_TO_SYS_FCHDIR" "SYSCALL"); } void _exit(int value); int fork() { asm("SET_X0_TO_0" "SET_X1_FROM_X0" "SET_X2_FROM_X0" "SET_X3_FROM_X0" "SET_X4_FROM_X0" "SET_X5_FROM_X0" "SET_X6_FROM_X0" "SET_X0_TO_17" "SET_X8_TO_SYS_CLONE" "SYSCALL"); } int waitpid (int pid, int* status_ptr, int options) { asm("SET_X0_TO_MINUS_1" "SET_X3_FROM_X0" "SET_X0_FROM_BP" "SUB_X0_24" "DEREF_X0" "SET_X2_FROM_X0" "SET_X0_FROM_BP" "SUB_X0_16" "DEREF_X0" "SET_X1_FROM_X0" "SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0" "SET_X8_TO_SYS_WAIT4" "SYSCALL"); } int execve(char* file_name, char** argv, char** envp) { asm("SET_X0_FROM_BP" "SUB_X0_24" "DEREF_X0" "SET_X2_FROM_X0" "SET_X0_FROM_BP" "SUB_X0_16" "DEREF_X0" "SET_X1_FROM_X0" 
"SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0" "SET_X8_TO_SYS_EXECVE" "SYSCALL"); } int read(int fd, char* buf, unsigned count) { asm("SET_X0_FROM_BP" "SUB_X0_24" "DEREF_X0" "SET_X2_FROM_X0" "SET_X0_FROM_BP" "SUB_X0_16" "DEREF_X0" "SET_X1_FROM_X0" "SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0" "SET_X8_TO_SYS_READ" "SYSCALL"); } int write(int fd, char* buf, unsigned count) { asm("SET_X0_FROM_BP" "SUB_X0_24" "DEREF_X0" "SET_X2_FROM_X0" "SET_X0_FROM_BP" "SUB_X0_16" "DEREF_X0" "SET_X1_FROM_X0" "SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0" "SET_X8_TO_SYS_WRITE" "SYSCALL"); } int lseek(int fd, int offset, int whence) { asm("SET_X0_TO_MINUS_1" "SET_X3_FROM_X0" "SET_X0_FROM_BP" "SUB_X0_24" "DEREF_X0" "SET_X2_FROM_X0" "SET_X0_FROM_BP" "SUB_X0_16" "DEREF_X0" "SET_X1_FROM_X0" "SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0" "SET_X8_TO_SYS_LSEEK" "SYSCALL"); } int close(int fd) { asm("SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0" "SET_X8_TO_SYS_CLOSE" "SYSCALL"); } int unlink (char* filename) { asm("SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0" "SET_X8_TO_SYS_UNLINK" "SYSCALL"); } int _getcwd(char* buf, int size) { asm("SET_X0_FROM_BP" "SUB_X0_16" "DEREF_X0" "SET_X1_FROM_X0" "SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0" "SET_X8_TO_SYS_GETCWD" "SYSCALL"); } char* getcwd(char* buf, unsigned size) { int c = _getcwd(buf, size); if(0 == c) return NULL; return buf; } char* getwd(char* buf) { return getcwd(buf, __PATH_MAX); } char* get_current_dir_name() { return getcwd(malloc(__PATH_MAX), __PATH_MAX); } int brk(void *addr) { asm("SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0" "SET_X8_TO_SYS_BRK" "SYSCALL"); } int uname(struct utsname* unameData) { asm("SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0" "SET_X8_TO_SYS_UNAME" "SYSCALL"); } int unshare(int flags) { asm("SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0" "SET_X8_TO_SYS_UNSHARE" "SYSCALL"); } int geteuid() { asm("SET_X8_TO_SYS_GETEUID" "SYSCALL"); } int getegid() { asm("SET_X8_TO_SYS_GETEGID" "SYSCALL"); } int chroot(char const *path) { asm("SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0" "SET_X8_TO_SYS_CHROOT" 
"SYSCALL"); } int mount(char const *source, char const *target, char const *filesystemtype, SCM mountflags, void const *data) { asm("SET_X0_FROM_BP" "SUB_X0_40" "DEREF_X0" "SET_X4_FROM_X0" "SET_X0_FROM_BP" "SUB_X0_32" "DEREF_X0" "SET_X3_FROM_X0" "SET_X0_FROM_BP" "SUB_X0_24" "DEREF_X0" "SET_X2_FROM_X0" "SET_X0_FROM_BP" "SUB_X0_16" "DEREF_X0" "SET_X1_FROM_X0" "SET_X0_FROM_BP" "SUB_X0_8" "DEREF_X0" "SET_X8_TO_SYS_MOUNT" "SYSCALL"); } #endif
/* Copyright (C) 2020 Jeremiah Orians * Copyright (C) 2021 Andrius Å tikonas * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _UNISTD_C #define _UNISTD_C #include <signal.h> #include <sys/utsname.h> #define NULL 0 #define __PATH_MAX 4096 #define P_PID 1 #define WEXITED 4 #define __SI_SWAP_ERRNO_CODE void* malloc(unsigned size); int access(char* pathname, int mode) { asm("rd_a0 !-100 addi" /* AT_FDCWD */ "rd_a1 rs1_fp !-4 lw" "rd_a2 rs1_fp !-8 lw" "rd_a3 addi" /* flags = 0 */ "rd_a7 !48 addi" "ecall"); } int chdir(char* path) { asm("rd_a0 rs1_fp !-4 lw" "rd_a7 !49 addi" "ecall"); } int fchdir(int fd) { asm("rd_a0 rs1_fp !-4 lw" "rd_a7 !50 addi" "ecall"); } void _exit(int value); int fork() { asm("rd_a7 !220 addi" "rd_a0 !17 addi" /* SIGCHLD */ "rd_a1 mv" /* Child uses duplicate of parent's stack */ "ecall"); } int waitid(int idtype, int id, struct siginfo_t *infop, int options, void *rusage) { asm("rd_a0 rs1_fp !-4 lw" "rd_a1 rs1_fp !-8 lw" "rd_a2 rs1_fp !-12 lw" "rd_a3 rs1_fp !-16 lw" "rd_a4 rs1_fp !-20 lw" "rd_a7 !95 addi" "ecall"); } void* calloc(int count, int size); void free(void* l); struct siginfo_t *__waitpid_info; int waitpid(int pid, int* status_ptr, int options) { if(NULL == __waitpid_info) __waitpid_info = calloc(1, sizeof(struct siginfo_t)); int r = waitid(P_PID, pid, __waitpid_info, options|WEXITED, NULL); if(__waitpid_info->si_pid != 0) { 
int sw = 0; if(__waitpid_info->si_code == CLD_EXITED) { sw = (__waitpid_info->si_status & 0xff) << 8; } else if(__waitpid_info->si_code == CLD_KILLED) { sw = __waitpid_info->si_status & 0x7f; } else if(__waitpid_info->si_code == CLD_DUMPED) { sw = (__waitpid_info->si_status & 0x7f) | 0x80; } else if(__waitpid_info->si_code == CLD_CONTINUED) { sw = 0xffff; } else if(__waitpid_info->si_code == CLD_STOPPED || __waitpid_info->si_code == CLD_TRAPPED) { sw = ((__waitpid_info->si_status & 0xff) << 8) + 0x7f; } if(status_ptr != NULL) *status_ptr = sw; } int rval = __waitpid_info->si_pid; if(r < 0) { return r; } return rval; } int execve(char* file_name, char** argv, char** envp) { asm("rd_a0 rs1_fp !-4 lw" "rd_a1 rs1_fp !-8 lw" "rd_a2 rs1_fp !-12 lw" "rd_a7 !221 addi" "ecall"); } int read(int fd, char* buf, unsigned count) { asm("rd_a0 rs1_fp !-4 lw" "rd_a1 rs1_fp !-8 lw" "rd_a2 rs1_fp !-12 lw" "rd_a7 !63 addi" "ecall"); } int write(int fd, char* buf, unsigned count) { asm("rd_a0 rs1_fp !-4 lw" "rd_a1 rs1_fp !-8 lw" "rd_a2 rs1_fp !-12 lw" "rd_a7 !64 addi" "ecall"); } int llseek(int fd, int offset_high, int offset_low, int result, int whence) { asm("rd_a0 rs1_fp !-4 lw" "rd_a1 rs1_fp !-8 lw" "rd_a2 rs1_fp !-12 lw" "rd_a3 rs1_fp !-16 lw" "rd_a4 rs1_fp !-20 lw" "rd_a7 !62 addi" "ecall"); } int lseek(int fd, int offset, int whence) { int result; if(llseek(fd, offset >> 32, offset, &result, whence)) { return -1; } return result; } int close(int fd) { asm("rd_a0 rs1_fp !-4 lw" "rd_a7 !57 addi" /* close */ "ecall"); } int unlink (char* filename) { asm("rd_a0 !-100 addi" /* AT_FDCWD */ "rd_a1 rs1_fp !-4 lw" "rd_a2 !0 addi" /* No flags */ "rd_a7 !35 addi" /* unlinkat */ "ecall"); } int _getcwd(char* buf, int size) { asm("rd_a0 rs1_fp !-4 lw" "rd_a1 rs1_fp !-8 lw" "rd_a7 !17 addi" "ecall"); } char* getcwd(char* buf, unsigned size) { int c = _getcwd(buf, size); if(0 == c) return NULL; return buf; } char* getwd(char* buf) { return getcwd(buf, __PATH_MAX); } char* 
get_current_dir_name() { return getcwd(malloc(__PATH_MAX), __PATH_MAX); } int brk(void *addr) { asm("rd_a0 rs1_fp !-4 lw" "rd_a7 !214 addi" "ecall"); } int uname(struct utsname* unameData) { asm("rd_a0 rs1_fp !-4 lw" "rd_a7 !160 addi" "ecall"); } int unshare(int flags) { asm("rd_a0 rs1_fp !-4 lw" "rd_a7 !97 addi" "ecall"); } int geteuid() { asm("rd_a7 !175 addi" "ecall"); } int getegid() { asm("rd_a7 !177 addi" "ecall"); } int mount (char *source, char *target, char *filesystemtype, SCM mountflags, void *data) { asm("rd_a0 rs1_fp !-4 lw" "rd_a1 rs1_fp !-8 lw" "rd_a2 rs1_fp !-12 lw" "rd_a3 rs1_fp !-16 lw" "rd_a4 rs1_fp !-20 lw" "rd_a7 !40 addi" "ecall"); } int chroot(char *path) { asm("rd_a0 rs1_fp !-4 lw" "rd_a7 !51 addi" "ecall"); } #endif
/* Copyright (C) 2021 Andrius Štikonas
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _SIGNAL_H
#define _SIGNAL_H

/* Values of si_code describing what happened to a child process */
#define CLD_EXITED 1    /* child has exited */
#define CLD_KILLED 2    /* child was killed */
#define CLD_DUMPED 3    /* child terminated abnormally */
#define CLD_TRAPPED 4   /* traced child has trapped */
#define CLD_STOPPED 5   /* child has stopped */
#define CLD_CONTINUED 6 /* stopped child has continued */

/* Child-status record.
 * The order of si_errno and si_code is selectable: defining
 * __SI_SWAP_ERRNO_CODE before this point puts si_code first.
 */
struct siginfo_t
{
	int si_signo;
#ifdef __SI_SWAP_ERRNO_CODE
	int si_code;
	int si_errno;
#else
	int si_errno;
	int si_code;
#endif
	int si_pid;
	int si_uid;
	int si_status;
	int si_utime;
	int si_stime;
};

#endif
/* Copyright (C) 2020 Jeremiah Orians * Copyright (C) 2021 Andrius Å tikonas * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _UNISTD_C #define _UNISTD_C #include <sys/utsname.h> #define NULL 0 #define __PATH_MAX 4096 #define __SI_SWAP_ERRNO_CODE void* malloc(unsigned size); int access(char* pathname, int mode) { asm("rd_a0 !-100 addi" /* AT_FDCWD */ "rd_a1 rs1_fp !-8 ld" "rd_a2 rs1_fp !-16 ld" "rd_a3 addi" /* flags = 0 */ "rd_a7 !48 addi" "ecall"); } int chdir(char* path) { asm("rd_a0 rs1_fp !-8 ld" "rd_a7 !49 addi" "ecall"); } int fchdir(int fd) { asm("rd_a0 rs1_fp !-8 ld" "rd_a7 !50 addi" "ecall"); } void _exit(int value); int fork() { asm("rd_a7 !220 addi" "rd_a0 !17 addi" /* SIGCHld */ "rd_a1 mv" /* Child uses duplicate of parent's stack */ "ecall"); } int waitpid (int pid, int* status_ptr, int options) { /* Uses wait4 with struct rusage *ru set to NULL */ asm("rd_a0 rs1_fp !-8 ld" "rd_a1 rs1_fp !-16 ld" "rd_a2 rs1_fp !-24 ld" "rd_a3 addi" "rd_a7 !260 addi" "ecall"); } int execve(char* file_name, char** argv, char** envp) { asm("rd_a0 rs1_fp !-8 ld" "rd_a1 rs1_fp !-16 ld" "rd_a2 rs1_fp !-24 ld" "rd_a7 !221 addi" "ecall"); } int read(int fd, char* buf, unsigned count) { asm("rd_a0 rs1_fp !-8 ld" "rd_a1 rs1_fp !-16 ld" "rd_a2 rs1_fp !-24 ld" "rd_a7 !63 addi" "ecall"); } int write(int fd, char* buf, unsigned count) { asm("rd_a0 rs1_fp !-8 ld" "rd_a1 
rs1_fp !-16 ld" "rd_a2 rs1_fp !-24 ld" "rd_a7 !64 addi" "ecall"); } int lseek(int fd, int offset, int whence) { asm("rd_a0 rs1_fp !-8 ld" "rd_a1 rs1_fp !-16 ld" "rd_a2 rs1_fp !-24 ld" "rd_a7 !62 addi" "ecall"); } int close(int fd) { asm("rd_a0 rs1_fp !-8 ld" "rd_a7 !57 addi" /* close */ "ecall"); } int unlink (char* filename) { asm("rd_a0 !-100 addi" /* AT_FDCWD */ "rd_a1 rs1_fp !-8 ld" "rd_a2 !0 addi" /* No flags */ "rd_a7 !35 addi" /* unlinkat */ "ecall"); } int _getcwd(char* buf, int size) { asm("rd_a0 rs1_fp !-8 ld" "rd_a1 rs1_fp !-16 ld" "rd_a7 !17 addi" "ecall"); } char* getcwd(char* buf, unsigned size) { int c = _getcwd(buf, size); if(0 == c) return NULL; return buf; } char* getwd(char* buf) { return getcwd(buf, __PATH_MAX); } char* get_current_dir_name() { return getcwd(malloc(__PATH_MAX), __PATH_MAX); } int brk(void *addr) { asm("rd_a0 rs1_fp !-8 ld" "rd_a7 !214 addi" "ecall"); } int uname(struct utsname* unameData) { asm("rd_a0 rs1_fp !-8 ld" "rd_a7 !160 addi" "ecall"); } int unshare(int flags) { asm("rd_a0 rs1_fp !-8 ld" "rd_a7 !97 addi" "ecall"); } int geteuid() { asm("rd_a7 !175 addi" "ecall"); } int getegid() { asm("rd_a7 !177 addi" "ecall"); } int mount(char *source, char *target, char *filesystemtype, SCM mountflags, void *data) { asm("rd_a0 rs1_fp !-8 ld" "rd_a1 rs1_fp !-16 ld" "rd_a2 rs1_fp !-24 ld" "rd_a3 rs1_fp !-32 ld" "rd_a4 rs1_fp !-40 ld" "rd_a7 !40 addi" "ecall"); } int chroot(char *path) { asm("rd_a0 rs1_fp !-8 ld" "rd_a7 !51 addi" "ecall"); } #endif
/* Copyright (C) 2016 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _STDLIB_H
#define _STDLIB_H
#include <unistd.h>

/* When __M2__ is defined the implementation file is pulled in directly;
 * otherwise only the declarations below are provided. */
#ifdef __M2__
#include <stdlib.c>
#else

/* Process exit status values */
#define EXIT_FAILURE 1
#define EXIT_SUCCESS 0

extern void exit(int value);

/* Allocator state and entry points */
extern long _malloc_ptr;
extern long _brk_ptr;
extern void free(void* l);
extern void* malloc(unsigned size);
extern void* memset(void* ptr, int value, int num);
extern void* calloc(int count, int size);

extern char *getenv(const char *name);
size_t wcstombs(char* dest, const wchar_t* src, size_t n);
#endif
#endif
/* Copyright (C) 2016 Jeremiah Orians
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _BOOTSTRAPPABLE_H
#define _BOOTSTRAPPABLE_H

/* Essential common CONSTANTS*/
#define TRUE 1
#define FALSE 0

/* When __M2__ is defined the implementation file is pulled in directly;
 * otherwise only the prototypes below are provided. */
#ifdef __M2__
#include <bootstrappable.c>
#else
/* Universally useful functions */

/* NOTE(review): takes a condition and an error message; the behavior when
 * the condition is false lives in bootstrappable.c - confirm there. */
void require(int bool, char* error);
/* NOTE(review): string comparison helper; presumably TRUE when a and b are
 * equal - confirm against bootstrappable.c. */
int match(char* a, char* b);
int in_set(int c, char* s);
int strtoint(char *a);
char* int2str(int x, int base, int signed_p);
#endif
#endif
/* Copyright (C) 2021 Andrius Å tikonas * This file is part of mescc-tools-extra * * mescc-tools-extra is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * mescc-tools-extra is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with mescc-tools-extra. If not, see <http://www.gnu.org/licenses/>. */ /******************************************************************************** * "match" can be used to compare strings. It is useful to write conditional * * code in kaem. * * * * Usage: match string1 string2 * * Returns: 0 if strings match * ********************************************************************************/ #include <stdio.h> #include <string.h> #include "M2libc/bootstrappable.h" int main(int argc, char **argv) { /* ensure correct number of arguments */ if(argc != 3) { fputs("match needs exactly 2 arguments.\n", stderr); return 2; } /* deal with badly behaving shells calling */ if(NULL == argv[1]) { fputs("You passed a null string\n", stderr); return 3; } if(NULL == argv[2]) { fputs("You passed a null string\n", stderr); return 3; } return !match(argv[1], argv[2]); }
/* Copyright (C) 2009 Tim Kientzle * Copyright (C) 2021 Jeremiah Orians * Copyright (C) 2021 Andrius Å tikonas * This file is part of mescc-tools-extra * * mescc-tools-extra is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * mescc-tools-extra is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with mescc-tools-extra. If not, see <http://www.gnu.org/licenses/>. */ /******************************************************************************** * "mkdir" can be used to create empty directories. It can also create * * required parent directories. * * * * Usage: mkdir <dir1>/<dir2> <dir3> * * * * These are all highly standard and portable headers. * ********************************************************************************/ #include <stdio.h> #include <string.h> /* This is for mkdir(); this may need to be changed for some platforms. */ #include <sys/stat.h> /* For mkdir() */ #include <stdlib.h> #include "M2libc/bootstrappable.h" #define MAX_STRING 4096 int parents; /* Create a directory, including parent directories as necessary. */ void create_dir(char *pathname, int mode) { char *p; int r; /* Strip trailing '/' */ if(pathname[strlen(pathname) - 1] == '/') { pathname[strlen(pathname) - 1] = '\0'; } /* Try creating the directory. */ r = mkdir(pathname, mode); if((r != 0) && parents) { /* On failure, try creating parent directory. 
*/ p = strrchr(pathname, '/'); if(p != NULL) { p[0] = '\0'; create_dir(pathname, mode); p[0] = '/'; r = mkdir(pathname, mode); } } if((r != 0) && !parents) { fputs("Could not create directory ", stderr); fputs(pathname, stderr); fputc('\n', stderr); exit(EXIT_FAILURE); } } int main(int argc, char **argv) { /* This adds some quasi-compatibility with GNU coreutils' mkdir. */ parents = FALSE; int i; int mode = 0755; char* raw_mode = NULL; for(i = 1; argc > i; i = i + 1) { if(match(argv[i], "-p") || match(argv[i], "--parents")) { parents = TRUE; } else if(match(argv[i], "-h") || match(argv[i], "--help")) { fputs("mescc-tools-extra mkdir supports --parents and --mode 0750 " "but the last argument always must be the directly to make\n", stdout); return 0; } else if(match(argv[i], "-v") || match(argv[i], "--version")) { fputs("mescc-tools-extra mkdir version 1.3.0\n", stdout); return 0; } else if(match(argv[i], "-m") || match(argv[i], "--mode")) { raw_mode = calloc(MAX_STRING, sizeof(char)); require(raw_mode != NULL, "Memory initialization of mode failed\n"); /* We need to indicate it is octal */ strcat(raw_mode, "0"); strcat(raw_mode, argv[i+1]); mode = strtoint(raw_mode); i = i + 1; } else create_dir(argv[i], mode); } return 0; }
/* Copyright (C) 2009 Tim Kientzle * Copyright (C) 2021 Jeremiah Orians * This file is part of mescc-tools-extra * * mescc-tools-extra is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * mescc-tools-extra is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with mescc-tools-extra. If not, see <http://www.gnu.org/licenses/>. */ /* * "untar" is an extremely simple tar extractor: * * A single C source file, so it should be easy to compile * and run on any system with a C compiler. * * Extremely portable standard C. The only non-ANSI function * used is mkdir(). * * Reads basic ustar tar archives. * * Does not require libarchive or any other special library. * * To compile: cc -o untar untar.c * * Usage: untar <archive> * * In particular, this program should be sufficient to extract the * distribution for libarchive, allowing people to bootstrap * libarchive on systems that do not already have a tar program. * * To unpack libarchive-x.y.z.tar.gz: * * gunzip libarchive-x.y.z.tar.gz * * untar libarchive-x.y.z.tar * * Written by Tim Kientzle, March 2009. * * Released into the public domain. */ /* These are all highly standard and portable headers. */ #include <stdio.h> #include <stdlib.h> #include <string.h> /* This is for mkdir(); this may need to be changed for some platforms. */ #include <sys/stat.h> /* For mkdir() */ #include "M2libc/bootstrappable.h" int FUZZING; int VERBOSE; int STRICT; /* Parse an octal number, ignoring leading and trailing nonsense. 
*/ int parseoct(char const* p, size_t n) { int i = 0; int h; while(((p[0] < '0') || (p[0] > '7')) && (n > 0)) { p = p + 1; n = n - 1; } while((p[0] >= '0') && (p[0] <= '7') && (n > 0)) { i = i << 3; h = p[0]; i = i + h - 48; p = p + 1; n = n - 1; } return i; } /* Returns true if this is 512 zero bytes. */ int is_end_of_archive(char const* p) { int n; for(n = 511; n >= 0; n = n - 1) { if(p[n] != 0) { return FALSE; } } return TRUE; } /* Create a directory, including parent directories as necessary. */ void create_dir(char *pathname, int mode) { char *p; int r; /* Strip trailing '/' */ if(pathname[strlen(pathname) - 1] == '/') { pathname[strlen(pathname) - 1] = '\0'; } /* Try creating the directory. */ if(!FUZZING) { r = mkdir(pathname, mode); if(r != 0) { /* On failure, try creating parent directory. */ p = strrchr(pathname, '/'); if(p != NULL) { p[0] = '\0'; create_dir(pathname, 0755); p[0] = '/'; r = mkdir(pathname, mode); } } if(r != 0) { fputs("Could not create directory ", stderr); fputs(pathname, stderr); fputc('\n', stderr); } } } /* Create a file, including parent directory as necessary. */ FILE* create_file(char *pathname) { if(FUZZING) return NULL; FILE* f; f = fopen(pathname, "w"); if(f == NULL) { /* Try creating parent dir and then creating file. */ char *p = strrchr(pathname, '/'); if(p != NULL) { p[0] = '\0'; create_dir(pathname, 0755); p[0] = '/'; f = fopen(pathname, "w"); } } return f; } /* Verify the tar checksum. */ int verify_checksum(char const* p) { int n; int u = 0; unsigned h; for(n = 0; n < 512; n = n + 1) { /* Standard tar checksum adds unsigned bytes. */ if((n < 148) || (n > 155)) { h = p[n]; u = u + h; } else { u = u + 0x20; } } int r = parseoct(p + 148, 8); return (u == r); } /* Extract a tar archive. 
*/ int untar(FILE *a, char const* path) { char* buff = calloc(514, sizeof(char)); FILE* f = NULL; size_t bytes_read; size_t bytes_written; int filesize; int op; if(VERBOSE) { fputs("Extracting from ", stdout); puts(path); } while(TRUE) { memset(buff, 0, 514); bytes_read = fread(buff, sizeof(char), 512, a); if(bytes_read < 512) { fputs("Short read on ", stderr); fputs(path, stderr); fputs(": expected 512, got ", stderr); fputs(int2str(bytes_read, 10, TRUE), stderr); fputc('\n', stderr); return FALSE; } if(is_end_of_archive(buff)) { if(VERBOSE) { fputs("End of ", stdout); puts(path); } return TRUE; } if(!verify_checksum(buff)) { fputs("Checksum failure\n", stderr); return FALSE; } filesize = parseoct(buff + 124, 12); op = buff[156]; if('1' == op) { if(STRICT) { fputs("unable to create hardlinks\n", stderr); exit(EXIT_FAILURE); } fputs(" Ignoring hardlink ", stdout); puts(buff); } else if('2' == op) { if(STRICT) { fputs("unable to create symlinks\n", stderr); exit(EXIT_FAILURE); } fputs(" Ignoring symlink ", stdout); puts(buff); } else if('3' == op) { if(STRICT) { fputs("unable to create character devices\n", stderr); exit(EXIT_FAILURE); } fputs(" Ignoring character device ", stdout); puts(buff); } else if('4' == op) { if(STRICT) { fputs("unable to create block devices\n", stderr); exit(EXIT_FAILURE); } fputs(" Ignoring block device ", stdout); puts(buff); } else if('5' == op) { if(VERBOSE) { fputs(" Extracting dir ", stdout); puts(buff); } create_dir(buff, parseoct(buff + 100, 8)); filesize = 0; } else if('6' == op) { if(STRICT) { fputs("unable to create FIFO\n", stderr); exit(EXIT_FAILURE); } fputs(" Ignoring FIFO ", stdout); puts(buff); } else { if(VERBOSE) { fputs(" Extracting file ", stdout); puts(buff); } f = create_file(buff); } while(filesize > 0) { bytes_read = fread(buff, 1, 512, a); if(bytes_read < 512) { fputs("Short read on ", stderr); fputs(path, stderr); fputs(": Expected 512, got ", stderr); puts(int2str(bytes_read, 10, TRUE)); return FALSE; } 
if(filesize < 512) { bytes_read = filesize; } if(f != NULL) { if(!FUZZING) { bytes_written = fwrite(buff, 1, bytes_read, f); if(bytes_written != bytes_read) { fputs("Failed write\n", stderr); fclose(f); f = NULL; } } } filesize = filesize - bytes_read; } if(f != NULL) { fclose(f); f = NULL; } } return TRUE; } struct files_queue { char* name; FILE* f; struct files_queue* next; }; int main(int argc, char **argv) { struct files_queue* list = NULL; struct files_queue* a; STRICT = TRUE; FUZZING = FALSE; int r; int i = 1; while (i < argc) { if(NULL == argv[i]) { i = i + 1; } else if(match(argv[i], "-f") || match(argv[i], "--file")) { a = calloc(1, sizeof(struct files_queue)); require(NULL != a, "failed to allocate enough memory to even get the file name\n"); a->next = list; a->name = argv[i+1]; require(NULL != a->name, "the --file option requires a filename to be given\n"); a->f = fopen(a->name, "r"); if(a->f == NULL) { fputs("Unable to open ", stderr); fputs(a->name, stderr); fputc('\n', stderr); if(STRICT) exit(EXIT_FAILURE); } list = a; i = i + 2; } else if(match(argv[i], "--chaos") || match(argv[i], "--fuzz-mode") || match(argv[i], "--fuzzing")) { FUZZING = TRUE; fputs("fuzz-mode enabled, preparing for chaos\n", stderr); i = i + 1; } else if(match(argv[i], "-v") || match(argv[i], "--verbose")) { VERBOSE = TRUE; i = i + 1; } else if(match(argv[i], "--non-strict") || match(argv[i], "--bad-decisions-mode") || match(argv[i], "--drunk-mode")) { STRICT = FALSE; fputs("non-strict mode enabled, preparing for chaos\n", stderr); i = i + 1; } else if(match(argv[i], "-h") || match(argv[i], "--help")) { fputs("Usage: ", stderr); fputs(argv[0], stderr); fputs(" --file $input.gz\n", stderr); fputs("--verbose to print list of extracted files\n", stderr); fputs("--help to get this message\n", stderr); fputs("--fuzz-mode if you wish to fuzz this application safely\n", stderr); fputs("--non-strict if you wish to just ignore files not existing\n", stderr); exit(EXIT_SUCCESS); } else { 
fputs("Unknown option:", stderr); fputs(argv[i], stderr); fputs("\nAborting to avoid problems\n", stderr); exit(EXIT_FAILURE); } } /* Process the queue one file at a time */ while(NULL != list) { r = untar(list->f, list->name); fputs("The extraction of ", stderr); fputs(list->name, stderr); if(r) fputs(" was successful\n", stderr); else fputs(" produced errors\n", stderr); fclose(list->f); list = list->next; } return 0; }
/* Copyright (C) 2002-2013 Mark Adler, all rights reserved * Copyright (C) 2021 Jeremiah Orians * This file is part of mescc-tools-extra * * mescc-tools-extra is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * mescc-tools-extra is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with mescc-tools-extra. If not, see <http://www.gnu.org/licenses/>. */ /* puff.c * Copyright (C) 2002-2013 Mark Adler, all rights reserved * version 2.3, 21 Jan 2013 * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages * arising from the use of this software. * Permission is granted to anyone to use this software for any purpose, * including commercial applications, and to alter it and redistribute it * freely, subject to the following restrictions: * 1. The origin of this software must not be misrepresented; you must not * claim that you wrote the original software. If you use this software * in a product, an acknowledgment in the product documentation would be * appreciated but is not required. * 2. Altered source versions must be plainly marked as such, and must not be * misrepresented as being the original software. * 3. This notice may not be removed or altered from any source distribution. * Mark Adler madler@alumni.caltech.edu */ /* ungz.c is a gz file decompression utility that leverages puff.c to provide * the deflate algorithm with multiple modifications to enable being built by * M2-Planet with M2libc. 
* * * puff.c is a simple inflate written to be an unambiguous way to specify the * deflate format. It is not written for speed but rather simplicity. As a * side benefit, this code might actually be useful when small code is more * important than speed, such as bootstrap applications. For typical deflate * data, zlib's inflate() is about four times as fast as puff(). zlib's * inflate compiles to around 20K on my machine, whereas puff.c compiles to * around 4K on my machine (a PowerPC using GNU cc). If the faster decode() * function here is used, then puff() is only twice as slow as zlib's * inflate(). * * All dynamically allocated memory comes from the stack. The stack required * is less than 2K bytes. This code is compatible with 16-bit int's and * assumes that long's are at least 32 bits. puff.c uses the short data type, * assumed to be 16 bits, for arrays in order to conserve memory. The code * works whether integers are stored big endian or little endian. * * In the comments below are "Format notes" that describe the inflate process * and document some of the less obvious aspects of the format. 
This source * code is meant to supplement RFC 1951, which formally describes the deflate * format: * * http://www.zlib.org/rfc-deflate.html */ /* * Change history: * * 1.0 10 Feb 2002 - First version * 1.1 17 Feb 2002 - Clarifications of some comments and notes * - Update puff() dest and source pointers on negative * errors to facilitate debugging deflators * - Remove longest from struct huffman -- not needed * - Simplify offs[] index in construct() * - Add input size and checking, using longjmp() to * maintain easy readability * - Use short data type for large arrays * - Use pointers instead of long to specify source and * destination sizes to avoid arbitrary 4 GB limits * 1.2 17 Mar 2002 - Add faster version of decode(), doubles speed (!), * but leave simple version for readabilty * - Make sure invalid distances detected if pointers * are 16 bits * - Fix fixed codes table error * - Provide a scanning mode for determining size of * uncompressed data * 1.3 20 Mar 2002 - Go back to lengths for puff() parameters [Gailly] * - Add a puff.h file for the interface * - Add braces in puff() for else do [Gailly] * - Use indexes instead of pointers for readability * 1.4 31 Mar 2002 - Simplify construct() code set check * - Fix some comments * - Add FIXLCODES #define * 1.5 6 Apr 2002 - Minor comment fixes * 1.6 7 Aug 2002 - Minor format changes * 1.7 3 Mar 2003 - Added test code for distribution * - Added zlib-like license * 1.8 9 Jan 2004 - Added some comments on no distance codes case * 1.9 21 Feb 2008 - Fix bug on 16-bit integer architectures [Pohland] * - Catch missing end-of-block symbol error * 2.0 25 Jul 2008 - Add #define to permit distance too far back * - Add option in TEST code for puff to write the data * - Add option in TEST code to skip input bytes * - Allow TEST code to read from piped stdin * 2.1 4 Apr 2010 - Avoid variable initialization for happier compilers * - Avoid unsigned comparisons for even happier compilers * 2.2 25 Apr 2010 - Fix bug in variable 
initializations [Oberhumer] * - Add const where appropriate [Oberhumer] * - Split if's and ?'s for coverage testing * - Break out test code to separate file * - Move NIL to puff.h * - Allow incomplete code only if single code length is 1 * - Add full code coverage test to Makefile * 2.3 21 Jan 2013 - Check for invalid code length codes in dynamic blocks * ?? 22 May 2021 - Convert to M2-Planet C subset for bootstrapping purposes. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include "M2libc/bootstrappable.h" /* * Maximums for allocations and loops. It is not useful to change these -- * they are fixed by the deflate format. */ #define MAXBITS 15 /* maximum bits in a code */ #define MAXLCODES 286 /* maximum number of literal/length codes */ #define MAXDCODES 30 /* maximum number of distance codes */ #define MAXCODES 316 /* maximum codes lengths to read (MAXLCODES+MAXDCODES) */ #define FIXLCODES 288 /* number of fixed literal/length codes */ /* input and output state */ struct state { /* output state */ char *out; /* output buffer */ size_t outlen; /* available space at out */ size_t outcnt; /* bytes written to out so far */ /* input state */ char *in; /* input buffer */ size_t inlen; /* available input at in */ size_t incnt; /* bytes read so far */ int bitbuf; /* bit buffer */ int bitcnt; /* number of bits in bit buffer */ }; /* * Return need bits from the input stream. This always leaves less than * eight bits in the buffer. bits() works properly for need == 0. * * Format notes: * * - Bits are stored in bytes from the least significant bit to the most * significant bit. Therefore bits are dropped from the bottom of the bit * buffer, using shift right, and new bytes are appended to the top of the * bit buffer, using shift left. 
*/ int bits(struct state *s, int need) { long val; /* bit accumulator (can use up to 20 bits) */ long hold; /* load at least need bits into val */ val = s->bitbuf; while (s->bitcnt < need) { if (s->incnt == s->inlen) { fputs("out of input\n", stderr); exit(EXIT_FAILURE); } hold = (s->in[s->incnt] & 0xFF); s->incnt = s->incnt + 1; val = val | (hold << s->bitcnt); /* load eight bits */ s->bitcnt = s->bitcnt + 8; } /* drop need bits and update buffer, always zero to seven bits left */ s->bitbuf = (val >> need); s->bitcnt = s->bitcnt - need; /* return need bits, zeroing the bits above that */ val = (val & ((1 << need) - 1)); #if defined(DEBUG) fputs(int2str(val, 16, FALSE), stderr); fputs(" : bits\n", stderr); #endif return val; } /* * Process a stored block. * * Format notes: * * - After the two-bit stored block type (00), the stored block length and * stored bytes are byte-aligned for fast copying. Therefore any leftover * bits in the byte that has the last bit of the type, as many as seven, are * discarded. The value of the discarded bits are not defined and should not * be checked against any expectation. * * - The second inverted copy of the stored block length does not have to be * checked, but it's probably a good idea to do so anyway. * * - A stored block can have zero length. This is sometimes used to byte-align * subsets of the compressed data for random access or partial recovery. */ int stored(struct state *s) { unsigned len; /* length of stored block */ /* discard leftover bits from current byte (assumes s->bitcnt < 8) */ s->bitbuf = 0; s->bitcnt = 0; /* get length and check against its one's complement */ if ((s->incnt + 4) > s->inlen) return 2; /* not enough input */ len = s->in[s->incnt]; s->incnt = s->incnt + 1; len = len | (s->in[s->incnt] << 8); s->incnt = s->incnt + 1; if(s->in[s->incnt] != (~len & 0xff)) return -2; /* didn't match complement! 
*/ s->incnt = s->incnt + 1; if(s->in[s->incnt] != ((~len >> 8) & 0xff)) return -2; /* didn't match complement! */ s->incnt = s->incnt + 1; /* copy len bytes from in to out */ if ((s->incnt + len) > s->inlen) return 2; /* not enough input */ if (s->out != 0) { if ((s->outcnt + len) > s->outlen) return 1; /* not enough output space */ while (0 != len) { len = len - 1; s->out[s->outcnt] = s->in[s->incnt]; s->outcnt = s->outcnt + 1; s->incnt = s->incnt + 1; } } else { /* just scanning */ s->outcnt = s->outcnt + len; s->incnt = s->incnt + len; } /* done with a valid stored block */ return 0; } /* * Huffman code decoding tables. count[1..MAXBITS] is the number of symbols of * each length, which for a canonical code are stepped through in order. * symbol[] are the symbol values in canonical order, where the number of * entries is the sum of the counts in count[]. The decoding process can be * seen in the function decode() below. */ struct huffman { int *count; /* number of symbols of each length */ int *symbol; /* canonically ordered symbols */ }; /* * Decode a code from the stream s using huffman table h. Return the symbol or * a negative value if there is an error. If all of the lengths are zero, i.e. * an empty code, or if the code is incomplete and an invalid code is received, * then -10 is returned after reading MAXBITS bits. * * Format notes: * * - The codes as stored in the compressed data are bit-reversed relative to * a simple integer ordering of codes of the same lengths. Hence below the * bits are pulled from the compressed data one at a time and used to * build the code value reversed from what is in the stream in order to * permit simple integer comparisons for decoding. A table-based decoding * scheme (as used in zlib) does not need to do this reversal. * * - The first code for the shortest length is all zeros. Subsequent codes of * the same length are simply integer increments of the previous code. 
When * moving up a length, a zero bit is appended to the code. For a complete * code, the last code of the longest length will be all ones. * * - Incomplete codes are handled by this decoder, since they are permitted * in the deflate format. See the format notes for fixed() and dynamic(). */ int decode(struct state *s, struct huffman *h) { int len; /* current number of bits in code */ int code = 0; /* len bits being decoded */ int first = 0; /* first code of length len */ int count; /* number of codes of length len */ int index = 0; /* index of first code of length len in symbol table */ long hold; for (len = 1; len <= MAXBITS; len = len + 1) { hold = bits(s, 1); /* get next bit */ code = code | hold; count = h->count[len]; if ((code - count) < first) { hold = index + (code - first); return h->symbol[hold]; /* if length len, return symbol */ } index = index + count; /* else update for next length */ first = first + count; first = first << 1; code = code << 1; } return -10; /* ran out of codes */ } /* * Given the list of code lengths length[0..n-1] representing a canonical * Huffman code for n symbols, construct the tables required to decode those * codes. Those tables are the number of codes of each length, and the symbols * sorted by length, retaining their original order within each length. The * return value is zero for a complete code set, negative for an over- * subscribed code set, and positive for an incomplete code set. The tables * can be used if the return value is zero or positive, but they cannot be used * if the return value is negative. If the return value is zero, it is not * possible for decode() using that table to return an error--any stream of * enough bits will resolve to a symbol. If the return value is positive, then * it is possible for decode() using that table to return an error for received * codes past the end of the incomplete lengths. 
* * Not used by decode(), but used for error checking, h->count[0] is the number * of the n symbols not in the code. So n - h->count[0] is the number of * codes. This is useful for checking for incomplete codes that have more than * one symbol, which is an error in a dynamic block. * * Assumption: for all i in 0..n-1, 0 <= length[i] <= MAXBITS * This is assured by the construction of the length arrays in dynamic() and * fixed() and is not verified by construct(). * * Format notes: * * - Permitted and expected examples of incomplete codes are one of the fixed * codes and any code with a single symbol which in deflate is coded as one * bit instead of zero bits. See the format notes for fixed() and dynamic(). * * - Within a given code length, the symbols are kept in ascending order for * the code bits definition. */ int construct(struct huffman *h, int *length, int n) { int symbol; /* current symbol when stepping through length[] */ int len; /* current length when stepping through h->count[] */ int left; /* number of possible codes left of current length */ int* offs; /* offsets in symbol table for each length */ offs = calloc(MAXBITS+1, sizeof(int)); long hold; #if defined(DEBUG) int i; fputs(int2str(n, 16, FALSE), stderr); fputs(" : construct 0\n", stderr); for(i = 0; i < n; i = i + 1) { fputs(int2str(length[i], 16, FALSE), stderr); fputs(" : construct 2\n", stderr); } #endif /* count number of codes of each length */ for (len = 0; len <= MAXBITS; len = len + 1) { h->count[len] = 0; } for (symbol = 0; symbol < n; symbol = symbol + 1) { hold = length[symbol]; h->count[hold] = h->count[hold] + 1; /* assumes lengths are within bounds */ } if (h->count[0] == n) return 0; /* no codes! 
complete, but decode() will fail */ /* check for an over-subscribed or incomplete set of lengths */ left = 1; /* one possible code of zero length */ for (len = 1; len <= MAXBITS; len = len + 1) { left = left << 1; /* one more bit, double codes left */ left = left - h->count[len]; /* deduct count from possible codes */ if (left < 0) return left; /* over-subscribed--return negative */ } /* left > 0 means incomplete */ /* generate offsets into symbol table for each length for sorting */ offs[1] = 0; for (len = 1; len < MAXBITS; len = len + 1) { offs[len + 1] = offs[len] + h->count[len]; } /* * put symbols in table sorted by length, by symbol order within each * length */ for (symbol = 0; symbol < n; symbol = symbol + 1) { if (length[symbol] != 0) { hold = length[symbol]; hold = offs[hold]; h->symbol[hold] = symbol; hold = length[symbol]; offs[hold] = offs[hold] + 1; } } /* return zero for complete set, positive for incomplete set */ return left; } /* * Decode literal/length and distance codes until an end-of-block code. * * Format notes: * * - Compressed data that is after the block type if fixed or after the code * description if dynamic is a combination of literals and length/distance * pairs terminated by and end-of-block code. Literals are simply Huffman * coded bytes. A length/distance pair is a coded length followed by a * coded distance to represent a string that occurs earlier in the * uncompressed data that occurs again at the current location. * * - Literals, lengths, and the end-of-block code are combined into a single * code of up to 286 symbols. They are 256 literals (0..255), 29 length * symbols (257..285), and the end-of-block symbol (256). * * - There are 256 possible lengths (3..258), and so 29 symbols are not enough * to represent all of those. Lengths 3..10 and 258 are in fact represented * by just a length symbol. 
Lengths 11..257 are represented as a symbol and * some number of extra bits that are added as an integer to the base length * of the length symbol. The number of extra bits is determined by the base * length symbol. These are in the static arrays below, lens[] for the base * lengths and lext[] for the corresponding number of extra bits. * * - The reason that 258 gets its own symbol is that the longest length is used * often in highly redundant files. Note that 258 can also be coded as the * base value 227 plus the maximum extra value of 31. While a good deflate * should never do this, it is not an error, and should be decoded properly. * * - If a length is decoded, including its extra bits if any, then it is * followed by a distance code. There are up to 30 distance symbols. Again * there are many more possible distances (1..32768), so extra bits are added * to a base value represented by the symbol. The distances 1..4 get their * own symbol, but the rest require extra bits. The base distances and * corresponding number of extra bits are below in the static arrays dist[] * and dext[]. * * - Literal bytes are simply written to the output. A length/distance pair is * an instruction to copy previously uncompressed bytes to the output. The * copy is from distance bytes back in the output stream, copying for length * bytes. * * - Distances pointing before the beginning of the output data are not * permitted. * * - Overlapped copies, where the length is greater than the distance, are * allowed and common. For example, a distance of one and a length of 258 * simply copies the last byte 258 times. A distance of four and a length of * twelve copies the last four bytes three times. A simple forward copy * ignoring whether the length is greater than the distance or not implements * this correctly. You should not use memcpy() since its behavior is not * defined for overlapped arrays.
You should not use memmove() or bcopy() * since though their behavior -is- defined for overlapping arrays, it is * defined to do the wrong thing in this case. */ int* codes_lens() { /* Size base for length codes 257..285 */ int* r = calloc(30, sizeof(int)); r[0] = 3; r[1] = 4; r[2] = 5; r[3] = 6; r[4] = 7; r[5] = 8; r[6] = 9; r[7] = 10; r[8] = 11; r[9] = 13; r[10] = 15; r[11] = 17; r[12] = 19; r[13] = 23; r[14] = 27; r[15] = 31; r[16] = 35; r[17] = 43; r[18] = 51; r[19] = 59; r[20] = 67; r[21] = 83; r[22] = 99; r[23] = 115; r[24] = 131; r[25] = 163; r[26] = 195; r[27] = 227; r[28] = 258; return r; } int* codes_lext() { /* Extra bits for length codes 257..285 */ int* r = calloc(30, sizeof(int)); r[0] = 0; r[1] = 0; r[2] = 0; r[3] = 0; r[4] = 0; r[5] = 0; r[6] = 0; r[7] = 0; r[8] = 1; r[9] = 1; r[10] = 1; r[11] = 1; r[12] = 2; r[13] = 2; r[14] = 2; r[15] = 2; r[16] = 3; r[17] = 3; r[18] = 3; r[19] = 3; r[20] = 4; r[21] = 4; r[22] = 4; r[23] = 4; r[24] = 5; r[25] = 5; r[26] = 5; r[27] = 5; r[28] = 0; return r; } int* codes_dists() { /* Offset base for distance codes 0..29 */ int* r = calloc(31, sizeof(int)); r[0] = 1; r[1] = 2; r[2] = 3; r[3] = 4; r[4] = 5; r[5] = 7; r[6] = 9; r[7] = 13; r[8] = 17; r[9] = 25; r[10] = 33; r[11] = 49; r[12] = 65; r[13] = 97; r[14] = 129; r[15] = 193; r[16] = 257; r[17] = 385; r[18] = 513; r[19] = 769; r[20] = 1025; r[21] = 1537; r[22] = 2049; r[23] = 3073; r[24] = 4097; r[25] = 6145; r[26] = 8193; r[27] = 12289; r[28] = 16385; r[29] = 24577; return r; } int* codes_dext() { /* Extra bits for distance codes 0..29 */ int* r = calloc(31, sizeof(int)); r[0] = 0; r[1] = 0; r[2] = 0; r[3] = 0; r[4] = 1; r[5] = 1; r[6] = 2; r[7] = 2; r[8] = 3; r[9] = 3; r[10] = 4; r[11] = 4; r[12] = 5; r[13] = 5; r[14] = 6; r[15] = 6; r[16] = 7; r[17] = 7; r[18] = 8; r[19] = 8; r[20] = 9; r[21] = 9; r[22] = 10; r[23] = 10; r[24] = 11; r[25] = 11; r[26] = 12; r[27] = 12; r[28] = 13; r[29] = 13; return r; } int codes(struct state *s, struct huffman *lencode, 
struct huffman *distcode) { int symbol; /* decoded symbol */ int len; /* length for copy */ unsigned dist; /* distance for copy */ int* lens = codes_lens(); int* lext = codes_lext(); int* dists = codes_dists(); int* dext = codes_dext(); /* decode literals and length/distance pairs */ do { symbol = decode(s, lencode); if (symbol < 0) return symbol; /* invalid symbol */ if (symbol < 256) /* literal: symbol is the byte */ { /* write out the literal */ if (s->out != 0) { if (s->outcnt == s->outlen) return 1; s->out[s->outcnt] = symbol; } s->outcnt = s->outcnt + 1; } else if (symbol > 256) /* length */ { /* get and compute length */ symbol = symbol - 257; if (symbol >= 29) return -10; /* invalid fixed code */ len = lens[symbol] + bits(s, lext[symbol]); /* get and check distance */ symbol = decode(s, distcode); if (symbol < 0) return symbol; /* invalid symbol */ dist = dists[symbol] + bits(s, dext[symbol]); if (dist > s->outcnt) return -11; /* distance too far back */ /* copy length bytes from distance bytes back */ if (s->out != 0) { if (s->outcnt + len > s->outlen) return 1; while (0 != len) { len = len - 1; if(dist > s->outcnt) s->out[s->outcnt] = 0; else s->out[s->outcnt] = s->out[s->outcnt - dist]; s->outcnt = s->outcnt + 1; } } else s->outcnt = s->outcnt + len; } } while (symbol != 256); /* end of block symbol */ /* done with a valid fixed or dynamic block */ return 0; } /* * Process a fixed codes block. * * Format notes: * * - This block type can be useful for compressing small amounts of data for * which the size of the code descriptions in a dynamic block exceeds the * benefit of custom codes for that block. For fixed codes, no bits are * spent on code descriptions. Instead the code lengths for literal/length * codes and distance codes are fixed. The specific lengths for each symbol * can be seen in the "for" loops below. * * - The literal/length code is complete, but has two symbols that are invalid * and should result in an error if received. 
This cannot be implemented * simply as an incomplete code since those two symbols are in the "middle" * of the code. They are eight bits long and the longest literal/length\ * code is nine bits. Therefore the code must be constructed with those * symbols, and the invalid symbols must be detected after decoding. * * - The fixed distance codes also have two invalid symbols that should result * in an error if received. Since all of the distance codes are the same * length, this can be implemented as an incomplete code. Then the invalid * codes are detected while decoding. */ int fixed(struct state *s) { int* lencnt = calloc((MAXBITS + 1), sizeof(int)); int* lensym = calloc(FIXLCODES, sizeof(int)); int* distcnt = calloc((MAXBITS + 1), sizeof(int)); int* distsym = calloc(MAXDCODES, sizeof(int)); struct huffman* lencode = calloc(1, sizeof(struct huffman)); struct huffman* distcode = calloc(1, sizeof(struct huffman)); int hold; /* build fixed huffman tables if first call (may not be thread safe) */ int symbol; int* lengths = calloc(FIXLCODES, sizeof(int)); /* construct lencode and distcode */ lencode->count = lencnt; lencode->symbol = lensym; distcode->count = distcnt; distcode->symbol = distsym; /* literal/length table */ for (symbol = 0; symbol < 144; symbol = symbol + 1) { lengths[symbol] = 8; } while(symbol < 256) { lengths[symbol] = 9; symbol = symbol + 1; } while(symbol < 280) { lengths[symbol] = 7; symbol = symbol + 1; } while(symbol < FIXLCODES) { lengths[symbol] = 8; symbol = symbol + 1; } construct(lencode, lengths, FIXLCODES); /* distance table */ for (symbol = 0; symbol < MAXDCODES; symbol = symbol + 1) { lengths[symbol] = 5; } construct(distcode, lengths, MAXDCODES); /* decode data until end-of-block code */ hold = codes(s, lencode, distcode); return hold; } /* * Process a dynamic codes block. * * Format notes: * * - A dynamic block starts with a description of the literal/length and * distance codes for that block. 
New dynamic blocks allow the compressor to * rapidly adapt to changing data with new codes optimized for that data. * * - The codes used by the deflate format are "canonical", which means that * the actual bits of the codes are generated in an unambiguous way simply * from the number of bits in each code. Therefore the code descriptions * are simply a list of code lengths for each symbol. * * - The code lengths are stored in order for the symbols, so lengths are * provided for each of the literal/length symbols, and for each of the * distance symbols. * * - If a symbol is not used in the block, this is represented by a zero as * the code length. This does not mean a zero-length code, but rather * that no code should be created for this symbol. There is no way in the * deflate format to represent a zero-length code. * * - The maximum number of bits in a code is 15, so the possible lengths for * any code are 1..15. * * - The fact that a length of zero is not permitted for a code has an * interesting consequence. Normally if only one symbol is used for a given * code, then in fact that code could be represented with zero bits. However * in deflate, that code has to be at least one bit. So for example, if * only a single distance base symbol appears in a block, then it will be * represented by a single code of length one, in particular one 0 bit. This * is an incomplete code, since if a 1 bit is received, it has no meaning, * and should result in an error. So incomplete distance codes of one symbol * should be permitted, and the receipt of invalid codes should be handled. * * - It is also possible to have a single literal/length code, but that code * must be the end-of-block code, since every dynamic block has one. This * is not the most efficient way to create an empty block (an empty fixed * block is fewer bits), but it is allowed by the format. So incomplete * literal/length codes of one symbol should also be permitted.
* * - If there are only literal codes and no lengths, then there are no distance * codes. This is represented by one distance code with zero bits. * * - The list of up to 286 length/literal lengths and up to 30 distance lengths * are themselves compressed using Huffman codes and run-length encoding. In * the list of code lengths, a 0 symbol means no code, a 1..15 symbol means * that length, and the symbols 16, 17, and 18 are run-length instructions. * Each of 16, 17, and 18 are followed by extra bits to define the length of * the run. 16 copies the last length 3 to 6 times. 17 represents 3 to 10 * zero lengths, and 18 represents 11 to 138 zero lengths. Unused symbols * are common, hence the special coding for zero lengths. * * - The symbols for 0..18 are Huffman coded, and so that code must be * described first. This is simply a sequence of up to 19 three-bit values * representing no code (0) or the code length for that symbol (1..7). * * - A dynamic block starts with three fixed-size counts from which is computed * the number of literal/length code lengths, the number of distance code * lengths, and the number of code length code lengths (ok, you come up with * a better name!) in the code descriptions. For the literal/length and * distance codes, lengths after those provided are considered zero, i.e. no * code. The code length code lengths are received in a permuted order (see * the order[] array below) to make a short code length code length list more * likely. As it turns out, very short and very long codes are less likely * to be seen in a dynamic code description, hence what may appear initially * to be a peculiar ordering. * * - Given the number of literal/length code lengths (nlen) and distance code * lengths (ndist), then they are treated as one long list of nlen + ndist * code lengths. Therefore run-length coding can and often does cross the * boundary between the two sets of lengths.
* * - So to summarize, the code description at the start of a dynamic block is * three counts for the number of code lengths for the literal/length codes, * the distance codes, and the code length codes. This is followed by the * code length code lengths, three bits each. This is used to construct the * code length code which is used to read the remainder of the lengths. Then * the literal/length code lengths and distance lengths are read as a single * set of lengths using the code length codes. Codes are constructed from * the resulting two sets of lengths, and then finally you can start * decoding actual compressed data in the block. * * - For reference, a "typical" size for the code description in a dynamic * block is around 80 bytes. */ int* dynamic_order() { /* permutation of code length codes */ int* r = calloc(20, sizeof(int)); r[0] = 16; r[1] = 17; r[2] = 18; r[3] = 0; r[4] = 8; r[5] = 7; r[6] = 9; r[7] = 6; r[8] = 10; r[9] = 5; r[10] = 11; r[11] = 4; r[12] = 12; r[13] = 3; r[14] = 13; r[15] = 2; r[16] = 14; r[17] = 1; r[18] = 15; return r; } int dynamic(struct state *s) { #if defined(__M2__) int array = sizeof(int); #else int array = 1; #endif int nlen; int ndist; int ncode; /* number of lengths in descriptor */ int index; /* index of lengths[] */ int err; /* construct() return value */ int* lengths = calloc(MAXCODES, sizeof(int)); /* descriptor code lengths */ int* lencnt = calloc((MAXBITS + 1), sizeof(int)); int* lensym = calloc(MAXLCODES, sizeof(int)); /* lencode memory */ int* distcnt = calloc((MAXBITS + 1), sizeof(int)); int* distsym = calloc(MAXDCODES, sizeof(int)); /* distcode memory */ struct huffman* lencode = calloc(1, sizeof(struct huffman)); struct huffman* distcode = calloc(1, sizeof(struct huffman)); int* order = dynamic_order(); long hold; int* set; /* construct lencode and distcode */ lencode->count = lencnt; lencode->symbol = lensym; distcode->count = distcnt; distcode->symbol = distsym; /* get number of lengths in each table, check 
lengths */ nlen = bits(s, 5) + 257; ndist = bits(s, 5) + 1; ncode = bits(s, 4) + 4; if (nlen > MAXLCODES) return -3; /* bad counts */ if(ndist > MAXDCODES) return -3; /* bad counts */ /* read code length code lengths (really), missing lengths are zero */ for (index = 0; index < ncode; index = index + 1) { hold = order[index]; lengths[hold] = bits(s, 3); } while(index < 19) { hold = order[index]; lengths[hold] = 0; index = index + 1; } /* build huffman table for code lengths codes (use lencode temporarily) */ err = construct(lencode, lengths, 19); if (err != 0) return -4; /* require complete code set here */ /* read length/literal and distance code length tables */ index = 0; int symbol; /* decoded value */ int len; /* last length to repeat */ while (index < (nlen + ndist)) { symbol = decode(s, lencode); if (symbol < 0) return symbol; /* invalid symbol */ if (symbol < 16) /* length in 0..15 */ { lengths[index] = symbol; index = index + 1; } else /* repeat instruction */ { len = 0; /* assume repeating zeros */ if (symbol == 16) /* repeat last length 3..6 times */ { if (index == 0) return -5; /* no last length! */ len = lengths[index - 1]; /* last length */ symbol = 3 + bits(s, 2); } else if (symbol == 17) symbol = 3 + bits(s, 3); /* repeat zero 3..10 times */ else symbol = 11 + bits(s, 7); /* == 18, repeat zero 11..138 times */ if ((index + symbol) > (nlen + ndist)) return -6; /* too many lengths! */ while(0 != symbol) /* repeat last or zero symbol times */ { lengths[index] = len; index = index + 1; symbol = symbol - 1; } } } /* check for end-of-block code -- there better be one! 
*/ if (lengths[256] == 0) return -9; /* build huffman table for literal/length codes */ err = construct(lencode, lengths, nlen); /* incomplete code ok only for single length 1 code */ if (err < 0) return -7; if((0 != err) && (nlen != (lencode->count[0] + lencode->count[1]))) return -7; /* build huffman table for distance codes */ set = lengths + (nlen * array); err = construct(distcode, set, ndist); /* incomplete code ok only for single length 1 code */ if (err < 0) return -8; if((0 != err) && (ndist != (distcode->count[0] + distcode->count[1]))) return -8; /* decode data until end-of-block code */ hold = codes(s, lencode, distcode); return hold; } /* * Inflate source to dest. On return, destlen and sourcelen are updated to the * size of the uncompressed data and the size of the deflate data respectively. * On success, the return value of puff() is zero. If there is an error in the * source data, i.e. it is not in the deflate format, then a negative value is * returned. If there is not enough input available or there is not enough * output space, then a positive error is returned. In that case, destlen and * sourcelen are not updated to facilitate retrying from the beginning with the * provision of more input data or more output space. In the case of invalid * inflate data (a negative error), the dest and source pointers are updated to * facilitate the debugging of deflators. * * puff() also has a mode to determine the size of the uncompressed output with * no output written. For this dest must be (unsigned char *)0. In this case, * the input value of *destlen is ignored, and on return *destlen is set to the * size of the uncompressed output. 
* * The return codes are: * * 2: available inflate data did not terminate * 1: output space exhausted before completing inflate * 0: successful inflate * -1: invalid block type (type == 3) * -2: stored block length did not match one's complement * -3: dynamic block code description: too many length or distance codes * -4: dynamic block code description: code lengths codes incomplete * -5: dynamic block code description: repeat lengths with no first length * -6: dynamic block code description: repeat more than specified lengths * -7: dynamic block code description: invalid literal/length code lengths * -8: dynamic block code description: invalid distance code lengths * -9: dynamic block code description: missing end-of-block code * -10: invalid literal/length or distance code in fixed or dynamic block * -11: distance is too far back in fixed or dynamic block * * Format notes: * * - Three bits are read for each block to determine the kind of block and * whether or not it is the last block. Then the block is decoded and the * process repeated if it was not the last block. * * - The leftover bits in the last byte of the deflate data after the last * block (if it was a fixed or dynamic block) are undefined and have no * expected values to check. 
*/ struct puffer { int error; size_t destlen; size_t sourcelen; }; struct puffer* puff(char* dest, size_t destlen, char* source, size_t sourcelen) { struct state* s = calloc(1, sizeof(struct state)); /* input/output state */ int last; int type; /* block information */ int err; /* return value */ /* initialize output state */ s->out = dest; s->outlen = destlen; /* ignored if dest is NIL */ s->outcnt = 0; /* initialize input state */ s->in = source; s->inlen = sourcelen; s->incnt = 0; s->bitbuf = 0; s->bitcnt = 0; /* process blocks until last block or error */ do { last = bits(s, 1); /* one if last block */ type = bits(s, 2); /* block type 0..3 */ if(0 == type) { err = stored(s); } else if(1 == type) { err = fixed(s); } else if(2 == type) { err = dynamic(s); } else err = -1; if (err != 0) break; /* return with error */ } while (!last); /* update the lengths and return */ struct puffer* r = calloc(1, sizeof(struct puffer)); r->error = err; r->destlen = s->outcnt; r->sourcelen = s->incnt; return r; } void write_blob(char* s, int start, int len, FILE* f) { char* table = "0123456789ABCDEF"; if(start > len) return; int i = s[start] & 0xFF; fputc(table[(i >> 4)], f); fputc(table[(i & 0xF)], f); fputc(' ', f); if(start == len) fputc('\n', f); else fputc(' ', f); write_blob(s, start + 1, len, f); } #define FTEXT 0x01 #define FHCRC 0x02 #define FEXTRA 0x04 #define FNAME 0x08 #define FCOMMENT 0x10 struct gz { char* HEADER; int ID; int CM; int FLG; int MTIME; int XFL; int OS; int XLEN; char* FLG_FEXTRA; char* FLG_FNAME; char* FLG_FCOMMENT; int CRC16; char* FLG_FHCRC; char* block; int CRC32; size_t ISIZE; size_t file_size; }; /* Read the input file *name, or stdin if name is NULL, into allocated memory. Reallocate to larger buffers until the entire file is read in. Return a pointer to the allocated data, or NULL if there was a memory allocation failure. *len is the number of bytes of data read from the input file (even if load() returns NULL). 
If the input file was empty or could not be opened or read, *len is zero. */ struct gz* load(char* name) { struct gz* r = calloc(1, sizeof(struct gz)); char* scratch = calloc(5, sizeof(char)); FILE* f = fopen(name, "r"); int count; int ID1; int ID2; int count1; int count2; int count3; int count4; int c; int i; char* s = calloc(11, sizeof(char)); if(NULL == f) { fputs("unable to open file: ", stderr); fputs(name, stderr); fputs("\nfor reading\n", stderr); return NULL; } fseek(f, 0, SEEK_END); r->file_size = ftell(f); fseek(f, 0, SEEK_SET); count = fread(s, sizeof(char), 10, f); if(10 != count) { fputs("incomplete gzip header\n", stderr); return NULL; } /* Verify header */ r->HEADER = s; #if defined(DEBUG) write_blob(s, 0, 10, stderr); #endif ID1 = (s[0] & 0xFF); ID2 = (s[1] & 0xFF); r->ID = ((ID1 << 8) | ID2); if(0x1f8b != r->ID) { fputs("bad header\n", stderr); return NULL; } /* Verify Compression */ r->CM = (r->HEADER[2] & 0xFF); if(8 != r->CM) { fputs("NOT DEFLATE COMPRESSION\n", stderr); return NULL; } /* Get specials specified in flag bits */ r->FLG = (r->HEADER[3] & 0xFF); if(0 != (FEXTRA & r->FLG)) { count = fread(scratch, sizeof(char), 4, f); count1 = (scratch[0] & 0xFF); count2 = (scratch[1] & 0xFF); count3 = (scratch[2] & 0xFF); count4 = (scratch[3] & 0xFF); count = (count1 << 24) | (count2 << 16) | (count3 << 8) | count4; require(0 < count, "FEXTRA field needs to be a positive number of bytes in size\n"); require(100000000 > count, "we don't support FEXTRA fields greater than 100MB in size\n"); r->FLG_FEXTRA = calloc(count + 1, sizeof(char)); fread(r->FLG_FEXTRA, sizeof(char), count, f); } if(0 != (FNAME & r->FLG)) { r->FLG_FNAME = calloc(r->file_size, sizeof(char)); i = 0; do { c = fgetc(f); require(0 <= c, "received a non-null terminated filename in the file\n"); r->FLG_FNAME[i] = c; i = i + 1; } while(0 != c); } if(0 != (FCOMMENT & r->FLG)) { r->FLG_FCOMMENT = calloc(r->file_size, sizeof(char)); i = 0; do { c = fgetc(f); require(0 <= c, "received a 
non-null terminated comment in the file\n"); r->FLG_FCOMMENT[i] = c; i = i + 1; } while(0 != c); } if(0 != (FHCRC & r->FLG)) { /* Not implemented */ fputs("FHCRC is not implemented at this time\n", stderr); return NULL; } if(NULL == r->FLG_FNAME) { count = strlen(name) - 3; r->FLG_FNAME = calloc(count + 4, sizeof(char)); i = 0; while(i < count) { r->FLG_FNAME[i] = name[i]; i = i + 1; } } r->block = calloc(r->file_size, sizeof(char)); count = fread(r->block, sizeof(char), r->file_size, f); r->ISIZE = count; fclose(f); return r; } int main(int argc, char **argv) { struct puffer* ret; char* name; char* buffer; char *dest; struct gz* in; FILE* out; int FUZZING = FALSE; /* process arguments */ int i = 1; while (i < argc) { if(NULL == argv[i]) { i = i + 1; } else if(match(argv[i], "-f") || match(argv[i], "--file")) { name = argv[i+1]; require(NULL != name, "the --file option requires a filename to be given\n"); i = i + 2; } else if(match(argv[i], "-o") || match(argv[i], "--output")) { dest = argv[i+1]; require(NULL != dest, "the --output option requires a filename to be given\n"); i = i + 2; } else if(match(argv[i], "--chaos") || match(argv[i], "--fuzz-mode") || match(argv[i], "--fuzzing")) { FUZZING = TRUE; fputs("fuzz-mode enabled, preparing for chaos\n", stderr); i = i + 1; } else if(match(argv[i], "-h") || match(argv[i], "--help")) { fputs("Usage: ", stderr); fputs(argv[0], stderr); fputs(" --file $input.gz", stderr); fputs(" [--output $output] (or it'll use the internal filename)\n", stderr); fputs("--help to get this message\n", stderr); fputs("--fuzz-mode if you wish to fuzz this application safely\n", stderr); exit(EXIT_SUCCESS); } else { fputs("Unknown option:", stderr); fputs(argv[i], stderr); fputs("\nAborting to avoid problems\n", stderr); exit(EXIT_FAILURE); } } in = load(name); if (in == NULL) { fputs("memory allocation failure\nDidn't read file\n", stderr); exit(1); } ret = puff(0, 0, in->block, in->ISIZE); if(NULL == dest) { dest = in->FLG_FNAME; } 
fputs(name, stderr); fputs(" => ", stderr); fputs(dest, stderr); if (0 != ret->error) { fputs("puff() failed with return code ", stderr); fputs(int2str(ret->error, 10, TRUE), stderr); fputc('\n', stderr); exit(3); } else { fputs(": succeeded uncompressing ", stderr); fputs(int2str(ret->destlen, 10, FALSE), stderr); fputs(" bytes\n", stderr); } buffer = malloc(ret->destlen); if (buffer == NULL) { fputs("memory allocation failure\n", stderr); return 4; } ret = puff(buffer, ret->destlen, in->block, in->ISIZE); if(!FUZZING) { out = fopen(dest, "w"); fwrite(buffer, 1, ret->destlen, out); } else { fputs("skipped write to file due to --fuzz-mode flag\n", stderr); } free(buffer); /* clean up */ return 0; }
/* Copyright (C) 2003, 2007 Rob Landley <rob@landley.net> * Copyright (C) 2022 Paul Dersey <pdersey@gmail.com> * This file is part of mescc-tools-extra * * mescc-tools-extra is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * mescc-tools-extra is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with mescc-tools-extra. If not, see <http://www.gnu.org/licenses/>. */ /* bzcat.c - bzip2 decompression * * Copyright 2003, 2007 Rob Landley <rob@landley.net> * * Based on a close reading (but not the actual code) of the original bzip2 * decompression code by Julian R Seward (jseward@acm.org), which also * acknowledges contributions by Mike Burrows, David Wheeler, Peter Fenwick, * Alistair Moffat, Radford Neal, Ian H. Witten, Robert Sedgewick, and * Jon L. Bentley. * * No standard. */ /******************************************************************************** * unbz2.c is a bz2 file decompression utility based on bzcat.c with * * modifications to enable being built by M2-Planet with M2libc. 
* ********************************************************************************/ #include <stdio.h> #include <string.h> #include <stdlib.h> #include <unistd.h> #include <fcntl.h> #include "M2libc/bootstrappable.h" // Constants for huffman coding #define MAX_GROUPS 6 #define GROUP_SIZE 50 /* 64 would have been more efficient */ #define MAX_HUFCODE_BITS 20 /* Longest huffman code allowed */ #define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */ #define SYMBOL_RUNA 0 #define SYMBOL_RUNB 1 // Other housekeeping constants #define IOBUF_SIZE 4096 // Status return values #define RETVAL_LAST_BLOCK (-100) #define RETVAL_NOT_BZIP_DATA (-1) #define RETVAL_DATA_ERROR (-2) #define RETVAL_OBSOLETE_INPUT (-3) #define INT_MAX 2147483647 // This is what we know about each huffman coding group struct group_data { int *limit; int *base; int *permute; char minLen; char maxLen; }; // Data for burrows wheeler transform struct bwdata { unsigned origPtr; int *byteCount; // State saved when interrupting output int writePos; int writeRun; int writeCount; int writeCurrent; unsigned dataCRC; unsigned headerCRC; unsigned *dbuf; }; // Structure holding all the housekeeping data, including IO buffers and // memory that persists between calls to bunzip struct bunzip_data { // Input stream, input buffer, input bit buffer int in_fd; int inbufCount; int inbufPos; char *inbuf; unsigned inbufBitCount; unsigned inbufBits; // Output buffer char *outbuf; int outbufPos; unsigned totalCRC; // First pass decompression data (Huffman and MTF decoding) char *selectors; // nSelectors=15 bits struct group_data *groups; // huffman coding tables int symTotal; int groupCount; int nSelectors; unsigned *symToByte; unsigned *mtfSymbol; // The CRC values stored in the block header and calculated from the data unsigned *crc32Table; // Second pass decompression data (burrows-wheeler transform) unsigned dbufSize; struct bwdata* bwdata; }; int FUZZING; void crc_init(unsigned *crc_table, int little_endian) { unsigned 
i; unsigned j; unsigned c; // Init the CRC32 table (big endian) for(i = 0; i < 256; i += 1) { if(little_endian) { c = i; } else { c = i << 24; } for(j = 8; j > 0; j -= 1) { if(little_endian) { if(c & 1) { c = (c >> 1) ^ 0xEDB88320; } else { c = c >> 1; } } else { if(c & 0x80000000) { c = (c << 1) ^ 0x04C11DB7; #if defined(__M2__) // & 0xFFFFFFFF not working if(sizeof(unsigned) == 8) { c <<= 32; c >>= 32; } #endif } else { c = c << 1; } } } crc_table[i] = c; } } // Return the next nnn bits of input. All reads from the compressed input // are done through this function. All reads are big endian. unsigned get_bits(struct bunzip_data *bd, char bits_wanted) { unsigned bits = 0; // If we need to get more data from the byte buffer, do so. (Loop getting // one byte at a time to enforce endianness and avoid unaligned access.) while(bd->inbufBitCount < bits_wanted) { // If we need to read more data from file into byte buffer, do so if(bd->inbufPos == bd->inbufCount) { if(0 >= (bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE))) { exit(1); } bd->inbufPos = 0; } // Avoid 32-bit overflow (dump bit buffer to top of output) if(bd->inbufBitCount >= 24) { bits = bd->inbufBits & ((1 << bd->inbufBitCount) - 1); bits_wanted = bits_wanted - bd->inbufBitCount; bits = bits << bits_wanted; bd->inbufBitCount = 0; } // Grab next 8 bits of input from buffer. bd->inbufBits = (bd->inbufBits << 8) | (bd->inbuf[bd->inbufPos] & 0xFF); bd->inbufPos = bd->inbufPos + 1; bd->inbufBitCount = bd->inbufBitCount + 8; } // Calculate result bd->inbufBitCount = bd->inbufBitCount - bits_wanted; bits = bits | ((bd->inbufBits >> bd->inbufBitCount) & ((1 << bits_wanted) - 1)); return bits; } /* Read block header at start of a new compressed data block. Consists of: * * 48 bits : Block signature, either pi (data block) or e (EOF block). * 32 bits : bw->headerCRC * 1 bit : obsolete feature flag. * 24 bits : origPtr (Burrows-wheeler unwind index, only 20 bits ever used) * 16 bits : Mapping table index. 
*[16 bits]: symToByte[symTotal] (Mapping table. For each bit set in mapping * table index above, read another 16 bits of mapping table data. * If correspondig bit is unset, all bits in that mapping table * section are 0.) * 3 bits : groupCount (how many huffman tables used to encode, anywhere * from 2 to MAX_GROUPS) * variable: hufGroup[groupCount] (MTF encoded huffman table data.) */ int read_block_header(struct bunzip_data *bd, struct bwdata *bw) { struct group_data *hufGroup; int hh; int ii; int jj; int kk; int symCount; int *base; int *limit; unsigned uc; unsigned *length = calloc(MAX_SYMBOLS, sizeof(unsigned)); unsigned *temp = calloc(MAX_HUFCODE_BITS + 1, sizeof(unsigned)); size_t minLen; size_t maxLen; int pp; #if defined(__M2__) int int_array = sizeof(int); int group_data_array = sizeof(struct group_data); #else int int_array = 1; int group_data_array = 1; #endif size_t hold; // Read in header signature and CRC (which is stored big endian) ii = get_bits(bd, 24); jj = get_bits(bd, 24); bw->headerCRC = get_bits(bd, 32); // Is this the EOF block with CRC for whole file? (Constant is "e") if(ii == 0x177245 && jj == 0x385090) { free(length); free(temp); return RETVAL_LAST_BLOCK; } // Is this a valid data block? (Constant is "pi".) if(ii != 0x314159 || jj != 0x265359) { return RETVAL_NOT_BZIP_DATA; } // We can add support for blockRandomised if anybody complains. if(get_bits(bd, 1)) { return RETVAL_OBSOLETE_INPUT; } if((bw->origPtr = get_bits(bd, 24)) > bd->dbufSize) { return RETVAL_DATA_ERROR; } // mapping table: if some byte values are never used (encoding things // like ascii text), the compression code removes the gaps to have fewer // symbols to deal with, and writes a sparse bitfield indicating which // values were present. We make a translation table to convert the symbols // back to the corresponding bytes. 
hh = get_bits(bd, 16); bd->symTotal = 0; for(ii = 0; ii < 16; ii += 1) { if(hh & (1 << (15 - ii))) { kk = get_bits(bd, 16); for(jj = 0; jj < 16; jj += 1) { if(kk & (1 << (15 - jj))) { bd->symToByte[bd->symTotal] = (16 * ii) + jj; bd->symTotal += 1; } } } } // How many different huffman coding groups does this block use? bd->groupCount = get_bits(bd, 3); if(bd->groupCount < 2 || bd->groupCount > MAX_GROUPS) { return RETVAL_DATA_ERROR; } // nSelectors: Every GROUP_SIZE many symbols we switch huffman coding // tables. Each group has a selector, which is an index into the huffman // coding table arrays. // // Read in the group selector array, which is stored as MTF encoded // bit runs. (MTF = Move To Front. Every time a symbol occurs its moved // to the front of the table, so it has a shorter encoding next time.) if(!(bd->nSelectors = get_bits(bd, 15))) { return RETVAL_DATA_ERROR; } for(ii = 0; ii < bd->groupCount; ii += 1) { bd->mtfSymbol[ii] = ii; } for(ii = 0; ii < bd->nSelectors; ii += 1) { // Get next value for(jj = 0; get_bits(bd, 1); jj += 1) if(jj >= bd->groupCount) { return RETVAL_DATA_ERROR; } // Decode MTF to get the next selector, and move it to the front. uc = bd->mtfSymbol[jj]; while(jj) { jj = jj - 1; bd->mtfSymbol[jj + 1] = bd->mtfSymbol[jj]; } bd->mtfSymbol[0] = bd->selectors[ii] = uc; } // Read the huffman coding tables for each group, which code for symTotal // literal symbols, plus two run symbols (RUNA, RUNB) symCount = bd->symTotal + 2; for(jj = 0; jj < bd->groupCount; jj += 1) { // Read lengths hh = get_bits(bd, 5); for(ii = 0; ii < symCount; ii += 1) { while(TRUE) { // !hh || hh > MAX_HUFCODE_BITS in one test. if(MAX_HUFCODE_BITS - 1 < hh - 1) { return RETVAL_DATA_ERROR; } // Grab 2 bits instead of 1 (slightly smaller/faster). Stop if // first bit is 0, otherwise second bit says whether to // increment or decrement. 
kk = get_bits(bd, 2); if(kk & 2) { hh += (1 - ((kk & 1) << 1)); } else { bd->inbufBitCount += 1; break; } } length[ii] = hh; } // Find largest and smallest lengths in this group minLen = maxLen = length[0]; for(ii = 1; ii < symCount; ii += 1) { hold = length[ii]; if(hold > maxLen) { maxLen = hold; } else if(hold < minLen) { minLen = hold; } } /* Calculate permute[], base[], and limit[] tables from length[]. * * permute[] is the lookup table for converting huffman coded symbols * into decoded symbols. It contains symbol values sorted by length. * * base[] is the amount to subtract from the value of a huffman symbol * of a given length when using permute[]. * * limit[] indicates the largest numerical value a symbol with a given * number of bits can have. It lets us know when to stop reading. * * To use these, keep reading bits until value <= limit[bitcount] or * youve read over 20 bits (error). Then the decoded symbol * equals permute[hufcode_value - base[hufcode_bitcount]]. */ hufGroup = bd->groups + (group_data_array * jj); require(minLen > 0, "hufGroup minLen can't have negative values\n"); require(minLen <= MAX_HUFCODE_BITS, "hufGroup minLen can't exceed MAX_HUFCODE_BITS\n"); hufGroup->minLen = minLen; require(maxLen > 0, "hufGroup maxLen can't have negative values\n"); require(maxLen <= MAX_HUFCODE_BITS, "hufGroup maxLen can't exceed MAX_HUFCODE_BITS\n"); hufGroup->maxLen = maxLen; // Note that minLen cant be smaller than 1, so we adjust the base // and limit array pointers so were not always wasting the first // entry. We do this again when using them (during symbol decoding). 
base = hufGroup->base - (int_array * 1); require(0 <= base, "can't have a negative hufGroup->base\n"); limit = hufGroup->limit - (int_array * 1); // zero temp[] and limit[], and calculate permute[] pp = 0; for(ii = minLen; ii <= maxLen; ii += 1) { require(MAX_HUFCODE_BITS >= ii, "Invalid HUFCODE_BITS length\n"); temp[ii] = 0; limit[ii] = 0; for(hh = 0; hh < symCount; hh += 1) { if(length[hh] == ii) { require(MAX_SYMBOLS >= pp, "pp exceeded MAX_SYMBOLS\n"); hufGroup->permute[pp] = hh; pp += 1; } } } // Count symbols coded for at each bit length for(ii = 0; ii < symCount; ii += 1) { hold = length[ii]; require(MAX_HUFCODE_BITS >= hold, "Invalid HUFCODE_BITS length\n"); temp[hold] += 1; } /* Calculate limit[] (the largest symbol-coding value at each bit * length, which is (previous limit<<1)+symbols at this level), and * base[] (number of symbols to ignore at each bit length, which is * limit minus the cumulative count of symbols coded for already). */ pp = hh = 0; for(ii = minLen; ii < maxLen; ii += 1) { pp += temp[ii]; limit[ii] = pp - 1; pp = pp << 1; hh += temp[ii]; base[ii + 1] = pp - hh; } limit[maxLen] = pp + temp[maxLen] - 1; limit[maxLen + 1] = INT_MAX; base[minLen] = 0; } free(length); free(temp); return 0; } /* First pass, read blocks symbols into dbuf[dbufCount]. * * This undoes three types of compression: huffman coding, run length encoding, * and move to front encoding. We have to undo all those to know when weve * read enough input. */ int read_huffman_data(struct bunzip_data *bd, struct bwdata *bw) { struct group_data *hufGroup; int ii; int jj; int kk; int runPos; int dbufCount; int symCount; int selector; int nextSym; int *byteCount; int *base; int *limit; unsigned hh; unsigned *dbuf = bw->dbuf; unsigned uc; #if defined(__M2__) int int_array = sizeof(int); int group_data_array = sizeof(struct group_data); #else int int_array = 1; int group_data_array = 1; #endif // Weve finished reading and digesting the block header. 
Now read this // blocks huffman coded symbols from the file and undo the huffman coding // and run length encoding, saving the result into dbuf[dbufCount++] = uc // Initialize symbol occurrence counters and symbol mtf table byteCount = bw->byteCount; for(ii = 0; ii < 256; ii += 1) { byteCount[ii] = 0; bd->mtfSymbol[ii] = ii; } // Loop through compressed symbols. This is the first "tight inner loop" // that needs to be micro-optimized for speed. (This one fills out dbuf[] // linearly, staying in cache more, so isnt as limited by DRAM access.) runPos = 0; dbufCount = 0; symCount = 0; selector = 0; // Some unnecessary initializations to shut gcc up. base = 0; limit = 0; hufGroup = 0; hh = 0; while(TRUE) { // Have we reached the end of this huffman group? if(!(symCount)) { // Determine which huffman coding group to use. symCount = GROUP_SIZE - 1; if(selector >= bd->nSelectors) { return RETVAL_DATA_ERROR; } hufGroup = bd->groups + (group_data_array * bd->selectors[selector]); selector += 1; base = hufGroup->base - (int_array * 1); require(0 <= base, "can't have negative hufGroup->base\n"); limit = hufGroup->limit - (int_array * 1); } else { symCount -= 1; } // Read next huffman-coded symbol (into jj). ii = hufGroup->minLen; jj = get_bits(bd, ii); while(jj > limit[ii]) { // if (ii > hufGroup->maxLen) return RETVAL_DATA_ERROR; ii += 1; // Unroll get_bits() to avoid a function call when the datas in // the buffer already. 
if(bd->inbufBitCount) { bd->inbufBitCount -= 1; kk = (bd->inbufBits >> bd->inbufBitCount) & 1; } else { kk = get_bits(bd, 1); } jj = (jj << 1) | kk; } // Huffman decode jj into nextSym (with bounds checking) jj -= base[ii]; if(ii > hufGroup->maxLen || jj >= MAX_SYMBOLS) { return RETVAL_DATA_ERROR; } nextSym = hufGroup->permute[jj]; // If this is a repeated run, loop collecting data if(nextSym <= SYMBOL_RUNB) { // If this is the start of a new run, zero out counter if(!runPos) { runPos = 1; hh = 0; } /* Neat trick that saves 1 symbol: instead of or-ing 0 or 1 at each bit position, add 1 or 2 instead. For example, 1011 is 1<<0 + 1<<1 + 2<<2. 1010 is 2<<0 + 2<<1 + 1<<2. You can make any bit pattern that way using 1 less symbol than the basic or 0/1 method (except all bits 0, which would use no symbols, but a run of length 0 doesnt mean anything in this context). Thus space is saved. */ hh += (runPos << nextSym); // +runPos if RUNA; +2*runPos if RUNB runPos = runPos << 1; continue; } /* When we hit the first non-run symbol after a run, we now know how many times to repeat the last literal, so append that many copies to our buffer of decoded symbols (dbuf) now. (The last literal used is the one at the head of the mtfSymbol array.) */ if(runPos) { runPos = 0; // Check for integer overflow if(hh > bd->dbufSize || dbufCount + hh > bd->dbufSize) { return RETVAL_DATA_ERROR; } uc = bd->symToByte[bd->mtfSymbol[0]]; byteCount[uc] += hh; while(hh) { hh -= 1; dbuf[dbufCount] = uc; dbufCount += 1; } } // Is this the terminating symbol? if(nextSym > bd->symTotal) { break; } /* At this point, the symbol we just decoded indicates a new literal character. Subtract one to get the position in the MTF array at which this literal is currently to be found. (Note that the result cant be -1 or 0, because 0 and 1 are RUNA and RUNB. Another instance of the first symbol in the mtf array, position 0, would have been handled as part of a run.) 
*/ if(dbufCount >= bd->dbufSize) { return RETVAL_DATA_ERROR; } ii = nextSym - 1; uc = bd->mtfSymbol[ii]; // On my laptop, unrolling this memmove() into a loop shaves 3.5% off // the total running time. while(ii) { ii -= 1; bd->mtfSymbol[ii + 1] = bd->mtfSymbol[ii]; } bd->mtfSymbol[0] = uc; uc = bd->symToByte[uc]; // We have our literal byte. Save it into dbuf. byteCount[uc] += 1; dbuf[dbufCount] = uc; dbufCount += 1; } // Now we know what dbufCount is, do a better sanity check on origPtr. if(bw->origPtr >= (bw->writeCount = dbufCount)) { return RETVAL_DATA_ERROR; } return 0; } // Flush output buffer to disk void flush_bunzip_outbuf(struct bunzip_data *bd, int out_fd) { if(bd->outbufPos) { if(write(out_fd, bd->outbuf, bd->outbufPos) != bd->outbufPos) { exit(1); } bd->outbufPos = 0; } } void burrows_wheeler_prep(struct bunzip_data *bd, struct bwdata *bw) { int ii; int jj; int kk; unsigned *dbuf = bw->dbuf; int *byteCount = bw->byteCount; unsigned uc; // Turn byteCount into cumulative occurrence counts of 0 to n-1. jj = 0; for(ii = 0; ii < 256; ii += 1) { kk = jj + byteCount[ii]; byteCount[ii] = jj; jj = kk; } // Use occurrence counts to quickly figure out what order dbuf would be in // if we sorted it. for(ii = 0; ii < bw->writeCount; ii += 1) { uc = dbuf[ii] & 0xFF; dbuf[byteCount[uc]] = dbuf[byteCount[uc]] | (ii << 8); byteCount[uc] += 1; } // blockRandomised support would go here. // Using ii as position, jj as previous character, hh as current character, // and uc as run count. bw->dataCRC = 0xffffffff; /* Decode first byte by hand to initialize "previous" byte. Note that it doesnt get output, and if the first three characters are identical it doesnt qualify as a run (hence uc=255, which will either wrap to 1 or get reset). 
*/ if(bw->writeCount) { bw->writePos = dbuf[bw->origPtr]; bw->writeCurrent = bw->writePos; bw->writePos = bw->writePos >> 8; bw->writeRun = -1; } } // Decompress a block of text to intermediate buffer int read_bunzip_data(struct bunzip_data *bd) { int rc = read_block_header(bd, bd->bwdata); if(!rc) { rc = read_huffman_data(bd, bd->bwdata); } // First thing that can be done by a background thread. burrows_wheeler_prep(bd, bd->bwdata); return rc; } // Undo burrows-wheeler transform on intermediate buffer to produce output. // If !len, write up to len bytes of data to buf. Otherwise write to out_fd. // Returns len ? bytes written : 0. Notice all errors are negative #s. // // Burrows-wheeler transform is described at: // http://dogma.net/markn/articles/bwt/bwt.htm // http://marknelson.us/1996/09/01/bwt/ int write_bunzip_data(struct bunzip_data *bd, struct bwdata *bw, int out_fd, char *outbuf, int len) { unsigned *dbuf = bw->dbuf; int count; int pos; int current; int run; int copies; int outbyte; int previous; int gotcount = 0; int i; int crc_index; while(TRUE) { // If last read was short due to end of file, return last block now if(bw->writeCount < 0) { return bw->writeCount; } // If we need to refill dbuf, do it. if(!bw->writeCount) { i = read_bunzip_data(bd); if(i) { if(i == RETVAL_LAST_BLOCK) { bw->writeCount = i; return gotcount; } else { return i; } } } // loop generating output count = bw->writeCount; pos = bw->writePos; current = bw->writeCurrent; run = bw->writeRun; while(count) { // If somebody (like tar) wants a certain number of bytes of // data from memory instead of written to a file, humor them. if(len && bd->outbufPos >= len) { goto dataus_interruptus; } count -= 1; // Follow sequence vector to undo Burrows-Wheeler transform. 
previous = current; pos = dbuf[pos]; current = pos & 0xff; pos = pos >> 8; // Whenever we see 3 consecutive copies of the same byte, // the 4th is a repeat count if(run == 3) { run += 1; copies = current; outbyte = previous; current = -1; } else { run += 1; copies = 1; outbyte = current; } // Output bytes to buffer, flushing to file if necessary while(copies) { copies -= 1; if(bd->outbufPos == IOBUF_SIZE) { flush_bunzip_outbuf(bd, out_fd); } bd->outbuf[bd->outbufPos] = outbyte; bd->outbufPos += 1; crc_index = ((bw->dataCRC >> 24) ^ outbyte) & 0xFF; bw->dataCRC = (bw->dataCRC << 8) ^ bd->crc32Table[crc_index]; } if(current != previous) { run = 0; } } // decompression of this block completed successfully bw->dataCRC = ~(bw->dataCRC); #if defined(__M2__) // & 0xFFFFFFFF not working if(sizeof(unsigned) == 8) { bw->dataCRC <<= 32; bw->dataCRC >>= 32; } #endif bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bw->dataCRC; // if this block had a crc error, force file level crc error. if(bw->dataCRC != bw->headerCRC) { bd->totalCRC = bw->headerCRC + 1; return RETVAL_LAST_BLOCK; } dataus_interruptus: bw->writeCount = count; if(len) { gotcount += bd->outbufPos; memcpy(outbuf, bd->outbuf, len); // If we got enough data, checkpoint loop state and return len -= bd->outbufPos; if(len < 1) { bd->outbufPos -= len; if(bd->outbufPos) { memmove(bd->outbuf, bd->outbuf + len, bd->outbufPos); } bw->writePos = pos; bw->writeCurrent = current; bw->writeRun = run; return gotcount; } } } } // Allocate the structure, read file header. If !len, src_fd contains // filehandle to read from. Else inbuf contains data. int start_bunzip(struct bunzip_data **bdp, int src_fd) { struct bunzip_data *bd; unsigned i; // Figure out how much data to allocate. i = sizeof(struct bunzip_data); // Allocate bunzip_data. Most fields initialize to zero. 
*bdp = malloc(i); bd = *bdp; memset(bd, 0, i); bd->inbuf = calloc(IOBUF_SIZE, sizeof(char)); bd->outbuf = calloc(IOBUF_SIZE, sizeof(char)); bd->selectors = calloc(32768, sizeof(char)); bd->groups = calloc(MAX_GROUPS, sizeof(struct group_data)); for(i = 0; i < MAX_GROUPS; i += 1) { bd->groups[i].limit = calloc(MAX_HUFCODE_BITS + 1, sizeof(int)); bd->groups[i].base = calloc(MAX_HUFCODE_BITS, sizeof(int)); bd->groups[i].permute = calloc(MAX_SYMBOLS, sizeof(int)); } bd->symToByte = calloc(256, sizeof(unsigned)); bd->mtfSymbol = calloc(256, sizeof(unsigned)); bd->crc32Table = calloc(256, sizeof(unsigned)); bd->bwdata = calloc(1, sizeof(struct bwdata)); bd->bwdata->byteCount = calloc(256, sizeof(int)); unsigned *crc32Table; bd->in_fd = src_fd; crc_init(bd->crc32Table, 0); // Ensure that file starts with "BZh". char *header = "BZh"; for(i = 0; i < 3; i += 1) if(get_bits(bd, 8) != header[i]) { return RETVAL_NOT_BZIP_DATA; } // Next byte ascii 1-9, indicates block size in units of 100k of // uncompressed data. Allocate intermediate buffer for block. i = get_bits(bd, 8); if(i < 49 || i > 57) { return RETVAL_NOT_BZIP_DATA; } bd->dbufSize = 100000 * (i - 48); bd->bwdata[0].dbuf = malloc(bd->dbufSize * sizeof(int)); return 0; } // Example usage: decompress src_fd to dst_fd. (Stops at end of bzip data, // not end of file.) 
int bunzipStream(int src_fd, int dst_fd) { struct bunzip_data *bd; int i; int j; if(!(i = start_bunzip(&bd, src_fd))) { i = write_bunzip_data(bd, bd->bwdata, dst_fd, 0, 0); if(i == RETVAL_LAST_BLOCK) { if(bd->bwdata[0].headerCRC == bd->totalCRC) { i = 0; } else { i = RETVAL_DATA_ERROR; } } } flush_bunzip_outbuf(bd, dst_fd); free(bd->bwdata[0].dbuf); free(bd->inbuf); free(bd->outbuf); free(bd->selectors); for(j = 0; j < MAX_GROUPS; j += 1) { free(bd->groups[j].limit); free(bd->groups[j].base); free(bd->groups[j].permute); } free(bd->groups); free(bd->symToByte); free(bd->mtfSymbol); free(bd->crc32Table); free(bd->bwdata->byteCount); free(bd->bwdata); free(bd); return -i; } void do_bunzip2(int in_fd, int out_fd) { int err = bunzipStream(in_fd, out_fd); if(err) { exit(1); } } int main(int argc, char **argv) { char *name = NULL; char *dest = NULL; FUZZING = FALSE; /* process arguments */ int i = 1; while(i < argc) { if(NULL == argv[i]) { i += 1; } else if(match(argv[i], "-f") || match(argv[i], "--file")) { name = argv[i + 1]; require(NULL != name, "the --file option requires a filename to be given\n"); i += 2; } else if(match(argv[i], "-o") || match(argv[i], "--output")) { dest = argv[i + 1]; require(NULL != dest, "the --output option requires a filename to be given\n"); i += 2; } else if(match(argv[i], "--fuzzing-mode")) { FUZZING = TRUE; i += 1; } else if(match(argv[i], "-h") || match(argv[i], "--help")) { fputs("Usage: ", stderr); fputs(argv[0], stderr); fputs(" --file $input.bz2", stderr); fputs(" --output $output\n", stderr); fputs("--help to get this message\n", stderr); exit(EXIT_SUCCESS); } else { fputs("Unknown option:", stderr); fputs(argv[i], stderr); fputs("\nAborting to avoid problems\n", stderr); exit(EXIT_FAILURE); } } /* Deal with no input */ if(NULL == name) { fputs("an input file (--file $name) must be provided\n", stderr); exit(EXIT_FAILURE); } int in_fd = open(name, 0, 0); if(in_fd < 0) { fputs("Unable to open input file\n", stderr); 
exit(EXIT_FAILURE); } /* If an output name isn't provided */ if(NULL == dest) { int length = strlen(name); require(length > 4, "file name length not sufficient, please provide output name with --output $filename\n"); /* Assume they want the output file name to be the input file name minus the .bz2 */ dest = calloc(length, sizeof(char)); require(NULL != dest, "Failed to allocate new output file name\n"); /* do name.bz2 => name */ strcpy(dest, name); dest[length-3] = 0; } int out_fd; if(FUZZING) { /* Dump to /dev/null the garbage data produced during fuzzing */ out_fd = open("/dev/null", O_WRONLY|O_CREAT|O_TRUNC, 0600); } else { out_fd = open(dest, O_WRONLY|O_CREAT|O_TRUNC, 0600); } if(out_fd < 0) { fputs("Unable to open output file for writing\n", stderr); exit(EXIT_FAILURE); } do_bunzip2(in_fd, out_fd); close(in_fd); close(out_fd); exit(0); }
/* Copyright (C) 2019 pts@fazekas.hu * Copyright (C) 2024 Jeremiah Orians * Copyright (C) 2024 Gábor Stefanik * This file is part of mescc-tools-extra * * mescc-tools-extra is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * mescc-tools-extra is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with mescc-tools-extra. If not, see <http://www.gnu.org/licenses/>. * * Built upon the great work in: * muxzcat.c: tiny .xz and .lzma decompression filter * by pts@fazekas.hu at Wed Jan 30 15:15:23 CET 2019 * from https://github.com/pts/muxzcat * For .xz it supports only LZMA2 (no other filters such as BCJ). * For .lzma it doesn't work with files with 5 <= lc + lp <= 12. * It doesn't verify checksums (e.g. CRC-32 and CRC-64). * It extracts the first stream only, and it ignores the index. * * LZMA algorithm implementation based on * https://github.com/pts/pts-tiny-7z-sfx/commit/b9a101b076672879f861d472665afaa6caa6fec1 * , which is based on 7z922.tar.bz2. */ #include <stdio.h> #include <string.h> /* memcpy(), memmove() */ #include <unistd.h> /* read(), write() */ #include <stdint.h> #include <stdlib.h> /* realloc() */ #include "M2libc/bootstrappable.h" /* Constants needed */ #define SZ_OK 0 #define SZ_ERROR_DATA 1 #define SZ_ERROR_MEM 2 /* Out of memory. */ #define SZ_ERROR_CRC 3 #define SZ_ERROR_UNSUPPORTED 4 #define SZ_ERROR_PARAM 5 #define SZ_ERROR_INPUT_EOF 6 /*#define SZ_ERROR_OUTPUT_EOF 7*/ #define SZ_ERROR_READ 8 #define SZ_ERROR_WRITE 9 #define SZ_ERROR_FINISHED_WITH_MARK 15 /* LzmaDec_DecodeToDic stream was finished with end mark. 
*/ #define SZ_ERROR_NOT_FINISHED 16 /* LzmaDec_DecodeToDic stream was not finished, i.e. dicfLimit reached while there is input to decompress */ #define SZ_ERROR_NEEDS_MORE_INPUT 17 /* LzmaDec_DecodeToDic, you must provide more input bytes */ /*#define SZ_MAYBE_FINISHED_WITHOUT_MARK SZ_OK*/ /* LzmaDec_DecodeToDic, there is probability that stream was finished without end mark */ #define SZ_ERROR_CHUNK_NOT_CONSUMED 18 #define SZ_ERROR_NEEDS_MORE_INPUT_PARTIAL 17 /* LzmaDec_DecodeToDic, more input needed, but existing input was partially processed */ #define LZMA_REQUIRED_INPUT_MAX 20 #define LZMA_BASE_SIZE 1846 #define LZMA_LIT_SIZE 768 #define LZMA2_LCLP_MAX 4 #define MAX_DIC_SIZE 1610612736 /* ~1.61 GB. 2 GiB is user virtual memory limit for many 32-bit systems. */ #define MAX_DIC_SIZE_PROP 37 #define MAX_MATCH_SIZE 273 #define kNumTopBits 24 #define kTopValue (1 << kNumTopBits) #define kNumBitModelTotalBits 11 #define kBitModelTotal (1 << kNumBitModelTotalBits) #define kNumMoveBits 5 #define RC_INIT_SIZE 5 #define kNumPosBitsMax 4 #define kNumPosStatesMax (1 << kNumPosBitsMax) #define kLenNumLowBits 3 #define kLenNumLowSymbols (1 << kLenNumLowBits) #define kLenNumMidBits 3 #define kLenNumMidSymbols (1 << kLenNumMidBits) #define kLenNumHighBits 8 #define kLenNumHighSymbols (1 << kLenNumHighBits) #define LenChoice 0 #define LenChoice2 (LenChoice + 1) #define LenLow (LenChoice2 + 1) #define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) #define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) #define kNumLenProbs (LenHigh + kLenNumHighSymbols) #define kNumStates 12 #define kNumLitStates 7 #define kStartPosModelIndex 4 #define kEndPosModelIndex 14 #define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) #define kNumPosSlotBits 6 #define kNumLenToPosStates 4 #define kNumAlignBits 4 #define kAlignTableSize (1 << kNumAlignBits) #define kMatchMinLen 2 #define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) 
#define IsMatch 0 #define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) #define IsRepG0 (IsRep + kNumStates) #define IsRepG1 (IsRepG0 + kNumStates) #define IsRepG2 (IsRepG1 + kNumStates) #define IsRep0Long (IsRepG2 + kNumStates) #define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) #define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) #define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) #define LenCoder (Align + kAlignTableSize) #define RepLenCoder (LenCoder + kNumLenProbs) #define Literal (RepLenCoder + kNumLenProbs) #define LZMA_DIC_MIN (1 << 12) #define SZ_ERROR_BAD_MAGIC 51 #define SZ_ERROR_BAD_STREAM_FLAGS 52 /* SZ_ERROR_BAD_MAGIC is reported instead. */ #define SZ_ERROR_UNSUPPORTED_FILTER_COUNT 53 #define SZ_ERROR_BAD_BLOCK_FLAGS 54 #define SZ_ERROR_UNSUPPORTED_FILTER_ID 55 #define SZ_ERROR_UNSUPPORTED_FILTER_PROPERTIES_SIZE 56 #define SZ_ERROR_BAD_PADDING 57 #define SZ_ERROR_BLOCK_HEADER_TOO_LONG 58 #define SZ_ERROR_BAD_CHUNK_CONTROL_BYTE 59 #define SZ_ERROR_BAD_CHECKSUM_TYPE 60 #define SZ_ERROR_BAD_DICTIONARY_SIZE 61 #define SZ_ERROR_UNSUPPORTED_DICTIONARY_SIZE 62 #define SZ_ERROR_FEED_CHUNK 63 /*#define SZ_ERROR_NOT_FINISHED_WITH_MARK 64*/ #define SZ_ERROR_BAD_DICPOS 65 #define SZ_ERROR_MISSING_INITPROP 67 #define SZ_ERROR_BAD_LCLPPB_PROP 68 #define FILTER_ID_LZMA2 0x21 // 65536 + 12 * 1 byte (sizeof(uint8_t) #define sizeof_readBuf 65548 #define sizeof_writeBuf 0x1000000 #define MAX_DICF_SIZE (MAX_DIC_SIZE + MAX_MATCH_SIZE + sizeof_writeBuf) /* Maximum number of bytes in global.dicf. */ #define DUMMY_ERROR 0 /* unexpected end of input stream */ #define DUMMY_LIT 1 #define DUMMY_MATCH 2 #define DUMMY_REP 3 /* (LZMA_BASE_SIZE + (LZMA_LIT_SIZE << LZMA2_LCLP_MAX)) */ #define probs_size 14134 #define BIT31 (1<<31) #define BITS32 (0x7FFFFFFF | BIT31) #define HIGHBITS (0xFFFFFFFF - BITS32) FILE* destination; FILE* source; uint32_t pos; /* For LZMA streams, lc <= 8, lp <= 4, lc + lp <= 8 + 4 == 12. 
* For LZMA2 streams, lc + lp <= 4. * Minimum value: 1846. * Maximum value for LZMA streams: 1846 + (768 << (8 + 4)) == 3147574. * Maximum value for LZMA2 streams: 1846 + (768 << 4) == 14134. * Memory usage of prob: sizeof(uint32_t) * value == (2 or 4) * value bytes. */ struct CLzmaDec { /* lc, lp and pb would fit into a byte, but i386 code is shorter as uint32_t. * * Constraints: * * * (0 <= lc <= 8) by LZMA. * * 0 <= lc <= 4 by LZMA2 and muxzcat-LZMA and muzxcat-LZMA2. * * 0 <= lp <= 4. * * 0 <= pb <= 4. * * (0 <= lc + lp == 8 + 4 <= 12) by LZMA. * * 0 <= lc + lp <= 4 by LZMA2 and muxzcat-LZMA and muxzcat-LZMA2. */ uint32_t lc; uint32_t lp; uint32_t pb; /* Configured in prop byte. */ /* Maximum lookback delta. * More optimized implementations (but not this version of muxzcat) need * that many bytes of storage for the dictionary. muxzcat uses more, * because it keeps the entire decompression output in memory, for * the simplicity of the implementation. * Configured in dicSizeProp byte. Maximum LZMA and LZMA2 supports is 0xffffffff, * maximum we support is MAX_DIC_SIZE == 1610612736. */ uint32_t dicSize; uint8_t *buf; uint32_t range; uint32_t code; uint32_t dicfPos; /* The next decompression output byte will be written to dicf + dicfPos. */ uint32_t dicfLimit; /* It's OK to write this many decompression output bytes to dic. GrowDic(dicfPos + len) must be called before writing len bytes at dicfPos. */ uint32_t writtenPos; /* Decompression output bytes dicf[:writtenPos] are already written to the output file. writtenPos <= dicfPos. */ uint32_t discardedSize; /* Number of decompression output bytes discarded. */ uint32_t writeRemaining; /* Maximum number of remaining bytes to write, or ~0 for unlimited. */ uint32_t allocCapacity; /* Number of bytes allocated in dic. 
*/ uint32_t processedPos; /* Decompression output byte count since the last call to LzmaDec_InitDicAndState(TRUE, ...); */ uint32_t checkDicSize; uint32_t state; uint32_t reps[4]; uint32_t remainLen; uint32_t tempBufSize; uint32_t probs[probs_size]; int needFlush; int needInitLzma; int needInitDic; int needInitState; int needInitProp; uint8_t tempBuf[LZMA_REQUIRED_INPUT_MAX]; /* Contains the decompresison output, and used as the lookback dictionary. * allocCapacity bytes are allocated, it's OK to grow it up to dicfLimit. */ uint8_t *dicf; uint8_t* readBuf; uint8_t* readCur; uint8_t* readEnd; }; /* globals needed */ struct CLzmaDec* global; int FUZZING; /* Writes uncompressed data (global.dicf[global.writtenPos : global.dicfPos] to stdout. */ void Flush() { /* print the bytes in the buffer until done */ uint8_t* p = global->dicf + global->writtenPos; uint8_t* q = global->dicf + global->dicfPos; while(p < q) { fputc(0xFF & p[0], destination); p = p + 1; } global->writtenPos = global->dicfPos; } void FlushDiscardOldFromStartOfDic() { if(global->dicfPos > global->dicSize) { uint32_t delta = global->dicfPos - global->dicSize; if(delta + MAX_MATCH_SIZE >= sizeof_writeBuf) { Flush(); global->dicf = memmove(global->dicf, global->dicf + delta, global->dicSize); global->dicfPos = global->dicfPos - delta; global->dicfLimit = global->dicfLimit - delta; global->writtenPos = global->writtenPos - delta; global->discardedSize = global->discardedSize + delta; } } } void GrowCapacity(uint32_t newCapacity) { if(newCapacity > global->allocCapacity) { /* make sure we don't alloc too much */ require(newCapacity <= MAX_DICF_SIZE, "GrowCapacity exceeds MAX_DICF_SIZE"); /* Get our new block */ uint8_t* dicf = calloc(newCapacity, sizeof(uint8_t)); require(NULL != dicf, "GrowCapacity memory allocation failed"); /* copy our old block into it and get rid of the old block */ if (NULL != global->dicf) { memcpy(dicf, global->dicf, global->allocCapacity); free(global->dicf); } /* now track that 
new state */ global->dicf = dicf; global->allocCapacity = newCapacity; } /* else no need to grow */ } void FlushDiscardGrowDic(uint32_t dicfPosDelta) { uint32_t minCapacity = global->dicfPos + dicfPosDelta; uint32_t newCapacity; if(minCapacity > global->allocCapacity) { FlushDiscardOldFromStartOfDic(); minCapacity = global->dicfPos + dicfPosDelta; if(minCapacity > global->allocCapacity) { /* start by assuming 64KB */ newCapacity = (1 << 16); while(newCapacity + MAX_MATCH_SIZE < minCapacity) { /* No overflow. */ if(newCapacity > global->dicSize) { newCapacity = global->dicSize; if(newCapacity + MAX_MATCH_SIZE < minCapacity) { newCapacity = minCapacity - MAX_MATCH_SIZE; } break; } newCapacity = newCapacity << 1; } GrowCapacity(newCapacity + MAX_MATCH_SIZE); } } } void LzmaDec_DecodeReal(uint32_t limit, uint8_t *bufLimit) { uint32_t *probs = global->probs; uint32_t state = global->state; uint32_t rep0 = global->reps[0]; uint32_t rep1 = global->reps[1]; uint32_t rep2 = global->reps[2]; uint32_t rep3 = global->reps[3]; uint32_t pbMask = (1 << (global->pb)) - 1; uint32_t lpMask = (1 << (global->lp)) - 1; uint32_t lc = global->lc; uint8_t* dicl = global->dicf; uint32_t diclLimit = global->dicfLimit; uint32_t diclPos = global->dicfPos; uint32_t processedPos = global->processedPos; uint32_t checkDicSize = global->checkDicSize; uint32_t len = 0; uint8_t* buf = global->buf; uint32_t range = global->range; uint32_t code = global->code; uint32_t* prob; uint32_t bound; uint32_t ttt; uint32_t posState; uint32_t symbol; uint32_t matchByte; uint32_t offs; uint32_t bit; uint32_t* probLit; uint32_t distance; uint32_t limita; uint32_t *probLen; uint32_t offset; uint32_t posSlot; uint32_t numDirectBits; uint32_t mask; uint32_t i; uint32_t n; uint32_t t; uint32_t rem; uint32_t curLen; uint32_t pos; uint8_t* p; do { posState = processedPos & pbMask; p = probs; prob = p + 4 * (IsMatch + (state << kNumPosBitsMax) + posState); ttt = prob[0]; if(range < kTopValue) { range = range << 8; code 
= (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; prob[0] = (BITS32 & ((ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)))) | (HIGHBITS & prob[0]); p = probs; prob = p + 4 * Literal; if(checkDicSize != 0 || processedPos != 0) { if(diclPos == 0) { p = prob; prob = p + 4 * (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) + (0xFF & dicl[(diclLimit) - 1])) >> (8 - lc)); } else { p = prob; prob = p + 4 *(LZMA_LIT_SIZE * ((((processedPos & lpMask) << lc) + (0xFF & dicl[diclPos - 1])) >> (8 - lc))); } } if(state < kNumLitStates) { if(state < 4) state = 0; else state = state - 3; symbol = 1; do { ttt = prob[symbol]; if(range < kTopValue) { range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; prob[symbol] = (BITS32 & ((ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)))) | (HIGHBITS & prob[symbol]); symbol = (symbol + symbol); } else { range = range - bound; code = code - bound; prob[symbol] = (BITS32 & ((ttt - (ttt >> kNumMoveBits)))) | (HIGHBITS & prob[symbol]); symbol = (symbol + symbol) + 1; } } while(symbol < 0x100); } else { if(diclPos < rep0) matchByte = 0xFF & dicl[(diclPos - rep0) + diclLimit]; else matchByte = 0xFF & dicl[(diclPos - rep0)]; offs = 0x100; if(state < 10) state = state - 3; else state = state - 6; symbol = 1; do { matchByte = matchByte << 1; bit = (matchByte & offs); p = prob; probLit = p + 4 * (offs + bit + symbol); ttt = probLit[0]; if(range < kTopValue) { range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; probLit[0] = (BITS32 & ((ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)))) | (HIGHBITS & probLit[0]); symbol = (symbol + symbol); offs = offs & ~bit; } else { range = range - bound; code = code - bound; probLit[0] = (BITS32 & ((ttt - (ttt >> kNumMoveBits)))) | 
(HIGHBITS & probLit[0]); symbol = (symbol + symbol) + 1; offs = offs & bit; } } while(symbol < 0x100); } if(diclPos >= global->allocCapacity) { global->dicfPos = diclPos; FlushDiscardGrowDic(1); dicl = global->dicf; diclLimit = global->dicfLimit; diclPos = global->dicfPos; } dicl[diclPos] = (0xFF & symbol) | ((~0xFF) & dicl[diclPos]); diclPos = diclPos + 1; processedPos = processedPos + 1; continue; } else { range = range - bound; code = code - bound; prob[0] = (BITS32 & ((ttt - (ttt >> kNumMoveBits)))) | (HIGHBITS & prob[0]); p = probs; prob = p + 4 * (IsRep + state); ttt = prob[0]; if(range < kTopValue) { range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; prob[0] = (BITS32 & ((ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)))) | (HIGHBITS & prob[0]); state = state + kNumStates; p = probs; prob = p + 4 * LenCoder; } else { range = range - bound; code = code - bound; prob[0] = (BITS32 & ((ttt - (ttt >> kNumMoveBits)))) | (HIGHBITS & prob[0]); require((checkDicSize != 0) || (processedPos != 0), "checkDicsize == 0 && processPos == 0"); p = probs; prob = p + 4 * (IsRepG0 + state); ttt = prob[0]; if(range < kTopValue) { range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; prob[0] = (BITS32 & ((ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)))) | (HIGHBITS & prob[0]); p = probs; prob = p + 4 * (IsRep0Long + (state << kNumPosBitsMax) + posState); ttt = prob[0]; if(range < kTopValue) { range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; prob[0] = (BITS32 & ((ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)))) | (HIGHBITS & prob[0]); if(diclPos >= global->allocCapacity) { global->dicfPos = diclPos; FlushDiscardGrowDic(1); dicl = global->dicf; diclLimit = 
global->dicfLimit; diclPos = global->dicfPos; } if(diclPos < rep0) dicl[diclPos] = (0xFF & dicl[(diclPos - rep0) + diclLimit]) | ((~0xFF) & dicl[diclPos]); else dicl[diclPos] = (0xFF & dicl[(diclPos - rep0)]) | ((~0xFF) & dicl[diclPos]); diclPos = diclPos + 1; processedPos = processedPos + 1; if(state < kNumLitStates) state = 9; else state = 11; continue; } range = range - bound; code = code - bound; prob[0] = (BITS32 & ((ttt - (ttt >> kNumMoveBits)))) | (HIGHBITS & prob[0]); } else { range = range - bound; code = code - bound; prob[0] = (BITS32 & ((ttt - (ttt >> kNumMoveBits)))) | (HIGHBITS & prob[0]); p = probs; prob = p + 4 * (IsRepG1 + state); ttt = prob[0]; if(range < kTopValue) { range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; prob[0] = (BITS32 & ((ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)))) | (HIGHBITS & prob[0]); distance = rep1; } else { range = range - bound; code = code - bound; prob[0] = (BITS32 & ((ttt - (ttt >> kNumMoveBits)))) | (HIGHBITS & prob[0]); p = probs; prob = p + 4 * (IsRepG2 + state); ttt = prob[0]; if(range < kTopValue) { range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; prob[0] = (BITS32 & ((ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)))) | (HIGHBITS & prob[0]); distance = rep2; } else { range = range - bound; code = code - bound; prob[0] = (BITS32 & ((ttt - (ttt >> kNumMoveBits)))) | (HIGHBITS & prob[0]); distance = rep3; rep3 = rep2; } rep2 = rep1; } rep1 = rep0; rep0 = distance; } if(state < kNumLitStates) state = 8; else state = 11; p = probs; prob = p + 4 * RepLenCoder; } p = prob; probLen = p + 4 * LenChoice; ttt = probLen[0]; if(range < kTopValue) { range <<= 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; probLen[0] = 
(BITS32 & ((ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)))) | (HIGHBITS & probLen[0]); p = prob; probLen = p + 4 * (LenLow + (posState << kLenNumLowBits)); offset = 0; limita = (1 << kLenNumLowBits); } else { range = range - bound; code = code - bound; probLen[0] = (BITS32 & ((ttt - (ttt >> kNumMoveBits)))) | (HIGHBITS & probLen[0]); p = prob; probLen = p + 4 * LenChoice2; ttt = probLen[0]; if(range < kTopValue) { range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; probLen[0] = (BITS32 & ((ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)))) | (HIGHBITS & probLen[0]); p = prob; probLen = p + 4 * (LenMid + (posState << kLenNumMidBits)); offset = kLenNumLowSymbols; limita = (1 << kLenNumMidBits); } else { range = range - bound; code = code - bound; probLen[0] = (BITS32 & ((ttt - (ttt >> kNumMoveBits)))) | (HIGHBITS & probLen[0]); p = prob; probLen = p + 4 * LenHigh; offset = kLenNumLowSymbols + kLenNumMidSymbols; limita = (1 << kLenNumHighBits); } } len = 1; do { ttt = probLen[len]; if(range < kTopValue) { range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; probLen[len] = (BITS32 & ((ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)))) | (HIGHBITS & probLen[len]); len = (len + len); } else { range = range - bound; code = code - bound; probLen[len] = (BITS32 & ((ttt - (ttt >> kNumMoveBits)))) | (HIGHBITS & probLen[len]); len = (len + len) + 1; } } while(len < limita); len = len - limita + offset; if(state >= kNumStates) { if(len < kNumLenToPosStates) { p = probs; prob = p + 4 * (PosSlot + (len << kNumPosSlotBits)); } else { p = probs; prob = p + 4 * (PosSlot + ((kNumLenToPosStates - 1) << kNumPosSlotBits)); } distance = 1; do { ttt = prob[distance]; if(range < kTopValue) { range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> 
kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; prob[distance] = (BITS32 & ((ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)))) | (HIGHBITS & prob[distance]); distance = (distance + distance); } else { range = range - bound; code = code - bound; prob[distance] = (BITS32 & ((ttt - (ttt >> kNumMoveBits)))) | (HIGHBITS & prob[distance]); distance = (distance + distance) + 1; } } while(distance < (1 << 6)); distance = distance - (1 << 6); if(distance >= kStartPosModelIndex) { posSlot = distance; numDirectBits = (distance >> 1) - 1; distance = (2 | (distance & 1)); if(posSlot < kEndPosModelIndex) { distance = distance << numDirectBits; p = probs; prob = p + 4 * (SpecPos + distance - posSlot - 1); mask = 1; i = 1; do { ttt = prob[i]; if(range < kTopValue) { range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; prob[i] = (BITS32 & ((ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)))) | (HIGHBITS & prob[i]); i = (i + i); } else { range = range - bound; code = code - bound; prob[i] = (BITS32 & ((ttt - (ttt >> kNumMoveBits)))) | (HIGHBITS & prob[i]); i = (i + i) + 1; distance = distance | mask; } mask = mask << 1; numDirectBits = numDirectBits - 1; } while(numDirectBits != 0); } else { numDirectBits = numDirectBits - kNumAlignBits; do { if(range < kTopValue) { range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } range = range >> 1; { code = code - range; t = (0 - (code >> 31)); distance = (distance << 1) + (t + 1); code = code + (range & t); } numDirectBits = numDirectBits - 1; } while(numDirectBits != 0); p = probs; prob = p + 4 * Align; distance = distance << kNumAlignBits; i = 1; ttt = prob[i]; if(range < kTopValue) { range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; prob[i] = (BITS32 & ((ttt + ((kBitModelTotal - ttt) >> 
kNumMoveBits)))) | (HIGHBITS & prob[i]); i = (i + i); } else { range = range - bound; code = code - bound; prob[i] = (BITS32 & ((ttt - (ttt >> kNumMoveBits)))) | (HIGHBITS & prob[i]); i = (i + i) + 1; distance = distance | 1; } ttt = prob[i]; if(range < kTopValue) { range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; prob[i] = (BITS32 & ((ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)))) | (HIGHBITS & prob[i]); i = (i + i); } else { range = range - bound; code = code - bound; prob[i] = (BITS32 & ((ttt - (ttt >> kNumMoveBits)))) | (HIGHBITS & prob[i]); i = (i + i) + 1; distance = distance | 2; } ttt = prob[i]; if(range < kTopValue) { range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; prob[i] = (BITS32 & ((ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)))) | (HIGHBITS & prob[i]); i = (i + i); } else { range = range - bound; code = code - bound; prob[i] = (BITS32 & ((ttt - (ttt >> kNumMoveBits)))) | (HIGHBITS & prob[i]); i = (i + i) + 1; distance = distance | 4; } ttt = prob[i]; if(range < kTopValue) { range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; prob[i] = (BITS32 & ((ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)))) | (HIGHBITS & prob[i]); i = (i + i); } else { range = range - bound; code = code - bound; prob[i] = (BITS32 & ((ttt - (ttt >> kNumMoveBits)))) | (HIGHBITS & prob[i]); i = (i + i) + 1; distance = distance | 8; } if(distance == BITS32) { len = len + kMatchSpecLenStart; state = state - kNumStates; break; } } } rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance + 1; if(checkDicSize == 0) require(distance < processedPos , "distance >= processedPos"); else require(distance < checkDicSize, "distance >= checkDicSize"); if(state < kNumStates + 
kNumLitStates) state = kNumLitStates;
				else state = kNumLitStates + 3;
			}
			/* Copy the match. Only curLen = min(len, limit - diclPos) bytes are
			 * written now; whatever is left stays in len and is stored in
			 * global->remainLen when the function returns. */
			len = len + kMatchMinLen;
			require(len <= MAX_MATCH_SIZE, "len greater than MAX_MATCH_SIZE");
			require(limit != diclPos, "limit == diclPos");
			rem = limit - diclPos;
			if(rem < len) curLen = rem;
			else curLen = len;
			/* Source index of the match; wraps by diclLimit when rep0 reaches
			 * before index 0. */
			if(diclPos < rep0) pos = (diclPos - rep0) + diclLimit;
			else pos = diclPos - rep0;
			processedPos = processedPos + curLen;
			len = len - curLen;
			/* TODO(pts): ASSERT(len == curLen);, simplify buffering code. */
			/* + cannot overflow. */
			if((diclPos + curLen) > global->allocCapacity)
			{
				/* Not enough room: flush/grow, then reload the cached buffer
				 * state and move pos by the same amount dicfPos moved. */
				global->dicfPos = diclPos;
				FlushDiscardGrowDic(curLen);
				pos = pos + global->dicfPos - diclPos;
				dicl = global->dicf;
				diclLimit = global->dicfLimit;
				diclPos = global->dicfPos;
			}
			if((pos + curLen) <= diclLimit)
			{
				/* Source range does not cross diclLimit: straight forward copy. */
				require(diclPos > pos, "diclPos > pos");
				require(curLen > 0, "curLen > 0");
				i = 0;
				n = curLen;
				/* overlapping memcpy of sorts */
				while(n > 0)
				{
					dicl[diclPos + i] = (0xFF & dicl[pos + i]) | ((~0xFF) & dicl[diclPos + i]);
					i = i + 1;
					n = n - 1;
				}
				diclPos = diclPos + curLen;
			}
			else
			{
				/* Source range crosses diclLimit: copy byte by byte and restart
				 * the source index at 0 when it reaches diclLimit. */
				do
				{
					dicl[diclPos] = (0xFF & dicl[pos]) | ((~0xFF) & dicl[diclPos]);
					diclPos = diclPos + 1;
					pos = pos + 1;
					if(pos == diclLimit)
					{
						pos = 0;
					}
					curLen = curLen - 1;
				} while(curLen != 0);
			}
		}
	} while((diclPos < limit) && (buf < bufLimit));
	/* Final range-coder normalization before saving the state. */
	if(range < kTopValue)
	{
		range = range << 8;
		code = (code << 8) | (0xFF & buf[0]);
		buf = buf + 1;
	}
	/* Write the cached locals back to the global decoder state. */
	global->buf = buf;
	global->range = range;
	global->code = code;
	global->remainLen = len;
	global->dicfPos = diclPos;
	global->processedPos = processedPos;
	global->reps[0] = rep0;
	global->reps[1] = rep1;
	global->reps[2] = rep2;
	global->reps[3] = rep3;
	global->state = state;
}

/* Writes out the part of a match that is still pending in global->remainLen.
 *
 * Does nothing when remainLen is 0 or is kMatchSpecLenStart or larger.
 * Otherwise copies min(remainLen, limit - dicfPos) bytes from distance
 * global->reps[0] to the current dictionary position and updates
 * processedPos, remainLen, dicfPos and, when needed, checkDicSize.
 */
void LzmaDec_WriteRem(uint32_t limit)
{
	uint8_t *dicl;
	uint32_t diclPos;
	uint32_t diclLimit;
	uint32_t len;
	uint32_t rep0;
	if(global->remainLen != 0 && global->remainLen < kMatchSpecLenStart)
	{
		dicl = global->dicf;
		diclPos = global->dicfPos;
		diclLimit = global->dicfLimit;
		len = global->remainLen;
		rep0 = global->reps[0];
		/* Never write past limit. */
		if(limit - diclPos < len)
		{
			len = limit - diclPos;
		}
		if(diclPos + len > global->allocCapacity)
		{
			/* Make room, then reload the buffer state it may have changed. */
			FlushDiscardGrowDic(len);
			dicl = global->dicf;
			diclLimit = global->dicfLimit;
			diclPos = global->dicfPos;
		}
		/* Set checkDicSize when this copy brings processedPos to dicSize or
		 * beyond. */
		if((global->checkDicSize == 0) && ((global->dicSize - global->processedPos) <= len))
		{
			global->checkDicSize = global->dicSize;
		}
		global->processedPos = global->processedPos + len;
		global->remainLen = global->remainLen - len;
		while(len != 0)
		{
			len = len - 1;
			/* Source index wraps by diclLimit when rep0 reaches before index 0. */
			if(diclPos < rep0) dicl[diclPos] = (0xFF & dicl[(diclPos - rep0) + diclLimit]) | ((~0xFF) & dicl[diclPos]);
			else dicl[diclPos] = (0xFF & dicl[diclPos - rep0]) | ((~0xFF) & dicl[diclPos]);
			diclPos = diclPos + 1;
		}
		global->dicfPos = diclPos;
	}
}

/* Runs LzmaDec_DecodeReal repeatedly, followed each time by LzmaDec_WriteRem.
 *
 * While checkDicSize is 0 the limit handed to LzmaDec_DecodeReal is capped so
 * that at most (dicSize - processedPos) more bytes are produced in one call.
 * The loop continues while dicfPos is below limit, input remains before
 * bufLimit and remainLen is below kMatchSpecLenStart. On exit remainLen is
 * clamped to kMatchSpecLenStart.
 */
void LzmaDec_DecodeReal2(uint32_t limit, uint8_t *bufLimit)
{
	uint32_t limit2;
	uint32_t rem;
	do
	{
		limit2 = limit;
		if(global->checkDicSize == 0)
		{
			rem = global->dicSize - global->processedPos;
			if((limit - global->dicfPos) > rem)
			{
				limit2 = global->dicfPos + rem;
			}
		}
		LzmaDec_DecodeReal(limit2, bufLimit);
		if(global->processedPos >= global->dicSize)
		{
			global->checkDicSize = global->dicSize;
		}
		LzmaDec_WriteRem(limit);
	} while((global->dicfPos < limit) && (global->buf < bufLimit) && (global->remainLen < kMatchSpecLenStart));
	if(global->remainLen > kMatchSpecLenStart)
	{
		global->remainLen = kMatchSpecLenStart;
	}
}

int LzmaDec_TryDummy(uint8_t* buf, uint32_t inSize)
{
	uint32_t range = global->range;
	uint32_t code = global->code;
	uint8_t* bufLimit = buf + inSize;
	uint32_t* probs = global->probs;
	uint32_t state = global->state;
	int res;
	uint32_t* prob;
	uint32_t bound;
	uint32_t ttt;
	uint32_t posState;
	uint32_t hold;
	uint32_t symbol;
	uint32_t matchByte;
	uint32_t offs;
	uint32_t bit;
	uint32_t* probLit;
	uint32_t len;
	uint32_t limit;
	uint32_t offset;
	uint32_t* probLen;
	uint32_t posSlot;
	uint32_t numDirectBits;
	uint32_t i;
	uint8_t* p;
	posState = (global->processedPos) & ((1 << global->pb) - 1);
	p = probs;
	prob = p + 4 * (IsMatch + (state << kNumPosBitsMax) + posState);
	ttt = prob[0];
	if(range <
kTopValue) { if(buf >= bufLimit) { return DUMMY_ERROR; } range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; p = probs; prob = p + 4 * Literal; if(global->checkDicSize != 0 || global->processedPos != 0) { hold = (((global->processedPos) & ((1 << (global->lp)) - 1)) << global->lc); if(global->dicfPos == 0) { hold = hold + ((0xFF & global->dicf[global->dicfLimit - 1]) >> (8 - global->lc)); } else { hold = hold + ((0xFF & global->dicf[global->dicfPos - 1]) >> (8 - global->lc)); } p = prob; prob = p + 4 * (LZMA_LIT_SIZE * hold); } if(state < kNumLitStates) { symbol = 1; do { ttt = prob[symbol]; if(range < kTopValue) { if(buf >= bufLimit) { return DUMMY_ERROR; } range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; symbol = (symbol + symbol); } else { range = range - bound; code = code - bound; symbol = (symbol + symbol) + 1; } } while(symbol < 0x100); } else { if(global->dicfPos < (global->reps[0] & BITS32)) { hold = global->dicfPos - (global->reps[0] & BITS32) + global->dicfLimit; } else hold = global->dicfPos - (global->reps[0] & BITS32); matchByte = 0xFF & global->dicf[hold]; offs = 0x100; symbol = 1; do { matchByte = matchByte << 1; bit = (matchByte & offs); p = prob; probLit = p + 4 * (offs + bit + symbol); ttt = probLit[0]; if(range < kTopValue) { if(buf >= bufLimit) { return DUMMY_ERROR; } range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; symbol = (symbol + symbol); offs = offs & ~bit; } else { range = range - bound; code = code - bound; symbol = (symbol + symbol) + 1; offs = offs & bit; } } while(symbol < 0x100); } res = DUMMY_LIT; } else { range = range - bound; code = code - bound; p = probs; prob = p + 4 * (IsRep + state); ttt = prob[0]; if(range < 
kTopValue) { if(buf >= bufLimit) { return DUMMY_ERROR; } range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; state = 0; p = probs; prob = p + 4 * LenCoder; res = DUMMY_MATCH; } else { range = range - bound; code = code - bound; res = DUMMY_REP; p = probs; prob = p + 4 * (IsRepG0 + state); ttt = prob[0]; if(range < kTopValue) { if(buf >= bufLimit) { return DUMMY_ERROR; } range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; p = probs; prob = p + 4 * (IsRep0Long + (state << kNumPosBitsMax) + posState); ttt = prob[0]; if(range < kTopValue) { if(buf >= bufLimit) { return DUMMY_ERROR; } range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; if(range < kTopValue) { if(buf >= bufLimit) { return DUMMY_ERROR; } range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } return DUMMY_REP; } else { range = range - bound; code = code - bound; } } else { range = range - bound; code = code - bound; p = probs; prob = p + 4 * (IsRepG1 + state); ttt = prob[0]; if(range < kTopValue) { if(buf >= bufLimit) { return DUMMY_ERROR; } range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; } else { range = range - bound; code = code - bound; p = probs; prob = p + 4 * (IsRepG2 + state); ttt = prob[0]; if(range < kTopValue) { if(buf >= bufLimit) { return DUMMY_ERROR; } range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; } else { range = range - bound; code = code - bound; } } } state = kNumStates; p = probs; prob = p + 4 * RepLenCoder; } p = prob; probLen = p + 4 * 
LenChoice; ttt = probLen[0]; if(range < kTopValue) { if(buf >= bufLimit) { return DUMMY_ERROR; } range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; p = prob; probLen = p + 4 * (LenLow + (posState << kLenNumLowBits)); offset = 0; limit = 1 << kLenNumLowBits; } else { range = range - bound; code = code - bound; p = prob; probLen = p + 4 * LenChoice2; ttt = probLen[0]; if(range < kTopValue) { if(buf >= bufLimit) { return DUMMY_ERROR; } range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; p = prob; probLen = p + 4 * (LenMid + (posState << kLenNumMidBits)); offset = kLenNumLowSymbols; limit = 1 << kLenNumMidBits; } else { range = range - bound; code = code - bound; probLen = p + 4 * LenHigh; offset = kLenNumLowSymbols + kLenNumMidSymbols; limit = 1 << kLenNumHighBits; } } len = 1; do { ttt = probLen[len]; if(range < kTopValue) { if(buf >= bufLimit) { return DUMMY_ERROR; } range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; len = (len + len); } else { range = range - bound; code = code - bound; len = (len + len) + 1; } } while(len < limit); len = len - limit + offset; if(state < 4) { if(len < kNumLenToPosStates) hold = len << kNumPosSlotBits; else hold = (kNumLenToPosStates - 1) << kNumPosSlotBits; p = probs; prob = p + 4 * (PosSlot + hold); posSlot = 1; do { ttt = prob[posSlot]; if(range < kTopValue) { if(buf >= bufLimit) { return DUMMY_ERROR; } range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; posSlot = (posSlot + posSlot); } else { range = range - bound; code = code - bound; posSlot = (posSlot + posSlot) + 1; } } while(posSlot < (1 << kNumPosSlotBits)); 
posSlot = posSlot - (1 << kNumPosSlotBits); if(posSlot >= kStartPosModelIndex) { numDirectBits = ((posSlot >> 1) - 1); if(posSlot < kEndPosModelIndex) { p = probs; prob = p + 4 * (SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1); } else { numDirectBits = numDirectBits - kNumAlignBits; do { if(range < kTopValue) { if(buf >= bufLimit) { return DUMMY_ERROR; } range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } range = range >> 1; code = code - (range & ((((code - range) >> 31) & 1) - 1)); numDirectBits = numDirectBits - 1; } while(numDirectBits != 0); p = probs; prob = p + 4 * Align; numDirectBits = kNumAlignBits; } i = 1; do { ttt = prob[i]; if(range < kTopValue) { if(buf >= bufLimit) { return DUMMY_ERROR; } range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } bound = (range >> kNumBitModelTotalBits) * ttt; if(code < bound) { range = bound; i = (i + i); } else { range = range - bound; code = code - bound; i = (i + i) + 1; } numDirectBits = numDirectBits - 1; } while(numDirectBits != 0); } } } if(range < kTopValue) { if(buf >= bufLimit) { return DUMMY_ERROR; } /* is this even needed? 
*/ range = range << 8; code = (code << 8) | (0xFF & buf[0]); buf = buf + 1; } return res; } void LzmaDec_InitRc(uint8_t* data) { global->code = ((0xFF & data[1]) << 24) | ((0xFF & data[2]) << 16) | ((0xFF & data[3]) << 8) | (0xFF & data[4]); global->range = BITS32; global->needFlush = FALSE; } void LzmaDec_InitDicAndState(int initDic, int initState) { global->needFlush = TRUE; global->remainLen = 0; global->tempBufSize = 0; if(initDic) { global->processedPos = 0; global->checkDicSize = 0; global->needInitLzma = TRUE; } if(initState) { global->needInitLzma = TRUE; } } void LzmaDec_InitStateReal() { uint32_t numProbs = Literal + (LZMA_LIT_SIZE << (global->lc + global->lp)); uint32_t i; uint32_t* probs = global->probs; for(i = 0; i < numProbs; i = i + 1) { probs[i] = (BITS32 & (kBitModelTotal >> 1)) | (HIGHBITS & probs[i]); } global->reps[0] = 1; global->reps[1] = 1; global->reps[2] = 1; global->reps[3] = 1; global->state = 0; global->needInitLzma = FALSE; } uint32_t LzmaDec_DecodeToDic(uint8_t* src, uint32_t srcLen) { uint32_t srcLen0 = srcLen; uint32_t inSize = srcLen; int checkEndMarkNow; uint32_t processed; uint8_t *bufLimit; uint32_t dummyRes; uint32_t rem; uint32_t lookAhead; srcLen = 0; LzmaDec_WriteRem(global->dicfLimit); while(global->remainLen != kMatchSpecLenStart) { if(global->needFlush) { while(inSize > 0 && global->tempBufSize < RC_INIT_SIZE) { global->tempBuf[global->tempBufSize] = 0xFF & src[0]; global->tempBufSize = global->tempBufSize + 1; src = src + 1; srcLen = srcLen + 1; inSize = inSize - 1; } if(global->tempBufSize < RC_INIT_SIZE) { if(srcLen != srcLen0) return SZ_ERROR_NEEDS_MORE_INPUT_PARTIAL; return SZ_ERROR_NEEDS_MORE_INPUT; } if((0xFF & global->tempBuf[0]) != 0) return SZ_ERROR_DATA; LzmaDec_InitRc(global->tempBuf); global->tempBufSize = 0; } checkEndMarkNow = FALSE; if(global->dicfPos >= global->dicfLimit) { if((global->remainLen == 0) && (global->code == 0)) { if(srcLen != srcLen0) return SZ_ERROR_CHUNK_NOT_CONSUMED; return SZ_OK /* 
MAYBE_FINISHED_WITHOUT_MARK */; } if(global->remainLen != 0) return SZ_ERROR_NOT_FINISHED; checkEndMarkNow = TRUE; } if(global->needInitLzma) LzmaDec_InitStateReal(); if(global->tempBufSize == 0) { if(inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { dummyRes = LzmaDec_TryDummy(src, inSize); if(dummyRes == DUMMY_ERROR) { memcpy(global->tempBuf, src, inSize); global->tempBufSize = inSize; srcLen += inSize; if(srcLen != srcLen0) return SZ_ERROR_NEEDS_MORE_INPUT_PARTIAL; return SZ_ERROR_NEEDS_MORE_INPUT; } if(checkEndMarkNow && dummyRes != DUMMY_MATCH) return SZ_ERROR_NOT_FINISHED; bufLimit = src; } else { bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; } global->buf = src; LzmaDec_DecodeReal2(global->dicfLimit, bufLimit); processed = (global->buf - src); srcLen = srcLen + processed; src = src + processed; inSize = inSize - processed; } else { rem = global->tempBufSize; lookAhead = 0; while((rem < LZMA_REQUIRED_INPUT_MAX) && (lookAhead < inSize)) { global->tempBuf[rem] = 0xFF & src[lookAhead]; rem = rem + 1; lookAhead = lookAhead + 1; } global->tempBufSize = rem; if(rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { dummyRes = LzmaDec_TryDummy(global->tempBuf, rem); if(dummyRes == DUMMY_ERROR) { srcLen = srcLen + lookAhead; if(srcLen != srcLen0) return SZ_ERROR_NEEDS_MORE_INPUT_PARTIAL; return SZ_ERROR_NEEDS_MORE_INPUT; } if(checkEndMarkNow && dummyRes != DUMMY_MATCH) return SZ_ERROR_NOT_FINISHED; } global->buf = global->tempBuf; LzmaDec_DecodeReal2(global->dicfLimit, global->buf); lookAhead = lookAhead - (rem - (global->buf - global->tempBuf)); srcLen = srcLen + lookAhead; src = src + lookAhead; inSize = inSize - lookAhead; global->tempBufSize = 0; } } if(global->code != 0) return SZ_ERROR_DATA; return SZ_ERROR_FINISHED_WITH_MARK; } /* Tries to preread r bytes to the read buffer. Returns the number of bytes * available in the read buffer. If smaller than r, that indicates EOF. 
* * Doesn't try to preread more than absolutely necessary, to avoid copies in * the future. * * Works only if r <= sizeof(readBuf). */ uint32_t Preread(uint32_t r) { uint32_t hold; uint32_t p = global->readEnd - global->readCur; require(r <= sizeof_readBuf, "r <= sizeof_readBuf"); if(p < r) /* Not enough pending available. */ { if(global->readBuf + sizeof_readBuf - global->readCur + 0 < r) { /* If no room for r bytes to the end, discard bytes from the beginning. */ global->readBuf = memmove(global->readBuf, global->readCur, p); global->readEnd = global->readBuf + p; global->readCur = global->readBuf; } while(p < r) { /* our single spot for reading input */ hold = fgetc(source); pos = pos + 1; /* EOF or error on input. */ if(EOF == hold) break; /* otherwise just add it */ global->readEnd[0] = (0xFF & hold) | ((~0xFF) & global->readEnd[0]); global->readEnd = global->readEnd + 1; p = p + 1; } } return p; } void IgnoreVarint() { while((0xFF & global->readCur[0]) >= 0x80) { global->readCur = global->readCur + 1; } global->readCur = global->readCur + 1; } uint32_t IgnoreZeroBytes(uint32_t c) { while(c > 0) { if((0xFF & global->readCur[0]) != 0) { global->readCur = global->readCur + 1; return SZ_ERROR_BAD_PADDING; } global->readCur = global->readCur + 1; c = c - 1; } return SZ_OK; } uint32_t GetLE4(uint8_t *p) { return (0xFF & p[0]) | (0xFF & p[1]) << 8 | (0xFF & p[2]) << 16 | (0xFF & p[3]) << 24; } /* Expects global->dicSize be set already. Can be called before or after InitProp. */ void InitDecode() { /* global->lc = global->pb = global->lp = 0; */ /* needinitprop will initialize it */ global->dicfLimit = 0; /* We'll increment it later. 
*/ global->needInitDic = TRUE; global->needInitState = TRUE; global->needInitProp = TRUE; global->writtenPos = 0; global->writeRemaining = BITS32; global->discardedSize = 0; global->dicfPos = 0; LzmaDec_InitDicAndState(TRUE, TRUE); } uint32_t InitProp(uint8_t b) { uint32_t lc; uint32_t lp; if(b >= (9 * 5 * 5)) { return SZ_ERROR_BAD_LCLPPB_PROP; } lc = b % 9; b = b / 9; global->pb = b / 5; lp = b % 5; if(lc + lp > LZMA2_LCLP_MAX) { return SZ_ERROR_BAD_LCLPPB_PROP; } global->lc = lc; global->lp = lp; global->needInitProp = FALSE; return SZ_OK; } /* Reads .xz or .lzma data from source, writes uncompressed bytes to destination, * uses CLzmaDec.dic. It verifies some aspects of the file format (so it * can't be tricked to an infinite loop etc.), it doesn't verify checksums * (e.g. CRC32). */ uint32_t DecompressXzOrLzma() { uint8_t checksumSize; /* Block header flags */ uint32_t bhf; uint32_t result; /* uncompressed chunk size*/ uint32_t us; /* needed by lzma */ uint32_t srcLen; uint32_t res; /* needed by xz */ uint8_t blockSizePad; uint32_t bhs; uint32_t bhs2; uint8_t dicSizeProp; uint8_t* readAtBlock; uint8_t control; uint8_t numRecords; /* compressed chunk size */ uint32_t cs; int initDic; uint8_t mode; int initState; int isProp; /* 12 for the stream header + 12 for the first block header + 6 for the * first chunk header. empty.xz is 32 bytes. */ if(Preread(12 + 12 + 6) < 12 + 12 + 6) { return SZ_ERROR_INPUT_EOF; } /* readbuf[7] is actually stream flags, should also be 0. 
*/ if(0 != memcmp(global->readCur, "\xFD""7zXZ\0", 7)) { /* sanity check for lzma */ require((0xFF & global->readCur[0]) <= 225, "lzma check 1 failed"); require((0xFF & global->readCur[13]) == 0, "lzma check 2 failed"); require((((bhf = GetLE4(global->readCur + 9)) == 0) || (bhf == BITS32)), "lzma check 3 failed"); require((global->dicSize = GetLE4(global->readCur + 1)) >= LZMA_DIC_MIN, "lzma check 4 failed"); /* Based on https://svn.python.org/projects/external/xz-5.0.3/doc/lzma-file-format.txt */ /* TODO(pts): Support 8-byte uncompressed size. */ if(bhf == 0) us = GetLE4(global->readCur + 5); else us = bhf; if(global->dicSize > MAX_DIC_SIZE) return SZ_ERROR_UNSUPPORTED_DICTIONARY_SIZE; InitDecode(); global->allocCapacity = 0; global->dicf = NULL; /* LZMA2 restricts lc + lp <= 4. LZMA requires lc + lp <= 12. * We apply the LZMA2 restriction here (to save memory in * CLzmaDec.probs), thus we are not able to extract some legitimate * .lzma files. */ result = (InitProp(0xFF & global->readCur[0])); if(result != SZ_OK) return result; global->readCur = global->readCur + 13; /* Start decompressing the 0 byte. */ global->dicfLimit = global->writeRemaining; global->writeRemaining = us; if(us <= global->dicSize) GrowCapacity(us); while((global->discardedSize + global->dicfPos) != us) { if((srcLen = Preread(sizeof_readBuf)) == 0) { if(us != BITS32) return SZ_ERROR_INPUT_EOF; break; } res = LzmaDec_DecodeToDic(global->readCur, srcLen); global->readCur = global->readCur + srcLen; if(res == SZ_ERROR_FINISHED_WITH_MARK) break; if(res != SZ_ERROR_NEEDS_MORE_INPUT && res != SZ_OK) return res; } Flush(); return SZ_OK; } global->allocCapacity = 0; global->dicf = NULL; while(TRUE) { /* Based on https://tukaani.org/xz/xz-file-format-1.0.4.txt */ switch(0xFF & global->readCur[7]) { /* None */ case 0: checksumSize = 1; break; /* CRC32 */ case 1: checksumSize = 4; break; /* CRC64, typical xz output. 
*/ case 4: checksumSize = 8; break; default: return SZ_ERROR_BAD_CHECKSUM_TYPE; } /* Also ignore the CRC32 after checksumSize. */ global->readCur = global->readCur + 12; while(TRUE) { /* We need it modulo 4, so a uint8_t is enough. */ blockSizePad = 3; require(global->readEnd - global->readCur >= 12, "readEnd - readCur >= 12"); /* At least 12 bytes preread. */ bhs = 0xFF & global->readCur[0]; /* Last block, index follows. */ if(bhs == 0) { global->readCur = global->readCur + 1; /* This is actually a varint, but it's shorter to read it as a byte. */ numRecords = 0xFF & global->readCur[0]; global->readCur = global->readCur + 1; while(0 != numRecords) { /* a varint is at most 9 bytes long, but may be shorter */ Preread(9); IgnoreVarint(); Preread(9); IgnoreVarint(); numRecords = numRecords - 1; } /* Synchronize to 4-byte boundary */ if (0 != ((pos - (global->readEnd - global->readCur)) & 3)) { Preread(4 - ((pos - (global->readEnd - global->readCur)) & 3)); global->readCur = global->readCur + (4 - ((pos - (global->readEnd - global->readCur)) & 3)); } /* Consume crc32 of index + stream footer */ Preread(16); global->readCur = global->readCur + 16; break; } global->readCur = global->readCur + 1; /* Block header size includes the bhs field above and the CRC32 below. */ bhs = (bhs + 1) << 2; /* Typically the Preread(12 + 12 + 6) above covers it. */ if(Preread(bhs) < bhs) { return SZ_ERROR_INPUT_EOF; } readAtBlock = global->readCur; bhf = 0xFF & global->readCur[0]; global->readCur = global->readCur + 1; if((bhf & 2) != 0) return SZ_ERROR_UNSUPPORTED_FILTER_COUNT; if((bhf & 20) != 0) return SZ_ERROR_BAD_BLOCK_FLAGS; /* Compressed size present. */ /* Usually not present, just ignore it. */ if((bhf & 64) != 0) IgnoreVarint(); /* Uncompressed size present. */ /* Usually not present, just ignore it. */ if((bhf & 128) != 0) IgnoreVarint(); /* This is actually a varint, but it's shorter to read it as a byte. 
*/ if((0xFF & global->readCur[0]) != FILTER_ID_LZMA2) return SZ_ERROR_UNSUPPORTED_FILTER_ID; global->readCur = global->readCur + 1; /* This is actually a varint, but it's shorter to read it as a byte. */ if((0xFF & global->readCur[0]) != 1) return SZ_ERROR_UNSUPPORTED_FILTER_PROPERTIES_SIZE; global->readCur = global->readCur + 1; dicSizeProp = 0xFF & global->readCur[0]; global->readCur = global->readCur + 1; /* Typical large dictionary sizes: * 35: 805306368 bytes == 768 MiB * 36: 1073741824 bytes == 1 GiB * 37: 1610612736 bytes, largest supported by .xz * 38: 2147483648 bytes == 2 GiB * 39: 3221225472 bytes == 3 GiB * 40: 4294967295 bytes, largest supported by .7z */ if(dicSizeProp > 40) return SZ_ERROR_BAD_DICTIONARY_SIZE; /* LZMA2 and .xz support it, we don't (for simpler memory management on * 32-bit systems). */ if(dicSizeProp > MAX_DIC_SIZE_PROP) return SZ_ERROR_UNSUPPORTED_DICTIONARY_SIZE; /* Works if dicSizeProp <= 39. */ global->dicSize = ((2 | ((dicSizeProp) & 1)) << ((dicSizeProp) / 2 + 11)); /* TODO(pts): Free dic after use, also after realloc error. */ require(global->dicSize >= LZMA_DIC_MIN, "global->dicSize >= LZMA_DIC_MIN"); GrowCapacity(global->dicSize + MAX_MATCH_SIZE + sizeof_writeBuf); bhs2 = global->readCur - readAtBlock + 5; if(bhs2 > bhs) return SZ_ERROR_BLOCK_HEADER_TOO_LONG; result = IgnoreZeroBytes(bhs - bhs2); if(result != 0) return result; /* Ignore CRC32. */ global->readCur = global->readCur + 4; /* Typically it's offset 24, xz creates it by default, minimal. */ /* Finally Parse LZMA2 stream. */ InitDecode(); while(TRUE) { require(global->dicfPos == global->dicfLimit, "global->dicfPos == global->dicfLimit"); /* Actually 2 bytes is enough to get to the index if everything is * aligned and there is no block checksum. 
*/ if(Preread(6) < 6) return SZ_ERROR_INPUT_EOF; control = 0xFF & global->readCur[0]; if(control == 0) { global->readCur = global->readCur + 1; break; } else if(((control - 3) & 0xFF) < 0x7D) return SZ_ERROR_BAD_CHUNK_CONTROL_BYTE; us = ((0xFF & global->readCur[1]) << 8) + (0xFF & global->readCur[2]) + 1; /* Uncompressed chunk. */ if(control < 3) { /* assume it was already setup */ initDic = FALSE; cs = us; global->readCur = global->readCur + 3; blockSizePad = blockSizePad - 3; /* now test that assumption */ if(control == 1) { global->needInitProp = global->needInitState; global->needInitState = TRUE; global->needInitDic = FALSE; } else if(global->needInitDic) return SZ_ERROR_DATA; LzmaDec_InitDicAndState(initDic, FALSE); } else { /* LZMA chunk. */ mode = (((control) >> 5) & 3); if(mode == 3) initDic = TRUE; else initDic = FALSE; if(mode > 0) initState = TRUE; else initState = FALSE; if((control & 64) != 0) isProp = TRUE; else isProp = FALSE; us = us + ((control & 31) << 16); cs = ((0xFF & global->readCur[3]) << 8) + (0xFF & global->readCur[4]) + 1; if(isProp) { result = InitProp(0xFF & global->readCur[5]); if(result != 0) return result; global->readCur = global->readCur + 1; blockSizePad = blockSizePad - 1; } else if(global->needInitProp) return SZ_ERROR_MISSING_INITPROP; global->readCur = global->readCur + 5; blockSizePad = blockSizePad - 5; if((!initDic && global->needInitDic) || (!initState && global->needInitState)) { return SZ_ERROR_DATA; } LzmaDec_InitDicAndState(initDic, initState); global->needInitDic = FALSE; global->needInitState = FALSE; } require(us <= (1 << 24), "us <= (1 << 24)"); require(cs <= (1 << 16), "cs <= (1 << 16)"); require(global->dicfPos == global->dicfLimit, "global->dicfPos == global->dicfLimit"); FlushDiscardOldFromStartOfDic(); global->dicfLimit = global->dicfLimit + us; if(global->dicfLimit < us) return SZ_ERROR_MEM; /* Read 6 extra bytes to optimize away a read(...) system call in * the Prefetch(6) call in the next chunk header. 
*/ if(Preread(cs + 6) < cs) return SZ_ERROR_INPUT_EOF; /* Uncompressed chunk, at most 64 KiB. */ if(control < 3) { require((global->dicfPos + us) == global->dicfLimit, "global->dicfPos + us == global->dicfLimit"); FlushDiscardGrowDic(us); memcpy(global->dicf + global->dicfPos, global->readCur, us); global->dicfPos = global->dicfPos + us; if((global->checkDicSize == 0) && ((global->dicSize - global->processedPos) <= us)) { global->checkDicSize = global->dicSize; } global->processedPos = global->processedPos + us; } else { /* Compressed chunk. */ /* This call doesn't change global->dicfLimit. */ result = LzmaDec_DecodeToDic(global->readCur, cs); if(result != 0) return result; } if(global->dicfPos != global->dicfLimit) return SZ_ERROR_BAD_DICPOS; global->readCur = global->readCur + cs; blockSizePad = blockSizePad - cs; /* We can't discard decompressbuf[:global->dicfLimit] now, * because we need it a dictionary in which subsequent calls to * Lzma2Dec_DecodeToDic will look up backreferences. */ } Flush(); /* End of LZMA2 stream. */ /* End of block. */ /* 7 for padding4 and CRC32 + 12 for the next block header + 6 for the next * chunk header. */ if(Preread(7 + 12 + 6) < 7 + 12 + 6) return SZ_ERROR_INPUT_EOF; /* Ignore block padding. */ result = (IgnoreZeroBytes(blockSizePad & 3)); if(result != 0) return result; global->readCur = global->readCur + checksumSize; /* Ignore CRC32, CRC64 etc. */ } /* Look for another concatenated stream */ /* 12 for the stream header + 12 for the first block header + 6 for the * first chunk header. empty.xz is 32 bytes. */ if(Preread(12 + 12 + 6) < 12 + 12 + 6) { break; } if(0 != memcmp(global->readCur, "\xFD""7zXZ\0", 7)) { break; } } /* The .xz input file continues with the index, which we ignore from here. 
*/ return SZ_OK; } int main(int argc, char **argv) { uint32_t res; char* name; char* dest; FUZZING = FALSE; name = NULL; dest = NULL; pos = 0; /* process arguments */ int i = 1; while (i < argc) { if(NULL == argv[i]) { i = i + 1; } else if(match(argv[i], "-f") || match(argv[i], "--file")) { name = argv[i+1]; require(NULL != name, "the --file option requires a filename to be given\n"); i = i + 2; } else if(match(argv[i], "-o") || match(argv[i], "--output")) { dest = argv[i+1]; require(NULL != dest, "the --output option requires a filename to be given\n"); i = i + 2; } else if(match(argv[i], "--chaos") || match(argv[i], "--fuzz-mode") || match(argv[i], "--fuzzing")) { FUZZING = TRUE; fputs("fuzz-mode enabled, preparing for chaos\n", stderr); i = i + 1; } else if(match(argv[i], "-h") || match(argv[i], "--help")) { fputs("Usage: ", stderr); fputs(argv[0], stderr); fputs(" [--file $input.xz or --file $input.lzma] (or it'll read from stdin)\n", stderr); fputs(" [--output $output] (or it'll write to stdout)\n", stderr); fputs("--help to get this message\n", stderr); fputs("--fuzz-mode if you wish to fuzz this application safely\n", stderr); exit(EXIT_SUCCESS); } else { fputs("Unknown option:", stderr); fputs(argv[i], stderr); fputs("\nAborting to avoid problems\n", stderr); exit(EXIT_FAILURE); } } if(NULL != name) source = fopen(name, "r"); else source = stdin; if(NULL != dest) destination = fopen(dest, "w"); else destination = stdout; if(FUZZING) destination = fopen("/dev/null", "w"); global = calloc(1, sizeof(struct CLzmaDec)); global->readBuf = calloc(sizeof_readBuf, sizeof(uint8_t)); global->readCur = global->readBuf; global->readEnd = global->readBuf; global->allocCapacity = 0; global->dicSize = 0; res = DecompressXzOrLzma(); free(global->dicf); /* Pacify valgrind(1). */ free(global->readBuf); free(global); return res; }
/* Copyright (C) 2024 Jeremiah Orians * This file is part of M2-Planet. * * M2-Planet is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * M2-Planet is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with M2-Planet. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _STDINT_H #define _STDINT_H #ifdef __M2__ /* nothing needed for M2-Planet as the standard types are included by default*/ #else /* if we plan on supporting other compilers put stuff here */ #endif #endif
/* Copyright (C) 2019 Jeremiah Orians
 * This file is part of mescc-tools
 *
 * mescc-tools is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * mescc-tools is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with mescc-tools. If not, see <http://www.gnu.org/licenses/>.
 */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#define BUFFER_SIZE 4096

/********************************************************************************
 * the reason why we are using read and write instead of fread and fwrite is    *
 * because it is much faster and involves less copying of values around         *
 ********************************************************************************/

/*
 * catm OUTPUT [INPUT...]
 *
 * Creates (or truncates) OUTPUT with mode 0600 and appends the contents of
 * every INPUT to it, in the order given. With no INPUT an empty OUTPUT is
 * produced.
 *
 * Returns EXIT_SUCCESS when everything was copied. Any failure to open, read
 * or write prints a message on stderr and exits with EXIT_FAILURE.
 */
int main(int argc, char** argv)
{
	if(2 > argc)
	{
		fputs("catm requires 2 or more arguments\n", stderr);
		exit(EXIT_FAILURE);
	}

	/* create a new file with read/write permissions only */
	int output = open(argv[1], O_TRUNC | O_CREAT | O_WRONLY , 0600);
	/* Any negative value is a failure; not every libc reports it as exactly -1 */
	if(0 > output)
	{
		fputs("The file: ", stderr);
		fputs(argv[1], stderr);
		fputs(" is not a valid output file name\n", stderr);
		exit(EXIT_FAILURE);
	}

	int i;
	int bytes;
	int written;
	int done;
	int input;
	char* buffer = calloc(BUFFER_SIZE + 1, sizeof(char));
	if(NULL == buffer)
	{
		fputs("catm was unable to allocate its copy buffer\n", stderr);
		exit(EXIT_FAILURE);
	}

	for(i = 2; i < argc ; i = i + 1)
	{
		input = open(argv[i], 0, 0);
		if(0 > input)
		{
			fputs("The file: ", stderr);
			fputs(argv[i], stderr);
			fputs(" is not a valid input file name\n", stderr);
			exit(EXIT_FAILURE);
		}

		/*
		 * Keep reading until read() reports end of file (0). A read that
		 * returns fewer than BUFFER_SIZE bytes is NOT the end of the input.
		 */
		bytes = read(input, buffer, BUFFER_SIZE);
		while(0 < bytes)
		{
			/* write() may accept only part of the data, so push until it is all out */
			done = 0;
			while(done < bytes)
			{
				written = write(output, buffer + done, bytes - done);
				if(0 >= written)
				{
					fputs("Unable to write to the file: ", stderr);
					fputs(argv[1], stderr);
					fputs("\n", stderr);
					exit(EXIT_FAILURE);
				}
				done = done + written;
			}
			bytes = read(input, buffer, BUFFER_SIZE);
		}

		if(0 > bytes)
		{
			fputs("Unable to read from the file: ", stderr);
			fputs(argv[i], stderr);
			fputs("\n", stderr);
			exit(EXIT_FAILURE);
		}

		/* Release the descriptor so long argument lists cannot exhaust them */
		close(input);
	}

	close(output);
	free(buffer);
	return EXIT_SUCCESS;
}
/* Copyright (C) 2020 fosslinux * This file is part of mescc-tools * * mescc-tools is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * mescc-tools is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with mescc-tools. If not, see <http://www.gnu.org/licenses/>. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <fcntl.h> #include "M2libc/bootstrappable.h" #define MAX_STRING 4096 #define MAX_ARRAY 256 /* Globals */ int verbose; /* UTILITY FUNCTIONS */ /* Function to find a character's position in a string (last match) */ int find_last_char_pos(char* string, char a) { int i = strlen(string) - 1; if(i < 0) return i; while(i >= 0) { /* * This conditional should be in the while conditional but we are * running into the M2-Planet short-circuit bug. */ if(a == string[i]) break; i = i - 1; } return i; } /* Function to find the length of a char**; an array of strings */ int array_length(char** array) { int length = 0; while(array[length] != NULL) { length = length + 1; } return length; } /* PROCESSING FUNCTIONS */ char* directory_dest(char* dest, char* source, int require_directory) { /* * First, check if it is a directory to copy to. * We have two ways of knowing this: * - If the destination ends in a slash, the user has explicitly said * it is a directory. * - Normally we would use stat() but we don't want to force support for * that syscall onto the kernel, so we just attempt to chdir() into it * and if it works then it must be a directory. A bit hacky, bit it * works. 
*/ int isdirectory = FALSE; if(dest[strlen(dest) - 1] == '/') { isdirectory = TRUE; } if(!isdirectory) { /* Use the other testing method */ /* * Get the current path so that we can chdir back to it if it does * chdir successfully. */ char* current_path = calloc(MAX_STRING, sizeof(char)); require(current_path != NULL, "Memory initialization of current_path in directory_dest failed\n"); getcwd(current_path, MAX_STRING); require(!match("", current_path), "getcwd() failed\n"); /* * chdir expects an absolute path. * If the first character is / then it is already absolute, otherwise * it is relative and needs to be changed (by appending current_path * to the dest path). */ char* chdir_dest = calloc(MAX_STRING, sizeof(char)); require(chdir_dest != NULL, "Memory initialization of chdir_dest in directory_dest failed\n"); if(dest[0] != '/') { /* The path is relative, append current_path */ strcat(chdir_dest, current_path); strcat(chdir_dest, "/"); strcat(chdir_dest, dest); } else { /* The path is absolute */ strcpy(chdir_dest, dest); } if(0 <= chdir(chdir_dest)) { /* chdir returned successfully */ /* * But because of M2-Planet, that doesn't mean anything actually * happened, check that before we go any further. */ char* new_path = calloc(MAX_STRING, sizeof(char)); require(new_path != NULL, "Memory initialization of new_path in directory_dest failed\n"); getcwd(new_path, MAX_STRING); if(!match(current_path, new_path)) { isdirectory = TRUE; chdir(current_path); } } free(chdir_dest); free(current_path); } /* * If it isn't a directory, and we require one, error out. * Otherwise, just return what we were given, we're done here. */ if(require_directory) require(isdirectory, "Provide a directory destination for multiple source files\n"); if(!isdirectory) return dest; /* If it is, we need to make dest a full path */ /* 1. 
Get the basename of source */ char* basename = calloc(MAX_STRING, sizeof(char)); require(basename != NULL, "Memory initialization of basename in directory_dest failed\n"); int last_slash_pos = find_last_char_pos(source, '/'); if(last_slash_pos >= 0) { /* Yes, there is a slash in it, copy over everything after that pos */ unsigned spos; /* source pos */ unsigned bpos = 0; /* basename pos */ /* Do the actual copy */ for(spos = last_slash_pos + 1; spos < strlen(source); spos = spos + 1) { basename[bpos] = source[spos]; bpos = bpos + 1; } } else { /* No, there is no slash in it, hence the basename is just the source */ strcpy(basename, source); } /* 2. Ensure our dest (which is a directory) has a trailing slash */ if(dest[strlen(dest) - 1] != '/') { strcat(dest, "/"); } /* 3. Add the basename to the end of the directory */ strcat(dest, basename); free(basename); /* Now we have a returnable path! */ return dest; } void copy_file(char* source, char* dest) { if(verbose) { /* Output message */ /* Of the form 'source' -> 'dest' */ fputs("'", stdout); fputs(source, stdout); fputs("' -> '", stdout); fputs(dest, stdout); fputs("'\n", stdout); } /* Open source and dest as FILE*s */ FILE* fsource = fopen(source, "r"); if(fsource == NULL) { fputs("Error opening source file ", stderr); fputs(source, stderr); fputc('\n', stderr); exit(EXIT_FAILURE); } FILE* fdest = fopen(dest, "w"); if(fdest < 0) { fputs("Error opening destination file", stderr); fputs(dest, stderr); fputc('\n', stderr); exit(EXIT_FAILURE); } /* * The following loop reads a character from the source and writes it to the * dest file. This is all M2-Planet supports. 
*/ int c = fgetc(fsource); while(c != EOF) { fputc(c, fdest); c = fgetc(fsource); } /* Cleanup */ fclose(fsource); fclose(fdest); } int main(int argc, char** argv) { /* Initialize variables */ char** sources = calloc(MAX_ARRAY, sizeof(char*)); require(sources != NULL, "Memory initialization of sources failed\n"); int sources_index = 0; char* dest = NULL; /* Set defaults */ verbose = FALSE; int i = 1; int j; int args_found; /* Loop arguments */ while(i <= argc) { if(NULL == argv[i]) { /* Ignore and continue */ i = i + 1; } else if(match(argv[i], "-h") || match(argv[i], "--help")) { fputs("Usage: ", stdout); fputs(argv[0], stdout); fputs(" [-h | --help] [-V | --version] [-v | --verbose] source1 source2 sourcen destination\n", stdout); exit(EXIT_SUCCESS); } else if(match(argv[i], "-V") || match(argv[i], "--version")) { /* Output version */ fputs("cp version 1.3.0\n", stdout); exit(EXIT_SUCCESS); } else if(match(argv[i], "-v") || match(argv[i], "--verbose")) { verbose = TRUE; i = i + 1; } else if(argv[i][0] != '-') { /* It is not an option */ /* * We can tell if this is the source file or the destination file * through looking *ahead*. If it is the last of this type of argument then * it must be the destination. (1 destination, many sources). 
*/ j = i + 1; args_found = 0; while(j < array_length(argv)) { if(argv[j][0] != '-') { /* It's one of these type of arguments */ args_found = args_found + 1; } j = j + 1; } if(args_found == 0) { /* We are setting the destination (there are no more left after this) */ dest = calloc(MAX_STRING, sizeof(char)); require(dest != NULL, "Memory initialization of dest failed\n"); strcpy(dest, argv[i]); } else { /* We are setting a source */ require(sources_index < MAX_ARRAY, "Too many files\n"); sources[sources_index] = calloc(MAX_STRING, sizeof(char)); require(sources[sources_index] != NULL, "Memory initialization of sources[source_index] failed\n"); strcpy(sources[sources_index], argv[i]); sources_index = sources_index + 1; } i = i + 1; } else { /* Unknown argument */ fputs("UNKNOWN_ARGUMENT\n", stderr); exit(EXIT_FAILURE); } } /* Sanitize values */ /* Ensure the two values have values */ /* Another workaround for short-circuit bug */ int error = FALSE; if(sources[0] == NULL) error = TRUE; if(error == FALSE) if(match(sources[0], "")) error = TRUE; require(!error, "Provide a source file\n"); error = FALSE; if(dest == NULL) error = TRUE; if(error == FALSE) if(match(dest, "")) error = TRUE; require(!error, "Provide a destination file\n"); /* Loop through all of the sources, copying each one */ char* this_dest; for(i = 0; i < array_length(sources); i = i + 1) { /* Convert the dest variable to a full path if it's a directory copying to */ /* * Also, if there is more than one source, we have to be copying to * a directory destination... */ if(array_length(sources) == 1) { dest = directory_dest(dest, sources[i], FALSE); copy_file(sources[i], dest); } else { this_dest = calloc(MAX_STRING, sizeof(char)); require(this_dest != NULL, "Memory initalization of this_dest failed\n"); this_dest = directory_dest(dest, sources[i], TRUE); copy_file(sources[i], this_dest); } /* Perform the actual copy */ free(sources[i]); } free(sources); free(dest); return EXIT_SUCCESS; }
/* Copyright (C) 2020 fosslinux * This file is part of mescc-tools * * mescc-tools is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * mescc-tools is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with mescc-tools. If not, see <http://www.gnu.org/licenses/>. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <fcntl.h> #include <sys/stat.h> #include "M2libc/bootstrappable.h" /* Define all of the constants */ #define MAX_STRING 4096 #define MAX_ARRAY 256 struct files { char* name; struct files* next; }; /* Globals */ int verbose; /* PROCESSING FUNCTIONS */ int main(int argc, char** argv) { /* Initialize variables */ char* mode = NULL; struct files* f = NULL; struct files* n; int ok; /* Set defaults */ verbose = FALSE; int i = 1; /* Loop arguments */ while(i <= argc) { if(NULL == argv[i]) { /* Ignore and continue */ i = i + 1; } else if(match(argv[i], "-h") || match(argv[i], "--help")) { fputs("Usage: ", stdout); fputs(argv[0], stdout); fputs(" [-h | --help] [-V | --version] [-v | --verbose]\n", stdout); exit(EXIT_SUCCESS); } else if(match(argv[i], "-V") || match(argv[i], "--version")) { /* Output version */ fputs("chmod version 1.3.0\n", stdout); exit(EXIT_SUCCESS); } else if(match(argv[i], "-v") || match(argv[i], "--verbose")) { verbose = TRUE; i = i + 1; } else { /* It must be the file or the mode */ if(mode == NULL) { /* Mode always comes first */ mode = calloc(MAX_STRING, sizeof(char)); require(mode != NULL, "Memory initialization of mode failed\n"); /* We need to indicate it is octal */ 
strcat(mode, "0"); strcat(mode, argv[i]); } else { /* It's a file, as the mode is already done */ n = calloc(1, sizeof(struct files)); require(n != NULL, "Memory initialization of files failed\n"); n->next = f; f = n; f->name = argv[i]; } i = i + 1; } } /* Ensure the two values have values */ require(mode != NULL, "Provide a mode\n"); require(f != NULL, "Provide a file\n"); /* Convert the mode str into octal */ int omode = strtoint(mode); /* Loop over files to be operated on */ while(NULL != f) { /* Make sure the file can be opened */ ok = access(f->name, 0); if(ok != 0) { fputs("The file: ", stderr); fputs(f->name, stderr); fputs(" does not exist\n", stderr); exit(EXIT_FAILURE); } /* Verbose message */ if(verbose) { fputs("mode of '", stdout); fputs(f->name, stdout); fputs("' changed to ", stdout); fputs(mode, stdout); fputs("\n", stdout); } /* Perform the chmod */ chmod(f->name, omode); f = f->next; } }
/* Copyright (C) 2021 Jeremiah Orians * This file is part of mescc-tools-extra * * mescc-tools-extra is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * mescc-tools-extra is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with mescc-tools-extra. If not, see <http://www.gnu.org/licenses/>. */ /******************************************************************************** * "rm" can be used to delete files. It can also delete * * parent directories. * * * * Usage: rm <dir1>/<file1> <file2> * * * * These are all highly standard and portable headers. * ********************************************************************************/ #include <stdio.h> #include <string.h> /* This is for unlink() ; this may need to be changed for some platforms. */ #include <unistd.h> /* For unlink() */ #include <stdlib.h> #include "M2libc/bootstrappable.h" void delete_dir(char* name) { int r = unlink(name); if(0 != r) { fputs("unable to delete file: ", stderr); fputs(name, stderr); fputs(" !!!\n", stderr); } } int main(int argc, char **argv) { int i; for(i = 1; argc > i; i = i + 1) { delete_dir(argv[i]); } return 0; }
/* Copyright (C) 2019 Jeremiah Orians * This file is part of mescc-tools * * mescc-tools is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * mescc-tools is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with mescc-tools. If not, see <http://www.gnu.org/licenses/>. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include "M2libc/bootstrappable.h" char* input_name; FILE* input; char* output_name; FILE* output; char* pattern; size_t pattern_length; char* replacement; char* buffer; size_t buffer_index; char* hold; void read_next_byte() { int c= hold[0]; size_t i = 0; while(i < pattern_length) { hold[i] = hold[i+1]; i = i + 1; } hold[pattern_length-1] = buffer[buffer_index]; buffer_index = buffer_index + 1; /* NEVER WRITE NULLS!!! 
*/ if(0 != c) fputc(c, output); } void clear_hold() { /* FILL hold with NULLS */ size_t i = 0; while(i < pattern_length) { hold[i] = 0; i = i + 1; } } void check_match() { /* Do the actual replacing */ if(match(pattern, hold)) { fputs(replacement, output); clear_hold(); } } int main(int argc, char** argv) { output_name = "/dev/stdout"; pattern = NULL; replacement = NULL; buffer_index = 0; int i = 1; while (i < argc) { if(NULL == argv[i]) { i = i + 1; } else if(match(argv[i], "-f") || match(argv[i], "--file")) { input_name = argv[i+1]; require(NULL != input_name, "the --file option requires a filename to be given\n"); i = i + 2; } else if(match(argv[i], "-o") || match(argv[i], "--output")) { output_name = argv[i+1]; require(NULL != output_name, "the --output option requires a filename to be given\n"); i = i + 2; } else if(match(argv[i], "-m") || match(argv[i], "--match-on")) { pattern = argv[i+1]; require(NULL != pattern, "the --match-on option requires a string to be given\n"); i = i + 2; } else if(match(argv[i], "-r") || match(argv[i], "--replace-with")) { replacement = argv[i+1]; require(NULL != replacement, "the --replace-with option requires a string to be given\n"); i = i + 2; } else if(match(argv[i], "-h") || match(argv[i], "--help")) { fputs("Usage: ", stderr); fputs(argv[0], stderr); fputs(" --file $input", stderr); fputs(" --match-on $string", stderr); fputs(" --replace-with $string", stderr); fputs(" [--output $output] (or it'll dump to stdout)\n", stderr); fputs("--help to get this message\n", stderr); exit(EXIT_SUCCESS); } else { fputs("Unknown option:", stderr); fputs(argv[i], stderr); fputs("\nAborting to avoid problems\n", stderr); exit(EXIT_FAILURE); } } /* Sanity check that we got everything we need */ require(NULL != input_name, "You need to pass an input file with --file\n"); require(NULL != output_name, "You need to pass an output file with --output\n"); require(NULL != pattern, "You can't do a replacement without something to match on\n"); 
require(NULL != replacement, "You can't do a replacement without something to replace it with\n"); input = fopen(input_name, "r"); require(NULL != input, "unable to open requested input file!\n"); /* Get enough buffer to read it all */ fseek(input, 0, SEEK_END); size_t size = ftell(input); buffer = malloc((size + 8) * sizeof(char)); /* Save ourself work if the input file is too small */ pattern_length = strlen(pattern); require(pattern_length < size, "input file is to small for pattern\n"); /* Now read it all into buffer */ fseek(input, 0, SEEK_SET); size_t r = fread(buffer,sizeof(char), size, input); require(r == size, "incomplete read of input\n"); fclose(input); /* Now we can safely open the output (which could have been the same as the input */ output = fopen(output_name, "w"); require(NULL != input, "unable to open requested output file!\n"); /* build our match buffer */ hold = calloc(pattern_length + 4, sizeof(char)); require(NULL != hold, "temp memory allocation failed\n"); /* Replace it all */ while((size + pattern_length + 4) >= buffer_index) { read_next_byte(); check_match(); } fclose(output); }
/* SPDX-FileCopyrightText: 2023 Max Hearnden <max@hearnden.org.uk> */
/* SPDX-License-Identifier: GPL-3.0-or-later */

/*
 * Runs a command chrooted into the current directory with a small, fixed
 * environment. When not running as uid 0 it first enters new user and mount
 * namespaces so that the mounts and the chroot are permitted.
 */

/* Flag and mount constants are defined here instead of coming from a header */
#define CLONE_NEWUSER 0x10000000
#define CLONE_NEWNS 0x00020000
#define MS_BIND 4096
#define MS_REC 16384
#define MNT_DETACH 0x00000002

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include "M2libc/bootstrappable.h"

/*
 * Make sure a file exists at path by opening it with O_CREAT and closing it
 * again. Exits with EXIT_FAILURE if either step fails.
 */
void touch(char *path)
{
	int fd = open(path, O_CREAT, 0777);
	if (fd == -1) {
		fputs("Failed to create file ", stderr);
		fputs(path, stderr);
		fputc('\n', stderr);
		exit(EXIT_FAILURE);
	}
	if (close(fd) != 0) {
		fputs("Failed to close file ", stderr);
		fputs(path, stderr);
		fputc('\n', stderr);
		exit(EXIT_FAILURE);
	}
}

/*
 * Create the mount point target and mount source on it.
 *
 * type selects the kind of mount point: non-zero creates a directory with
 * mkdir(), zero creates a plain file with touch(). The remaining arguments
 * are handed to mount() unchanged. Any failure exits with EXIT_FAILURE.
 */
void mkmount(char *source, char *target, char *filesystemtype, unsigned mountflags, void *data, int type)
{
	int r = 0;
	if (type) {
		r = mkdir(target, 0755);
	} else {
		touch(target);
	}
	/* -17 is tolerated: presumably a negated EEXIST from a raw-syscall libc — TODO confirm */
	if (r != 0 && r != -17) {
		fputs("Failed to create mountpoint ", stderr);
		fputs(target, stderr);
		fputc('\n', stderr);
		exit(EXIT_FAILURE);
	}

	r = mount(source, target, filesystemtype, mountflags, data);

	if (r != 0) {
		fputs("Failed to mount directory ", stderr);
		fputs(target, stderr);
		fputc('\n', stderr);
		exit(EXIT_FAILURE);
	}
}

/*
 * Write the single mapping "0 <parent_id> 1" to the map file at path.
 * The same text is also echoed to standard output.
 * Exits with EXIT_FAILURE when the map file cannot be opened.
 */
void set_map(int parent_id, char *path)
{
	int fd = open(path, O_WRONLY, 0);
	if (fd == -1) {
		fputs("Failed to open map file ", stderr);
		fputs(path, stderr);
		fputc('\n', stderr);
		exit(EXIT_FAILURE);
	}

	char *map_contents = calloc(38, sizeof(char));

	/* The __M2__ build assembles the string by hand rather than calling snprintf */
#ifdef __M2__
	strcpy(map_contents, "0 ");
	char *parent_id_str = int2str(parent_id, 10, 0);
	strcat(map_contents, parent_id_str);
	strcat(map_contents, " 1");
#else
	snprintf(map_contents, 38, "0 %i 1", parent_id);
#endif
	/* NOTE(review): neither write() result is checked */
	write(fd, map_contents, strlen(map_contents));
	write(STDOUT_FILENO, map_contents, strlen(map_contents));
	free(map_contents);
	close(fd);
}

/*
 * Write "deny" to /proc/self/setgroups.
 * Exits with EXIT_FAILURE when that file cannot be opened.
 */
void deny_setgroups()
{
	int fd = open("/proc/self/setgroups", O_WRONLY, 0777);
	if(fd == -1) {
		fputs("Failed to open /proc/self/setgroups\n", stderr);
		exit(EXIT_FAILURE);
	}
	write(fd, "deny", 4);
	close(fd);
}

/*
 * If the environment variable named variable is set, store a freshly
 * allocated "variable=value" string in *newenv and return the address of the
 * next slot. If it is not set, return newenv unchanged.
 */
char **copy_environment(char **newenv, char *variable)
{
	char *var_contents = getenv(variable);
	size_t var_len = strlen(variable);
	if (var_contents != NULL) {
		/* name + '=' + value + terminating NUL */
		*newenv = malloc(var_len + 2 + strlen(var_contents));
		if (newenv[0] == NULL) {
			fputs("Failed to allocate space for new environment\n", stderr);
			exit(EXIT_FAILURE);
		}
		memcpy(*newenv, variable, var_len);
		(*newenv)[var_len] = '=';
		strcpy(*newenv + var_len + 1, var_contents);
		/* The __M2__ build advances by sizeof(char *) bytes; presumably its pointer arithmetic is not scaled by element size — TODO confirm */
#ifdef __M2__
		return newenv + sizeof(char *);
#else
		return newenv + 1;
#endif
	}
	return newenv;
}

/*
 * wrap COMMAND [ARGS...]
 *
 * Unless the current directory is already "/", prepare it as a root file
 * system (device nodes, proc, sys, tmp) and chroot into it, then execute
 * COMMAND with an environment made of a fixed list of variables plus
 * WRAPPED=yes.
 */
int main(int argc, char **argv)
{
	if(argc <= 1) {
		fputs("Expected at least one argument: command\n", stderr);
		exit(EXIT_FAILURE);
	}

	char *cwd = get_current_dir_name();
	/* Do nothing if cwd is already root */
	if (strcmp(cwd, "/")) {
		int uid = geteuid();
		int gid = getegid();
		/* Don't create a user and mount namespace if we are already root */
		if (uid != 0) {
			/* CLONE_NEWUSER allows for CLONE_NEWNS in an unprivileged process */
			if (unshare(CLONE_NEWUSER | CLONE_NEWNS) != 0) {
				fputs("Failed to create user and mount namespaces\n", stderr);
				exit(EXIT_FAILURE);
			}
			/* Prevent the use of setgroups and make gid_map writeable */
			deny_setgroups();
			/* Map the root user in the user namespace to our user id */
			set_map(uid, "/proc/self/uid_map");
			/* Map the root group in the user namespace to our group id */
			set_map(gid, "/proc/self/gid_map");
		}
		/* Same tolerance for -17 as in mkmount() */
		int r = mkdir("dev", 0755);
		if (r != 0 && r != -17) {
			fputs("Failed to create dev folder\n", stderr);
			exit(EXIT_FAILURE);
		}
		/* The mounts are compiled out when __uefi__ is set */
#if !__uefi__
		/* Last argument 0: the mount point is a file; 1: it is a directory */
		mkmount ("/dev/null", "dev/null", "", MS_BIND, NULL, 0);
		mkmount ("/dev/zero", "dev/zero", "", MS_BIND, NULL, 0);
		mkmount ("/dev/random", "dev/random", "", MS_BIND, NULL, 0);
		mkmount ("/dev/urandom", "dev/urandom", "", MS_BIND, NULL, 0);
		mkmount ("/dev/ptmx", "dev/ptmx", "", MS_BIND, NULL, 0);
		mkmount ("/dev/tty", "dev/tty", "", MS_BIND, NULL, 0);
		mkmount ("tmpfs", "dev/shm", "tmpfs", 0, NULL, 1);
		mkmount ("/proc", "proc", "", MS_BIND | MS_REC, NULL, 1);
		mkmount ("/sys", "sys", "", MS_BIND | MS_REC, NULL, 1);
		mkmount ("tmpfs", "tmp", "tmpfs", 0, NULL, 1);
#endif
		if (chroot (".") != 0) {
			fputs("Failed to chroot into .\n", stderr);
			exit(EXIT_FAILURE);
		}
	}
	free(cwd);

	/* Copy environment variables into the new environment */
	/* 13 slots: the 11 variables below, WRAPPED=yes and the terminating NULL */
	char **newenv = malloc(13 * sizeof(char *));
	char **newenv_end = newenv;
	if (newenv == NULL) {
		fputs("Failed to allocate space for new environment\n", stderr);
		exit(EXIT_FAILURE);
	}

	newenv_end = copy_environment(newenv_end, "ARCH");
	newenv_end = copy_environment(newenv_end, "ARCH_DIR");
	newenv_end = copy_environment(newenv_end, "M2LIBC");
	newenv_end = copy_environment(newenv_end, "TOOLS");
	newenv_end = copy_environment(newenv_end, "BLOOD_FLAG");
	newenv_end = copy_environment(newenv_end, "BASE_ADDRESS");
	newenv_end = copy_environment(newenv_end, "ENDIAN_FLAG");
	newenv_end = copy_environment(newenv_end, "BINDIR");
	newenv_end = copy_environment(newenv_end, "BUILDDIR");
	newenv_end = copy_environment(newenv_end, "TMPDIR");
	newenv_end = copy_environment(newenv_end, "OPERATING_SYSTEM");
	newenv_end[0] = "WRAPPED=yes";
	newenv_end[1] = NULL;

	/* Hand over to the command; its argument vector starts at argv[1] */
#ifdef __M2__
#if __uefi__
	return spawn (argv[1], argv + sizeof(char *), newenv);
#else
	return execve (argv[1], argv + sizeof(char *), newenv);
#endif
#else
	return execve (argv[1], argv + 1, newenv);
#endif
}
538c5b816299c29b7cfa6dbbd1e98919bcb055bac3263db8d219c124ad7b7b3a AMD64/bin/blood-elf 1c7677d53730e5a1ffc713aada443ba0ae07ef01dd6156a06552427b36cc8b74 AMD64/bin/catm 9d7266abf60bcd1e4eba4c4b3b9517c64699577f9203df947486b513165b3fdb AMD64/bin/chmod 05fb821cdb42abe08d7d9fc4c5bca48dab2e33981c5c2e98aa72ea608b700404 AMD64/bin/cp b22c3fe61c127674f61e682c8908af420a06c2ad17793662cd724aafcbab038c AMD64/bin/get_machine 508f75aed624fc0fba81322ba76498e011b5284119b7f898989a72de32d06061 AMD64/bin/hex2 4dd7608f26392946e8d471e0e3167a996a0699ea348716232da0b75553bf2a5b AMD64/bin/kaem bdc40bd326764e09048d11e387e0dd7f8f0c3f0e4477cddb20b810bfc15cadb6 AMD64/bin/M1 51c5ba7cc82d8833a9347ec4811b0132cbc5af8a30fc014736d54d3953bcadbd AMD64/bin/M2-Mesoplanet e627d45bb607a509427b44a0cb3acd33e6694de601b7ea1a8aed2e5818b759cd AMD64/bin/M2-Planet d7b4d54f3272d7aaba5c39f9bdb8f83011870bba4637966dbabe03dae1eb980d AMD64/bin/match 3d79ccf08950fbf6285c7aa5d01722380ea39c9876861df59fbc8de60d5a236d AMD64/bin/mkdir 18dbbb45ca5eb338c9c3aa543c0b906051d8a78eeb5bc4d80173c0ca4cc32225 AMD64/bin/replace 4a952eaa1f84a9c9f9da543460b8a8e36409ccda66044c0c34d65f743e73d906 AMD64/bin/rm e77e3fc58996ad802daf368c2aaa22786f0dc3ee5968d2b1035300d482828f12 AMD64/bin/sha256sum 2b1042ddec58092947074678a6a63094268d6bd2b164d976e4662b9282a5bcd9 AMD64/bin/ungz 4d59e87e7f3040ae5d17597af88e84cfc91938b28c2852e761b44b027c0df43b AMD64/bin/unbz2 6e26e56ce8d71b2e409106efabd325d4cb0fe37bdafa1177d6be14cb1106a771 AMD64/bin/unxz 6343143e0956d2d56917fca23fab823dd18bf822d6fc494e02ef502111cd53ed AMD64/bin/untar
#!/bin/sh # SPDX-FileCopyrightText: 2021 Andrius Å tikonas <andrius@stikonas.eu> # SPDX-FileCopyrightText: 2021 Paul Dersey <pdersey@gmail.com> # SPDX-FileCopyrightText: 2020-2022 fosslinux <fosslinux@aussies.space> # SPDX-FileCopyrightText: 2022 Dor Askayo <dor.askayo@gmail.com> # # SPDX-License-Identifier: GPL-3.0-or-later set -ex PATH=/${ARCH_DIR}/bin catm seed-full.kaem /steps/bootstrap.cfg /steps/env seed.kaem kaem --file seed-full.kaem
The version of the scan_trace.cpp program that was used to produce this page is given below.
// scan_trace.cpp
//
// Reads a system-call trace (trace_AMD64.txt) of a live-bootstrap run,
// builds an in-memory model of processes, files and the actions that
// connect them, and writes an HTML report (docs/index_AMD64.html).

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

// ---------------------------------

// Returns a heap-allocated copy of 'str'. The copy is never freed by this
// program. Terminates the program when memory is exhausted.
char *copystr(const char *str)
{
    char *new_str = (char*)malloc(strlen(str) + 1);
    if (new_str == 0)
    {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    strcpy(new_str, str);
    return new_str;
}

// Stream for diagnostics and progress messages.
FILE *fout = stdout;

#define MAX_FILENAME_LEN 500

// ---------------------------------

// Maps a path prefix to a printf-style URL template (one "%s" that receives
// the remainder of the path after the prefix). Kept in singly linked lists.
class SubModule
{
public:
    char *path;
    char *url;
    SubModule *next;

    SubModule(const char *p, const char *u, SubModule *n) : next(n)
    {
        path = copystr(p);
        url = copystr(u);
    }
};

SubModule* lbs_subModules = 0;   // prefixes relative to source_dir
SubModule* dist_subModules = 0;  // prefixes of files unpacked from distribution archives

// Both add functions prepend, so entries added later are matched first.
void addSubModule(const char *path, const char *url)
{
    lbs_subModules = new SubModule(path, url, lbs_subModules);
}

void addDistSubModule(const char *path, const char *url)
{
    dist_subModules = new SubModule(path, url, dist_subModules);
}

const char *source_dir = "../live-bootstrap/";
size_t len_source_dir;  // strlen(source_dir), set in main

// Reads the first 40 characters of the file at 'path' into 'commit'
// (which must have room for 41 bytes). 'commit' becomes the empty string
// when the file cannot be opened or read. The result is logged to fout.
void read_commit_hash(const char *path, char *commit)
{
    FILE *f = fopen(path, "r");
    if (f == 0)
        commit[0] = '\0';
    else
    {
        if (fgets(commit, 41, f))
            commit[40] = '\0';
        else
            commit[0] = '\0';
        fclose(f);
    }
    fprintf(fout, "Head file: '%s': %s\n", path, commit);
}

// Parses '<fn>.gitmodules' and registers every "path"/"url" pair found as a
// sub module, then recurses into that sub module's own .gitmodules.
//
// fn          - directory (with trailing '/') that holds the .gitmodules file
// modules_dir - writable buffer holding the matching '.git/modules/' style
//               directory; it is extended in place while recursing, so it
//               must have spare capacity (callers pass MAX_FILENAME_LEN+1
//               bytes; the appends are not bounds-checked).
void read_sub_modules(const char *fn, char *modules_dir)
{
    char complete_path[MAX_FILENAME_LEN+1];
    strcpy(complete_path, fn);
    char *s_path = complete_path + strlen(complete_path);
    char *m_path = modules_dir + strlen(modules_dir);

    strcpy(s_path, ".gitmodules");
    FILE *f = fopen(complete_path, "r");
    if (f == 0)
        return;

    char path[MAX_FILENAME_LEN+1];
    char url[MAX_FILENAME_LEN+1];
    while (fgets(path, MAX_FILENAME_LEN, f))
    {
        // Only a "path" line immediately followed by a "url" line is used.
        if (strncmp(path, "\tpath = ", 8) != 0)
            continue;
        if (fgets(url, MAX_FILENAME_LEN, f) && strncmp(url, "\turl = ", 7) == 0)
        {
            // Strip trailing control characters (the newline). Both strings
            // start with a printable key, so these loops cannot underflow.
            while (path[strlen(path)-1] < ' ') path[strlen(path)-1] = '\0';
            while (url[strlen(url)-1] < ' ') url[strlen(url)-1] = '\0';
            char *s = strstr(url, ".git");
            if (s != 0)
                *s = '\0';

            sprintf(s_path, "%s/", path + 8);
            char *source = complete_path + len_source_dir;

            sprintf(m_path, "%s/HEAD", path + 8);
            char commit[41];
            read_commit_hash(modules_dir, commit);
            sprintf(m_path, "%s/modules/", path + 8);

            // "%%s" leaves a literal "%s" in the template for get_url.
            char complete_url[MAX_FILENAME_LEN+1];
            if (strncmp(url + 7, "https://github.com/", 19) == 0)
            {
                if (commit[0] == '\0')
                    sprintf(complete_url, "%s/blob/main/%%s", url + 7);
                else
                    sprintf(complete_url, "%s/blob/%s/%%s", url + 7, commit);
            }
            else if (strncmp(url + 7, "https://git.savannah.nongnu.org/git/", 36) == 0)
            {
                if (commit[0] == '\0')
                    sprintf(complete_url, "https://git.savannah.nongnu.org/gitweb/?p=%s.git;a=blob;f=%%s", url + 43);
                else
                    sprintf(complete_url, "https://git.savannah.nongnu.org/gitweb/?p=%s.git;a=blob;f=%%s;id=%s", url + 43, commit);
            }
            else
                sprintf(complete_url, "%s%%s", url + 7);

            addSubModule(source, complete_url);
            read_sub_modules(complete_path, modules_dir);
        }
    }
    // Fix: the stream used to be leaked (once per recursion level).
    fclose(f);
}

char live_bootstrap_commit[41];

// Fills lbs_subModules and dist_subModules: the root repository, all git
// sub modules found below source_dir, and a fixed list of archives.
void init_subModules()
{
    // Root directory:
    char path[MAX_FILENAME_LEN+1];
    snprintf(path, MAX_FILENAME_LEN, "%s.git/refs/heads/master", source_dir);
    read_commit_hash(path, live_bootstrap_commit);
    snprintf(path, MAX_FILENAME_LEN, "https://github.com/fosslinux/live-bootstrap/blob/%s/%%s",
        live_bootstrap_commit[0] == '\0' ? "master" : live_bootstrap_commit);
    addSubModule("", path);

    // Sub modules
    char modules_dir[MAX_FILENAME_LEN+1];
    snprintf(modules_dir, MAX_FILENAME_LEN, "%s.git/modules/", source_dir);
    read_sub_modules(source_dir, modules_dir);

    // Exclude some directories
    addSubModule("distfiles", "--");

    // Extra for files that have been unpacked from distribution
    addDistSubModule("/steps/tcc-0.9.26/build/mes-0.26/", "https://git.savannah.gnu.org/cgit/mes.git/tree/%s?h=v0.26");
    addDistSubModule("/steps/tcc-0.9.26/build/tcc-0.9.26-1147-gee75a10c/", "https://github.com/TinyCC/tinycc/tree/d5e22108a0dc48899e44a158f91d5b3215eb7fe6/%s");
    addDistSubModule("/steps/mes-0.26/build/mes-0.26/", "https://git.savannah.gnu.org/cgit/mes.git/tree/%s?h=v0.26");
    addDistSubModule("/steps/mes-0.26/build/nyacc-1.00.2/", "https://git.savannah.gnu.org/cgit/nyacc.git/tree/%s?h=V1.00.2");
}

// Returns a newly allocated URL for 'path' built from the first entry of
// 'subModules' whose path is a prefix of 'path', or 0 when none matches.
// The entry's url is used as the format string; it comes from this program
// itself (see init_subModules / read_sub_modules), not from 'path'.
char *get_url(const char *path, SubModule *subModules)
{
    for (SubModule *subModule = subModules; subModule != 0; subModule = subModule->next)
    {
        size_t path_len = strlen(subModule->path);
        if (strncmp(path, subModule->path, path_len) == 0)
        {
            char url[MAX_FILENAME_LEN+1];
            snprintf(url, MAX_FILENAME_LEN, subModule->url, path + path_len);
            return copystr(url);
        }
    }
    return 0;
}

// ---------------------------------

class File;
class Action;
class Process;

// A line of text with its origin (file and line number).
class LineInFile
{
public:
    const char *text;
    File *file;
    long line;
    LineInFile *next;

    LineInFile(const char *t, File *f, long l) : text(copystr(t)), file(f), line(l), next(0) {}
    // Shares the text of 'lif', starting 'offset' characters in.
    LineInFile(LineInFile *lif, int offset = 0) : text(lif->text + offset), file(lif->file), line(lif->line), next(0) {}
};

class MergeChild;

int nr_files = 0;

// A file (by absolute name) that occurs in the trace.
class File
{
public:
    char *name;
    int nr;             // sequence number, starting at 0; used for HTML anchors
    bool is_source;     // set by init_source when the first action reads or executes the file
    char *source_name;  // location of the file on this machine, or 0
    char *url;          // URL of the file in its repository, or 0
    File *copy_from;    // file this one was copied from by /usr/bin/cp, or 0
    Action *actions;    // actions on this file, in trace order
    File *next;

    File(const char *fn) : is_source(false), source_name(0), url(0), copy_from(0), actions(0), next(0)
    {
        name = copystr(fn);
        nr = nr_files++;
    }

    void init_source();
    bool exec_before_created();
    bool used_as_input();
    bool produced_and_not_removed();
};

File *files = 0;

int nr_processes = 0;

// A process (by pid) that occurs in the trace.
class Process
{
public:
    int nr;             // sequence number, starting at 1; used for HTML anchors
    unsigned long pid;
    Process *parent;
    Action *actions;    // actions of this process, in trace order
    Process *next;

    Process(unsigned long _pid) : pid(_pid), parent(0), actions(0), next(0)
    {
        nr = ++nr_processes;
    }

    bool hasInputUseOf(File *f);
    Action *lastOpenAction(int handle);
};

Process *all_processes = 0;
Process **ref_next = &all_processes;

// Creates a process for 'pid' and appends it to all_processes.
Process *next_process(unsigned long pid)
{
    Process *process = new Process(pid);
    *ref_next = process;
    ref_next = &process->next;
    return process;
}

// Returns the first process in all_processes with the given pid, or 0.
Process *find_process(unsigned long pid)
{
    // Fix: the cache was a plain local and therefore never hit. Only results
    // of the search below are cached, so the answer is still the first
    // process in the list with this pid.
    static Process *cached_process = 0;
    if (cached_process != 0 && cached_process->pid == pid)
        return cached_process;

    for (Process *process = all_processes; process != 0; process = process->next)
        if (process->pid == pid)
        {
            cached_process = process;
            return process;
        }

    return 0;
}

class MergeChild
{
public:
    File *child;
    MergeChild *next;

    MergeChild(File *file) : child(file), next(0) {}
};

// Returns the File with name 'full_fn', creating and appending it to 'files'
// when it does not exist yet.
File *get_file(const char *full_fn)
{
    File **ref = &files;
    for (; *ref != 0; ref = &(*ref)->next)
        if (strcmp((*ref)->name, full_fn) == 0)
            return (*ref);
    *ref = new File(full_fn);
    return *ref;
}

// Parses an unsigned number at 's' and advances 's' past it. A leading '0'
// selects octal (as used for file modes), otherwise decimal.
unsigned long read_unsigned_long(char *&s)
{
    unsigned long result = 0;
    if (*s == '0')
    {
        for (s++; '0' <= *s && *s <= '7'; s++)
            result = 8 * result + *s - '0';
        return result;
    }
    for (; '0' <= *s && *s <= '9'; s++)
        result = 10 * result + *s - '0';
    return result;
}

// Parses an optionally negative decimal number at 's' and advances 's'.
long read_long(char *&s)
{
    long sign = 1;
    long result = 0;
    if (*s == '-')
    {
        sign = -1;
        s++;
    }
    for (; '0' <= *s && *s <= '9'; s++)
        result = 10 * result + *s - '0';
    return sign * result;
}

int indent_depth = 0;

// Writes indent_depth spaces to 'fout'.
void indent(FILE *fout)
{
    fprintf(fout, "%*.*s", indent_depth, indent_depth, "");
}

FILE *fout_usage = 0;

// One traced event. The constructor appends the action to the action list of
// its process and, when a file is given, to the action list of that file.
class Action
{
public:
    char kind; // one of 'e' - execute, 'o' - open, 'r' - removed, 'c' - change mode, 'E' - execute child
    bool o_rdonly;
    bool o_wronly;
    bool o_rdwr;
    bool o_creat;
    bool o_trunc;
    bool o_excl;
    int file_handle;
    bool is_closed;
    int mode;
    bool from_archive;      // set by File::init_source for files created by /usr/bin/untar
    Process *child_process; // for kind 'E'

    File *file;             // 0 for kind 'E'
    Process *process;       // must not be 0
    Action *next_in_process;
    Action *next_on_file;

    Action (File *_file, Process *_process, char _kind)
    : kind(_kind),
      o_rdonly(false), o_wronly(false), o_rdwr(false), o_creat(false), o_trunc(false), o_excl(false),
      file_handle(-1), is_closed(false), mode(0), from_archive(false), child_process(0),
      file(_file), process(_process), next_in_process(0), next_on_file(0)
    {
        Action **ref_action_in_process = &process->actions;
        while (*ref_action_in_process != 0)
            ref_action_in_process = &(*ref_action_in_process)->next_in_process;
        *ref_action_in_process = this;
        if (file != 0)
        {
            Action **ref_action_on_file = &file->actions;
            while (*ref_action_on_file != 0)
                ref_action_on_file = &(*ref_action_on_file)->next_on_file;
            *ref_action_on_file = this;
        }
    }

    // Label for the report, or 0 for kinds that are not reported ('c', 'E').
    // write_html compares the returned pointers, so every label must keep
    // coming from this single expression.
    const char *oper_name()
    {
        return   kind == 'e' ? "Executes"
               : kind == 'r' ? "Delete"
               : is_produced() ? "Produces"
               : kind != 'o' ? 0
               : o_rdonly ? "Uses as input"
               : o_wronly ? "Writes"
               : o_rdwr ? "Modifies"
               : "Uses";
    }

    // True for an open that creates the file or truncates it for writing.
    bool is_produced() { return kind == 'o' && (o_creat || ((o_wronly || o_rdwr) && o_trunc)); }
};

// True when this process has an action on 'f' with o_rdonly set.
bool Process::hasInputUseOf(File *f)
{
    for (Action *action = actions; action != 0; action = action->next_in_process)
        if (action->file == f && action->o_rdonly)
            return true;
    return false;
}

// Returns the most recent open action of this process that returned
// 'handle', or 0 when there is none.
Action *Process::lastOpenAction(int handle)
{
    Action *last_open_action = 0;
    for (Action *action = actions; action != 0; action = action->next_in_process)
        if (action->kind == 'o' && action->file_handle == handle)
            last_open_action = action;
    return last_open_action;
}

// Classifies the file based on its first action; does nothing unless the
// file has exactly one action so far.
//  - First action produces the file: when the producing process started by
//    executing /usr/bin/untar, the action is marked as coming from an
//    archive and a URL is looked up in dist_subModules.
//  - First action executes or opens the file: the file is a source; a local
//    copy is searched for and a URL is looked up in lbs_subModules.
void File::init_source()
{
    if (actions == 0 || actions->next_on_file != 0)
        return;

    if (actions->kind == 'o' && actions->is_produced())
    {
        if (   actions->process->actions->kind == 'e'
            && strcmp(actions->process->actions->file->name, "/usr/bin/untar") == 0)
        {
            actions->from_archive = true;
            url = get_url(name, dist_subModules);
        }
    }
    else if (actions->kind == 'e' || actions->kind == 'o')
    {
        is_source = true;

        char *n = name;
        if (n[0] == '/')
            n++;
        if (strncmp(n, "external/distfiles/", 19) == 0)
            n += 9; // leaves "distfiles/..."

        // Candidate locations; a leading '*' stands for source_dir.
        static const char *paths[] = { "replacement/", "*seed/", "*seed/stage0-posix/", "*"};
        for (size_t i = 0; i < sizeof(paths)/sizeof(paths[0]); i++)
        {
            // Fix: bounded formatting instead of strcpy/strcat; a candidate
            // that does not fit in the buffer is skipped.
            char poss_source_name[MAX_FILENAME_LEN];
            int written = paths[i][0] == '*'
                ? snprintf(poss_source_name, sizeof(poss_source_name), "%s%s%s", source_dir, paths[i] + 1, n)
                : snprintf(poss_source_name, sizeof(poss_source_name), "%s%s", paths[i], n);
            if (written < 0 || (size_t)written >= sizeof(poss_source_name))
                continue;
            if (access(poss_source_name, R_OK) == 0)
            {
                source_name = copystr(poss_source_name);
                break;
            }
        }

        // Fix: only names below source_dir carry that prefix. Skipping
        // len_source_dir characters of a "replacement/..." name produced a
        // meaningless URL and could read past the end of the string.
        // NOTE(review): replacement files therefore get no URL now; confirm
        // that this is the desired presentation.
        if (source_name != 0 && strncmp(source_name, source_dir, len_source_dir) == 0)
            url = get_url(source_name + len_source_dir, lbs_subModules);
    }
}

// True when the file is executed while no creating open (other than from an
// archive) is in effect, i.e. it was present before the trace started.
bool File::exec_before_created()
{
    bool is_created = false;
    for (Action *action = actions; action != 0; action = action->next_on_file)
        if (action->kind == 'o' && action->o_creat && !action->from_archive)
            is_created = true;
        else if (action->kind == 'r')
            is_created = false;
        else if (action->kind == 'e' && !is_created)
            return true;
    return false;
}

// True when the first action on the file has o_rdonly or o_rdwr set.
bool File::used_as_input()
{
    return actions != 0 && (actions->o_rdonly || actions->o_rdwr);
}

// True when the file is not an input, was created or written (not from an
// archive), was not removed afterwards, and was never executed while absent.
bool File::produced_and_not_removed()
{
    if (used_as_input())
        return false;

    bool is_created = false;
    for (Action *action = actions; action != 0; action = action->next_on_file)
        if (   action->kind == 'o'
            && (action->o_creat || action->o_wronly || (action->o_rdwr && action->o_trunc))
            && !action->from_archive)
            is_created = true;
        else if (action->kind == 'r')
            is_created = false;
        else if (action->kind == 'e' && !is_created)
            return false;
    return is_created;
}

// ----------------------------------

// Matches 'str' against the text at 's'. A space in 'str' matches one space
// followed by any number of extra spaces. On success 's' is advanced past
// the match. Note that running out of input before 'str' ends also counts
// as a match.
bool accept_string(const char *str, char *&s)
{
    char *t = s;
    while (*str != '\0' && *t != '\0')
    {
        if (*str != *t)
            return false;
        t++;
        if (*str == ' ')
        {
            while (*t == ' ')
                t++;
        }
        str++;
    }
    s = t;
    return true;
}

// Parses a double-quoted file name at 's' into 'filename' (room for
// MAX_FILENAME_LEN+1 bytes) and advances 's' past the closing quote.
// Runs of '/' are collapsed into one. Returns false when 's' does not start
// with a quote; terminates the program when the name is too long.
bool parse_filename(char *filename, char *&s)
{
    if (*s != '"')
        return false;
    s++;
    for (int i = 0; i < MAX_FILENAME_LEN; i++)
    {
        if (*s == '"')
        {
            filename[i] = '\0';
            s++;
            return true;
        }
        while (s[0] == '/' && s[1] == '/')
            s++;
        filename[i] = *s++;
    }
    fprintf(fout, "file name too long\n");
    exit(-1);
    return false;
}

// Current working directory as tracked through successful chdir calls.
char cd_path[MAX_FILENAME_LEN] = "/";

// Makes 'filename' absolute, in place: leading duplicate slashes are removed
// from absolute names; relative names are resolved against cd_path,
// processing "." and ".." components. Terminates the program when the
// result is longer than MAX_FILENAME_LEN.
void add_cd_path(char *filename)
{
    char buf[2*MAX_FILENAME_LEN+1];
    if (filename[0] == '/')
    {
        char *s = filename;
        while (s[1] == '/')
            s++;
        strcpy(buf, s);
        strcpy(filename, buf);
        return;
    }
    strcpy(buf, cd_path);
    int i = strlen(buf);
    while (i > 0 && buf[i-1] == '/')
        i--;
    char *f = filename;
    while (f[0] != '\0')
    {
        if (f[0] == '.' && (f[1] == '\0' || f[1] == '/'))
        {
            // "." component: skip it
            f++;
            while (f[0] == '/')
                f++;
        }
        else if (f[0] == '.' && f[1] == '.' && (f[2] == '\0' || f[2] == '/'))
        {
            // ".." component: drop the last component collected so far
            f += 2;
            while (f[0] == '/')
                f++;
            while (i > 0 && buf[i-1] != '/')
                i--;
            while (i > 0 && buf[i-1] == '/')
                i--;
        }
        else
        {
            buf[i++] = '/';
            while (f[0] != '\0' && f[0] != '/')
                buf[i++] = *f++;
            while (f[0] == '/')
                f++;
        }
    }
    buf[i] = '\0';
    if (i > MAX_FILENAME_LEN)
    {
        fprintf(fout, "add_cd_path reached lengt %d\n", i);
        exit(1);
    }
    strcpy(filename, buf);
}

// Parses a quoted file name at 's' and makes it absolute. Terminates the
// program when no file name is found.
void read_filename(char *filename, char *&s)
{
    if (!parse_filename(filename, s))
    {
        fprintf(fout, "Failed to parse filename from '%s'\n", s);
        exit(0);
    }
    add_cd_path(filename);
}

// Maximum number of system calls that may be "<unfinished ...>" at once.
#define NR_PARR_COMMANDS 4

// Reports a trace line whose pid does not belong to a known process.
// Returns true when 'process' is usable.
static bool known_process(Process *process, unsigned long pid, const char *line)
{
    if (process != 0)
        return true;
    fprintf(fout, "Error: line '%s' refers to unknown process %lu\n", line, pid);
    return false;
}

// Closes the trace file and yields the failure result of process_trace_file.
static bool fail_trace(FILE *f)
{
    fclose(f);
    return false;
}

// Reads the trace file and records processes, files and actions.
// The first 8 lines are skipped; the pid on line 9 becomes the first
// process. Calls split into "<unfinished ...>" / "<... resumed>" parts are
// joined before they are interpreted. Reading stops (successfully) at the
// execve of /usr/bin/tcc-boot0 or at the first unrecognized line.
// Returns false when the file cannot be opened or a line is malformed.
bool process_trace_file(const char *trace_fn)
{
    FILE *f = fopen(trace_fn, "r");
    if (f == 0)
    {
        // Fix: a missing trace file used to crash in fgets.
        fprintf(fout, "Cannot open trace file '%s'\n", trace_fn);
        return false;
    }

    char buffer[10000];

    struct Command
    {
        bool active;
        unsigned long pid;
        char cmd[10000];
    };
    Command cmd[NR_PARR_COMMANDS];
    for (int i = 0; i < NR_PARR_COMMANDS; i++)
        cmd[i].active = false;

    long line_nr = 0;

    while (fgets(buffer, 9999, f))
    {
        // Skip the first 8 lines
        line_nr++;
        if (line_nr <= 8)
            continue;

        int len = strlen(buffer);
        if (len > 0 && buffer[len-1] != '\n')
        {
            printf("Line '%s' does not end with newline\n", buffer);
            return fail_trace(f);
        }

        char filename[MAX_FILENAME_LEN+1];

        char *s = buffer;

        unsigned long pid = read_unsigned_long(s);
        Process *process = line_nr == 9 ? next_process(pid) : find_process(pid);

        while (*s == ' ' || *s == '\t')
            s++;

        if (strncmp(s, "<... ", 5) == 0)
        {
            // Continuation of an unfinished call of the same pid.
            printf("DEBUG: resumd\n");
            char *ns = strstr(s, "resumed>");
            if (ns == 0)
            {
                printf("Line '%s' expect 'resumed>'\n", buffer);
                return fail_trace(f);
            }
            s = ns + 8;
            bool found = false;
            for (int i = 0; i < NR_PARR_COMMANDS; i++)
                if (cmd[i].active && cmd[i].pid == pid)
                {
                    printf("DEBUG: more resumed: '%s'\n", s);
                    // Fix: make sure the joined text fits before appending.
                    if (strlen(cmd[i].cmd) + strlen(s) >= sizeof(cmd[i].cmd))
                    {
                        printf("Line '%s' makes the resumed command too long\n", buffer);
                        return fail_trace(f);
                    }
                    strcat(cmd[i].cmd, s);
                    printf("DEBUG: makes: '%s'\n", cmd[i].cmd);
                    char *s_unf = strstr(cmd[i].cmd, " <unfinished ...>");
                    if (s_unf != 0)
                    {
                        // Still incomplete: wait for the next part.
                        *s_unf = '\0';
                        s = 0;
                    }
                    else
                    {
                        cmd[i].active = false;
                        s = cmd[i].cmd;
                    }
                    found = true;
                    break;
                }
            if (!found)
            {
                printf("Line: '%s' is not correct continuation\n", buffer);
                return fail_trace(f);
            }
        }
        else if (strstr(s, " <unfinished ...>") != 0)
        {
            // First part of a call: park it in a free slot.
            bool found = false;
            for (int i = 0; i < NR_PARR_COMMANDS; i++)
                if (!cmd[i].active)
                {
                    cmd[i].active = true;
                    cmd[i].pid = pid;
                    strcpy(cmd[i].cmd, s);
                    char *s_unf = strstr(cmd[i].cmd, " <unfinished ...>");
                    if (s_unf != 0)
                    {
                        *s_unf = '\0';
                        s = 0;
                    }
                    printf("DEBUG: Unfinshed %lu: '%s'\n", pid, cmd[i].cmd);
                    found = true;
                    break;
                }
            if (!found)
            {
                printf("Line: '%s' too many parralel commands. Increase NR_PARR_COMMANDS\n", buffer);
                return fail_trace(f);
            }
        }

        if (s == 0)
        {
            // nothing to process
        }
        else if (accept_string("execve(", s))
        {
            read_filename(filename, s);

            if (!known_process(process, pid, buffer))
                return fail_trace(f);
            File *exec_file = get_file(filename);
            new Action(exec_file, process, 'e');
            if (strcmp(filename, "/usr/bin/tcc-boot0") == 0)
            {
                fprintf(fout, "Stop at %lu: %s\n", pid, s);
                break;
            }
            exec_file->init_source();
        }
        else if (accept_string("open(", s))
        {
            read_filename(filename, s);

            bool o_rdonly = false;
            bool o_wronly = false;
            bool o_rdwr = false;
            bool o_creat = false;
            bool o_trunc = false;
            bool o_excl = false;
            if (!accept_string(", ", s))
                fprintf(fout, "Expecting ', at '%s'\n", s);
            // Flags are separated by '|'; the loop increment skips the '|'.
            for (;*s != '\0'; s++)
            {
                if (accept_string("O_RDONLY", s))
                    o_rdonly = true;
                else if (accept_string("O_WRONLY", s))
                    o_wronly = true;
                else if (accept_string("O_RDWR", s))
                    o_rdwr = true;
                else if (accept_string("O_CREAT", s))
                    o_creat = true;
                else if (accept_string("O_TRUNC", s))
                    o_trunc = true;
                else if (accept_string("O_EXCL", s))
                    o_excl = true;
                else
                {
                    fprintf(fout, "Unknown %s", s);
                    break;
                }
                if (*s == ',')
                    break;
                if (*s != '|')
                    break;
            }

            unsigned long mode = 0;
            if (accept_string(", ", s))
            {
                mode = read_unsigned_long(s);
            }
            if (!accept_string(") = ", s))
            {
                fprintf(fout, "Expecting ') = ' at '%s'\n", s);
                return fail_trace(f);
            }

            long handle = read_long(s);

            if ((o_rdonly ? 1 : 0) + (o_wronly ? 1 : 0) + (o_rdwr ? 1 : 0) != 1)
                fprintf(fout, "Warning: Open '%s' as undefined read/write mode\n", filename);

            // Failed opens (negative result) are not recorded.
            if (handle > -1)
            {
                if (!known_process(process, pid, buffer))
                    return fail_trace(f);
                File *file = get_file(filename);

                Action *action = new Action(file, process, 'o');
                action->file_handle = handle;
                action->o_rdonly = o_rdonly;
                action->o_wronly = o_wronly;
                action->o_rdwr = o_rdwr;
                action->o_creat = o_creat;
                action->o_trunc = o_trunc;
                action->o_excl = o_excl;
                if (o_creat)
                    action->mode = mode;
                file->init_source();
            }
        }
        else if (accept_string("close(", s))
        {
            unsigned long handle = read_unsigned_long(s);
            if (*s != ')')
                fprintf(fout, "Expecting ')' at '%s'\n", s);

            if (!known_process(process, pid, buffer))
                return fail_trace(f);
            Action *last_open_action = process->lastOpenAction(handle);
            if (last_open_action == 0)
                fprintf(fout, "Error: Handle %lu not opened by process %d\n", handle, process->nr);
            else if (last_open_action->is_closed)
                fprintf(fout, "Warning: File %s already closed for process %d\n", last_open_action->file->name, process->nr);
            else
                last_open_action->is_closed = true;
        }
        else if (accept_string("chmod(", s))
        {
            read_filename(filename, s);
            unsigned long mode = 0;
            if (accept_string(", ", s))
            {
                mode = read_unsigned_long(s);
            }
            if (*s != ')')
            {
                fprintf(fout, "Expecting ')' at '%s'\n", s);
                return fail_trace(f);
            }

            if (!known_process(process, pid, buffer))
                return fail_trace(f);
            File *file = get_file(filename);
            Action *action = new Action(file, process, 'c');
            action->mode = mode;
        }
        else if (accept_string("chdir(", s))
        {
            read_filename(filename, s);
            if (!accept_string(") = ", s))
            {
                fprintf(fout, "Expecting ') = ' at '%s'\n", s);
                return fail_trace(f);
            }
            long result = read_long(s);
            // Only a successful chdir changes the tracked directory.
            if (result == 0)
                strcpy(cd_path, filename);
        }
        else if (accept_string("unlink(", s))
        {
            read_filename(filename, s);
            if (*s != ')')
                fprintf(fout, "Expecting ')' at '%s'\n", s);

            if (!known_process(process, pid, buffer))
                return fail_trace(f);
            File *file = get_file(filename);
            new Action(file, process, 'r');
        }
        else if (accept_string("fork(", s))
        {
            if (!accept_string(") = ", s))
            {
                fprintf(fout, "Expecting ') = ' at '%s'\n", s);
                return fail_trace(f);
            }
            unsigned long new_pid = read_unsigned_long(s);
            if (*s != '\n')
                fprintf(fout, "fork end with '%s'\n", s);

            if (!known_process(process, pid, buffer))
                return fail_trace(f);
            Process *new_process = next_process(new_pid);
            new_process->parent = process;
            Action *action = new Action(0, process, 'E');
            action->child_process = new_process;
        }
        else if (accept_string("+++ exited with ", s))
        {
        }
        else if (accept_string("--- SIGCHLD ", s))
        {
        }
        else
        {
            fprintf(fout, "Unknown: '%s'\n", buffer);
            break;
        }
    }

    fclose(f);

    return true;
}

// ----------------------------------------------------

// Element of a list of URLs, kept sorted and free of duplicates by
// collect_sources. The url string is shared, not copied.
class Source
{
public:
    const char *url;
    Source *next;
    Source(const char *u, Source *n) : url(u), next(n) {}
};

// Adds to *ref_sources the URLs of all files that 'process' reads. For an
// input file without a URL, the sources of the process that produced it
// (before this process first touched it) are collected recursively.
void collect_sources(Process *process, Source **ref_sources)
{
    for (Action *action = process->actions; action != 0; action = action->next_in_process)
    {
        if (action->kind == 'o' && (action->o_rdonly || (action->o_rdwr && !action->o_trunc)))
        {
            File *file = action->file;
            if (file->url != 0)
            {
                const char *url = file->url;
                // Sorted insert, skipping URLs that are already present.
                Source **ref_source = ref_sources;
                while (*ref_source != 0 && strcmp((*ref_source)->url, url) < 0)
                    ref_source = &(*ref_source)->next;
                if (*ref_source == 0 || strcmp((*ref_source)->url, url) > 0)
                    *ref_source = new Source(url, *ref_source);
            }
            else
            {
                Process *produced_by = 0;
                for (Action *file_action = file->actions; file_action != 0; file_action = file_action->next_on_file)
                {
                    if (file_action->process == process)
                        break;
                    else if (file_action->kind == 'r')
                        produced_by = 0;
                    else if (file_action->is_produced())
                        produced_by = file_action->process;
                }

                if (produced_by != 0)
                    collect_sources(produced_by, ref_sources);
            }
        }
    }
}

bool include_source = false;

// Writes the contents of 'f_source' to 'f' inside a <PRE> element and closes
// 'f_source'. Does nothing when 'f_source' is 0.
//  binary: bytes are written as two-digit hex values, ten per line.
//  text:   HTML special characters and bytes >= 128 are written as
//          character references, tabs are expanded to 4-column tab stops,
//          form feeds are treated as newlines and other control characters
//          are dropped.
void output_file(FILE *f, FILE *f_source, bool binary)
{
    if (f_source == 0)
        return;

    fprintf(f, "<PRE>");
    // Fix: the result of fgetc is kept in an int so that EOF can be told
    // apart from the byte 0xFF, and the loops end on read errors as well.
    int c;
    if (binary)
    {
        int i = 0;
        while ((c = fgetc(f_source)) != EOF)
        {
            fprintf(f, " %02X", c);
            if (++i % 10 == 0)
                fprintf(f, "\n");
        }
    }
    else
    {
        int col = 0;
        while ((c = fgetc(f_source)) != EOF)
        {
            col++;
            // Fix: the replacement strings were the raw characters
            // themselves, which escapes nothing; HTML needs entities.
            if (c == '<')
                fprintf(f, "&lt;");
            else if (c == '>')
                fprintf(f, "&gt;");
            else if (c == '&')
                fprintf(f, "&amp;");
            else if (c == 160)
                fprintf(f, "&nbsp;");
            else if (c == 169)
                fprintf(f, "&copy;");
            else if (c == 194)
                fprintf(f, "&Acirc;");
            else if (c == 195)
                fprintf(f, "&Atilde;");
            else if (c == 197)
                fprintf(f, "&Aring;");
            else if (c == 216)
                fprintf(f, "&Oslash;");
            else if (c == 231)
                fprintf(f, "&ccedil;");
            else if (c == 246)
                fprintf(f, "&ouml;");
            else if (c >= 128)
                fprintf(f, "&#%d;", c);
            else if (c == '\n' || c == 12)
            {
                fprintf(f, "\n");
                col = 0;
            }
            else if (c == '\t')
            {
                fprintf(f, " ");
                while (col % 4 > 0)
                {
                    fprintf(f, " ");
                    col++;
                }
            }
            else if (c < ' ')
                ; // skip control characters
            else
                fprintf(f, "%c", c);
        }
    }
    fprintf(f, "</PRE>");
    fclose(f_source);
}

// Writes the report section for one file: a heading with anchor F<nr>, the
// processes that execute or read it (up to the first removal or write), the
// location and URL of its source and, unless it is a .tar.gz or .tar.bz2
// archive, its contents.
void write_html_file(FILE *f, File *file, bool binary)
{
    fprintf(f, "<H3><A NAME=\"F%d\">File %s</A></H3>\n\n<UL>\n", file->nr, file->name);
    for (Action *action = file->actions; action != 0; action = action->next_on_file)
    {
        if (action->kind == 'r')
            break;
        if (action->kind == 'e')
            fprintf(f, "<LI>Executed in <A HREF=\"#S%d\">Process %d</A>\n", action->process->nr, action->process->nr);
        if (action->kind == 'o')
        {
            if (action->o_wronly || action->o_rdwr)
                break;
            if (action->o_rdonly)
                fprintf(f, "<LI>Input for <A HREF=\"#S%d\">Process %d</A>\n", action->process->nr, action->process->nr);
        }
    }
    fprintf(f, "</UL>\n\n");

    // Fix: source_name is 0 when no local copy was found; passing that to
    // fopen and to "%s" is undefined behaviour.
    if (file->source_name == 0)
    {
        fprintf(f, "(Source not found)\n");
        return;
    }

    FILE *f_source = fopen(file->source_name, "r");
    if (f_source == 0)
    {
        fprintf(f, "(Source not found at '%s')\n", file->source_name);
        return;
    }

    if (strncmp(file->source_name, source_dir, len_source_dir) == 0)
        fprintf(f, "Live-bootstrap source file is '%s'.<BR>\n", file->source_name + len_source_dir);
    else
        fprintf(f, "Source file is '%s'.<BR>\n", file->source_name);
    if (file->url != 0)
    {
        fprintf(f, "URL: <A HREF=\"%s\">%s</A>\n", file->url, file->url);
    }
    else
        fprintf(f, "<B>No URL</B>\n");

    size_t len = strlen(file->source_name);
    if (   (len > 7 && strcmp(file->source_name + len - 7, ".tar.gz") == 0)
        || (len > 8 && strcmp(file->source_name + len - 8, ".tar.bz2") == 0))
    {
        fprintf(f, "(Not shown)\n");
        fclose(f_source);
        return;
    }

    output_file(f, f_source, binary);
}

// Writes the complete HTML report to 'f': introduction, binary seed files,
// processes, input source files, output files, and the listings of
// run_chroot and scan_trace.cpp (read from the current directory).
void write_html(FILE *f)
{
    fprintf(f,
        "<HTML><HEAD>\n<TITLE>live-bootstrap</TITLE>\n"
        "</HEAD><BODY>\n\n<H1>live-bootstrap</H1>"
        "<!--ONEWAY-->\n"
        "This page is produced by the version of the program <TT>scan_trace.cpp</TT>\n"
        "listed at <A HREF=\"#Parser\">the bottom</A> of this page.\n"
        "The program parsers the contents of <TT>trace.txt</TT> file that is produced by\n"
        "running the <TT>run_chroot</TT> Bash script from a sibling directory of a clone of\n"
        "<A HREF=\"https://github.com/fosslinux/live-bootstrap\">fosslinux/live-bootstrap</A>\n"
        "(the commit <A HREF=\"https://github.com/fosslinux/live-bootstrap/commit/%s\"><TT>%.8s</TT></A>)\n"
        "in which the <A HREF=\"https://github.com/fosslinux/live-bootstrap/blob/%s/download-distfiles.sh\"\n"
        "><TT>download-distfiles.sh</TT></A> script has been executed as well.\n"
        "(This is still work in progress.)\n"
        "<P>\n"
        "The code displayed on this page is not copyrighted by me but by the owners of\n"
        "respective repositories as also mentioned in the headers of the various files.\n"
        "<UL>\n"
        "<LI><A HREF=\"#Seeds\">Binary seeds files</A>\n"
        "<LI><A HREF=\"#Processes\">Processes</A>\n"
        "<LI><A HREF=\"#Input\">Input source files</A>\n"
        "<LI><A HREF=\"#Output\">Output files</A>\n"
        "<LI><A HREF=\"#Parser\">Parse program</A>\n"
        "</UL>\n",
        live_bootstrap_commit, live_bootstrap_commit, live_bootstrap_commit);

    fprintf(f, "\n\n<H2><A NAME=\"Seeds\">Binary seeds files</A></H2>\n\n");

    for (File *file = files; file != 0; file = file->next)
        if (file->exec_before_created())
            write_html_file(f, file, true);

    fprintf(f, "\n<H2><A NAME=\"Processes\">Processes</A></H2>\n\n");

    for (Process *process = all_processes; process != 0; process = process->next)
    {
        fprintf(f, "<H3><A NAME=\"S%d\">Process %d</A></H3>\n\n", process->nr, process->nr);
        if (process->parent != 0)
            fprintf(f, "(Executed by <A HREF=\"#S%d\">Process %d</A>)\n", process->parent->nr, process->parent->nr);
        fprintf(f, "<UL>\n");
        for (Action *action = process->actions; action != 0; action = action->next_in_process)
        {
            if (action->kind == 'E' && action->child_process != 0)
            {
                fprintf(f, "<LI>Executes <A HREF=\"#S%d\">Process %d</A>\n", action->child_process->nr, action->child_process->nr);
            }
            else
            {
                const char *oper = action->oper_name();
                if (oper != 0)
                {
                    // Report each (file, operation) pair once per process.
                    // The labels are compared by address; see oper_name.
                    bool repeated = false;
                    for (Action *prev_action = process->actions; prev_action != action; prev_action = prev_action->next_in_process)
                        if (prev_action->kind == 'o' && prev_action->file == action->file && prev_action->oper_name() == oper)
                        {
                            repeated = true;
                            break;
                        }
                    if (!repeated)
                    {
                        File *file = action->file;
                        fprintf(f, "<LI>%s ", oper);
                        if (action->is_produced())
                        {
                            // List later uses of this version of the file,
                            // up to its removal or replacement.
                            fprintf(f, "%s\n<UL>\n", file->name);
                            for (Action *file_action = action->next_on_file; file_action != 0; file_action = file_action->next_on_file)
                            {
                                if (file_action->kind == 'r')
                                {
                                    fprintf(f, "<LI>Deleted by <A HREF=\"#S%d\">process %d</A>\n", file_action->process->nr, file_action->process->nr);
                                    break;
                                }
                                else if (   file_action->kind == 'o'
                                         && (   file_action->o_creat
                                             || ((file_action->o_wronly || file_action->o_rdwr) && file_action->o_trunc)))
                                    break;
                                else if (file_action->kind == 'e' || file_action->o_rdonly)
                                    fprintf(f, "<LI>%s <A HREF=\"#S%d\">process %d</A>\n",
                                        file_action->kind == 'e' ? "Used as executable" :
                                        file_action->kind == 'o' ?
                                            (  file_action->o_rdonly ? "Used as input"
                                             : file_action->o_wronly ? "Produced by"
                                             : file_action->o_rdwr ? "Modified by"
                                             : "Modified by") :
                                        "Used in",
                                        file_action->process->nr, file_action->process->nr);
                            }
                            fprintf(f, "</UL>\n\n");
                        }
                        else
                        {
                            // Find the process that produced the file before
                            // this process first touched it.
                            Process *produced_by = 0;
                            for (Action *file_action = file->actions; file_action != 0; file_action = file_action->next_on_file)
                            {
                                if (file_action->process == process)
                                    break;
                                else if (file_action->kind == 'r')
                                    produced_by = 0;
                                else if (file_action->is_produced())
                                    produced_by = file_action->process;
                            }

                            if (file->is_source)
                                fprintf(f, "<A HREF=\"#F%d\">%s</A>", file->nr, file->name);
                            else
                                fprintf(f, "%s", action->file->name);

                            File *file_copy_from = file;
                            while (file_copy_from->copy_from != 0)
                                file_copy_from = file_copy_from->copy_from;
                            if (file_copy_from->url != 0)
                            {
                                fprintf(f, " from <A HREF=\"%s\">source</A>", file_copy_from->url);
                                if (file != file_copy_from)
                                    fprintf(f, " (through copy)");
                                if (produced_by)
                                    fprintf(f, " (produced by <A HREF=\"#S%d\">process %d</A>)", produced_by->nr, produced_by->nr);
                            }
                            else if (produced_by != 0)
                                fprintf(f, " produced by <A HREF=\"#S%d\">process %d</A>", produced_by->nr, produced_by->nr);

                            fprintf(f, "\n");
                        }
                    }
                }
            }
        }
        fprintf(f, "</UL>\n\n");

        // NOTE(review): the list of sources is only produced for the process
        // with sequence number 731; the reason for this particular number is
        // not visible in this file.
        if (process->nr == 731)
        {
            Source *sources = 0;
            collect_sources(process, &sources);
            fprintf(f, "<P>Sources used:\n<UL>\n");
            for (Source *source = sources; source != 0; source = source->next)
                fprintf(f, "<LI> %s\n", source->url);
            fprintf(f, "</UL>\n");
        }
    }

    fprintf(f, "<H2><A NAME=\"Input\">Input source files</A></H2>\n\n");
    for (File *file = files; file != 0; file = file->next)
        if (file->used_as_input())
            write_html_file(f, file, false);

    fprintf(f, "\n<H2><A NAME=\"Output\">Output files</A></H2>\n\n\n");

    // Three passes over the files, one per category.
    for (int t = 0; t < 3; t++)
    {
        switch (t)
        {
            case 0: fprintf(f, "Executables files:\n<UL>\n"); break;
            case 1: fprintf(f, "Intermediary files (not from sources and used):\n<UL>\n"); break;
            case 2: fprintf(f, "Produced (not from source and also not used):\n<UL>\n"); break;
        }

        for (File *file = files; file != 0; file = file->next)
            if (!file->used_as_input() && !file->exec_before_created())
            {
                bool used = false;
                bool executed = false;
                unsigned long mode = 0;
                int process_nr = -1;  // producing process, -1 when absent or removed
                for (Action *action = file->actions; action != 0; action = action->next_on_file)
                {
                    if (action->kind == 'e')
                        executed = true;
                    else if (action->kind == 'o')
                    {
                        if (action->o_creat || action->o_wronly)
                        {
                            mode = action->mode;
                            process_nr = action->process->nr;
                        }
                        else if (action->o_rdonly || (action->o_rdwr && !action->o_trunc))
                            used = true;
                    }
                    else if (action->kind == 'r')
                    {
                        used = false;
                        executed = false;
                        process_nr = -1;
                    }
                    else if (action->kind == 'c')
                    {
                        mode = action->mode;
                    }
                }
                if (process_nr != -1)
                {
                    bool is_executable = (mode & 0700) == 0700;
                    if (   (t == 0 && is_executable)
                        || (t == 1 && file->url == 0 && !is_executable && used)
                        || (t == 2 && file->url == 0 && !is_executable && !used))
                    {
                        fprintf(f, "<LI> %s", file->name);
                        if (process_nr > 0)
                            fprintf(f, " produced by <A HREF=\"#S%d\">Process %d</A>", process_nr, process_nr);
                        if (mode != 0 && mode != 0600 && mode != 0700)
                            fprintf(f, " (mode is %lo)", mode);
                        if (executed)
                            fprintf(f, " (also executed)");
                        fprintf(f, "\n");
                    }
                }
            }

        fprintf(f, "</UL>\n\n");
    }

    fprintf(f, "\n<H2><A NAME=\"Parser\">Parse program</A></H2>\n\n");
    fprintf(f, "Below the Bash script <TT>run_chroot</TT> to produce the <TT>trace.txt</TT> file.\n<P>\n");
    output_file(f, fopen("run_chroot", "r"), false);
    fprintf(f, "Below the version of the <TT>scan_trace.cpp</TT> program is given that is used to produce this page.\n<P>\n");
    output_file(f, fopen("scan_trace.cpp", "r"), false);

    fprintf(f,
        "\n\n"
        "<P><HR>\n"
        "<ADDRESS>\n"
        "<A HREF=\"index.html\">Home</A>\n"
        "</ADDRESS>\n"
        "</BODY></HTML>\n");
}

// Reads trace_AMD64.txt, links files copied by /usr/bin/cp to their origin
// and writes docs/index_AMD64.html. Always returns 0.
int main(int argc, char *argv[])
{
    len_source_dir = strlen(source_dir);
    init_subModules();

    if (!process_trace_file("trace_AMD64.txt"))
        return 0;

    // A process that executes /usr/bin/cp and then opens one file read-only
    // followed by one file write-only is taken to be a copy of the first
    // file to the second.
    for (Process *process = all_processes; process != 0; process = process->next)
    {
        Action *action = process->actions;
        if (   action != 0
            && action->kind == 'e'
            && action->file != 0
            && strcmp(action->file->name, "/usr/bin/cp") == 0)
        {
            action = action->next_in_process;
            if (action != 0 && action->kind == 'o' && action->o_rdonly)
            {
                File *source = action->file;
                action = action->next_in_process;
                if (action != 0 && action->kind == 'o' && action->o_wronly)
                {
                    fprintf(fout, "Copy %s -> %s\n", source->name, action->file->name);
                    action->file->copy_from = source;
                }
            }
        }
    }

    FILE *f_html = fopen("docs/index_AMD64.html", "w");
    if (f_html != 0)
    {
        write_html(f_html);
        fclose(f_html);
    }
    else
        fprintf(fout, "Cannot open 'docs/index_AMD64.html' for writing\n");

    return 0;
}