fixed the error case for escaped curly braces, and converted the tokenize function into a inner function

2026-06-22 16:37:10 +08:00 · 2023-10-06 16:56:19 -04:00 · 2023-10-06 16:56:19 -04:00 · 151e9467ae
commit 151e9467ae
parent ee54816421
1 changed files with 89 additions and 72 deletions
--- a/python/builtins.py
+++ b/python/builtins.py
@ -96,77 +96,94 @@ def sorted(iterable, reverse=False, key=None):
    return a
 ##### str #####
 def tokenize(s:str) -> list:
    tokens = []
    L, R = 0,0
    mode = None
    curPos = 0
    lookingForKword = False
    while(R<len(s) and not lookingForKword):
        curChar = s[R]
        nextChar = s[R+1] if R+1<len(s) else ''
        # Escaping case
        if (curChar == '{' and nextChar == '{') or (curChar == '}' and nextChar == '}'):
            tokens.append(curChar)
            L = R+2
            R = R+2
            continue
        # Regular command line arg case
        if curChar == '{' and nextChar == '}':
            if mode is not None and mode != 'auto':
                raise ValueError("Cannot switch from manual field numbering to automatic field specification")
            mode = 'auto'
            # best case {}, just for normal args
            if(L<R):
                tokens.append(s[L:R])
            tokens.append("{"+str(curPos)+"}")
            curPos+=1
            L = R+2
            R = R+2
            continue
        # Kwarg case
        elif (curChar == '{'):
            if mode is not None and mode != 'manual':
                raise ValueError("Cannot switch from automatic field specification to manual field numbering")
            mode = 'manual'
            if(L<R):
                tokens.append(s[L:R])
            lookingForKword = True
            kwL = R+1
            kwR = R+1
            while(kwR<len(s) and s[kwR]!='}'):
                kwR += 1
            tokens.append(s[R:kwR+1])
            if kwR<len(s) and s[kwR] == '}':
                lookingForKword = False
                L = kwR+1
                R = kwR+1
            continue
        R = R+1
    if lookingForKword:
        raise ValueError("Expected '}' before end of string")
    if(not lookingForKword and L<R):
        tokens.append(s[L:R])
    # print("Looking for kword: ", lookingForKword)
    return tokens
 def __f(self:str, *args, **kwargs) -> str:
-    tokens = tokenize(self)
+    def tokenizeString(s:str):
        tokens = []
        L, R = 0,0
        mode = None
        curArg = 0
        # lookingForKword = False
        while(R<len(s)):
            curChar = s[R]
            nextChar = s[R+1] if R+1<len(s) else ''
            # Invalid case 1: stray '}' encountered, example: "ABCD EFGH {name} IJKL}", "Hello {vv}}", "HELLO {0} WORLD}"
            if curChar == '}' and nextChar != '}':
                raise ValueError("Single '}' encountered in format string")        
            # Valid Case 1: Escaping case, we escape "{{ or "}}" to be "{" or "}", example: "{{}}", "{{My Name is {0}}}"
            if (curChar == '{' and nextChar == '{') or (curChar == '}' and nextChar == '}'):
                if (L<R): # Valid Case 1.1: make sure we are not adding empty string
                    tokens.append(s[L:R]) # add the string before the escape
                tokens.append(curChar) # Valid Case 1.2: add the escape char
                L = R+2 # move the left pointer to the next char
                R = R+2 # move the right pointer to the next char
                continue
            # Valid Case 2: Regular command line arg case: example:  "ABCD EFGH {} IJKL", "{}", "HELLO {} WORLD"
            elif curChar == '{' and nextChar == '}':
                if mode is not None and mode != 'auto':
                    # Invalid case 2: mixing automatic and manual field specifications -- example: "ABCD EFGH {name} IJKL {}", "Hello {vv} {}", "HELLO {0} WORLD {}" 
                    raise ValueError("Cannot switch from manual field numbering to automatic field specification")
                mode = 'auto'
                if(L<R): # Valid Case 2.1: make sure we are not adding empty string
                    tokens.append(s[L:R]) # add the string before the special marker for the arg
                tokens.append("{"+str(curArg)+"}") # Valid Case 2.2: add the special marker for the arg
                curArg+=1 # increment the arg position, this will be used for referencing the arg later
                L = R+2 # move the left pointer to the next char
                R = R+2 # move the right pointer to the next char
                continue
            # Valid Case 3: Key-word arg case: example: "ABCD EFGH {name} IJKL", "Hello {vv}", "HELLO {name} WORLD"
            elif (curChar == '{'):
                if mode is not None and mode != 'manual':
                    # # Invalid case 2: mixing automatic and manual field specifications -- example: "ABCD EFGH {} IJKL {name}", "Hello {} {1}", "HELLO {} WORLD {name}"
                    raise ValueError("Cannot switch from automatic field specification to manual field numbering")
                mode = 'manual'
                if(L<R): # Valid case 3.1: make sure we are not adding empty string
                    tokens.append(s[L:R]) # add the string before the special marker for the arg
                # We look for the end of the keyword          
                kwL = R # Keyword left pointer
                kwR = R+1 # Keyword right pointer
                while(kwR<len(s) and s[kwR]!='}'):
                    if s[kwR] == '{': # Invalid case 3: stray '{' encountered, example: "ABCD EFGH {n{ame} IJKL {", "Hello {vv{}}", "HELLO {0} WOR{LD}"
                        raise ValueError("Unexpected '{' in field name")
                    kwR += 1
                # Valid case 3.2: We have successfully found the end of the keyword
                if kwR<len(s) and s[kwR] == '}':
                    tokens.append(s[kwL:kwR+1]) # add the special marker for the arg
                    L = kwR+1
                    R = kwR+1
                # Invalid case 4: We didn't find the end of the keyword, throw error
                else:
                    raise ValueError("Expected '}' before end of string")
                continue
            R = R+1
        # Valid case 4: We have reached the end of the string, add the remaining string to the tokens 
        if L<R:
            tokens.append(s[L:R])
        # print(tokens)
        return tokens
    tokens = tokenizeString(self)
    argMap = {}
    for i, a in enumerate(args):
        argMap[str(i)] = a
@ -176,7 +193,7 @@ def __f(self:str, *args, **kwargs) -> str:
            key = t[1:-1]
            argMapVal = argMap.get(key, None)
            kwargsVal = kwargs.get(key, None)
-            
+                                    
            if argMapVal is None and kwargsVal is None:
                raise ValueError("No arg found for token: "+t)
            elif argMapVal is not None:
@ -187,7 +204,7 @@ def __f(self:str, *args, **kwargs) -> str:
            final_tokens.append(t)
    return ''.join(final_tokens)
- 
+
    # if '{}' in self:
    #     for i in range(len(args)):
    #         self = self.replace('{}', str(args[i]), 1)