î
æ^Q\j8  ã               @   sX   d  Z  d d l Z Gd d „  d e ƒ Z Gd d „  d e ƒ Z Gd d „  d e ƒ Z d S)	zÇ
This module contains a tokenizer for Excel formulae.

The tokenizer is based on the Javascript tokenizer found at
http://ewbi.blogs.com/develops/2004/12/excel_formula_p.html written by Eric
Bachtal
é    Nc               @   s   e  Z d  Z d Z d S)ÚTokenizerErrorz$Base class for all Tokenizer errors.N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__© r   r   úO/var/www/dbchiro/venv/lib/python3.4/site-packages/openpyxl/formula/tokenizer.pyr      s   r   c               @   s  e  Z d  Z d Z e j d ƒ Z e j d ƒ Z i e j d ƒ d 6e j d ƒ d 6Z d. Z	 d Z
 d d „  Z d d „  Z d d „  Z d d „  Z d d „  Z d d „  Z d d „  Z d d  „  Z d! d" „  Z d# d$ „  Z d% d& „  Z d' d( „  Z d) d* „  Z d+ d, „  Z d- S)/Ú	Tokenizeraf  
    A tokenizer for Excel worksheet formulae.

    Converts a unicode string representing an Excel formula (in A1 notation)
    into a sequence of `Token` objects.

    `formula`: The unicode string to tokenize

    Tokenizer defines a method `._parse()` to parse the formula into tokens,
    which can then be accessed through the `.items` attribute.

    z^[1-9](\.[0-9]+)?[Ee]$z +z"(?:[^"]*"")*[^"]*"(?!")ú"z'(?:[^']*'')*[^']*'(?!')ú'ú#NULL!ú#DIV/0!ú#VALUE!ú#REF!ú#NAME?ú#NUM!ú#N/Aú#GETTING_DATAz,;}) +-*/^&=><%c             C   s;   | |  _  g  |  _ g  |  _ d |  _ g  |  _ |  j ƒ  d  S)Nr   )ÚformulaÚitemsÚtoken_stackÚoffsetÚtokenÚ_parse)Úselfr   r   r   r   Ú__init__.   s    					zTokenizer.__init__c          	   C   s­  |  j  r d S|  j s d S|  j d d k r? |  j  d 7_  n# |  j j t |  j t j ƒ ƒ d Sd |  j f d |  j f d |  j f d |  j	 f d	 |  j
 f d
 |  j f d |  j f d |  j f f } i  } x- | D]% \ } } | j t j | | ƒ ƒ qÕ Wxž |  j  t |  j ƒ k  rž|  j ƒ  r+qn  |  j |  j  } | |  j k rW|  j ƒ  n  | | k r||  j  | | ƒ  7_  q|  j j | ƒ |  j  d 7_  qW|  j ƒ  d S)z5Populate self.items with the tokens from the formula.Nr   ú=é   z"'ú[ú#ú z
+-*/^&=><%z{(z)}z;,)r   r   r   ÚappendÚTokenÚLITERALÚ_parse_stringÚ_parse_bracketsÚ_parse_errorÚ_parse_whitespaceÚ_parse_operatorÚ_parse_openerÚ_parse_closerÚ_parse_separatorÚupdateÚdictÚfromkeysÚlenÚcheck_scientific_notationÚTOKEN_ENDERSÚ
save_tokenr   )r   Z	consumersZ
dispatcherÚcharsZconsumerÚ	curr_charr   r   r   r   7   s<    		zTokenizer._parsec             C   sé   |  j  ƒ  |  j |  j } | d k s, t ‚ |  j | } | j |  j |  j d … ƒ } | d k r˜ | d k rv d n d } t d | |  j f ƒ ‚ n  | j d ƒ } | d k rÏ |  j j	 t
 j | ƒ ƒ n |  j j	 | ƒ t | ƒ S)	a¹  
        Parse a "-delimited string or '-delimited link.

        The offset must be pointing to either a single quote ("'") or double
        quote ('"') character. The strings are parsed according to Excel
        rules where to escape the delimiter you just double it up. E.g.,
        "abc""def" in Excel is parsed as 'abc"def' in Python.

        Returns the number of characters matched. (Does not update
        self.offset)

        r
   r   NÚstringÚlinkz-Reached end of formula while parsing %s in %sr   )r
   r   )Úassert_empty_tokenr   r   ÚAssertionErrorÚSTRING_REGEXESÚmatchr   Úgroupr   r!   r"   Úmake_operandr   r/   )r   ÚdelimÚregexr:   Úsubtyper   r   r   r$   ^   s    
zTokenizer._parse_stringc             C   s…   |  j  |  j d k s t ‚ |  j  j d |  j ƒ d } | d k rZ t d |  j  ƒ ‚ n  |  j j |  j  |  j | … ƒ | |  j S)zœ
        Consume all the text between square brackets [].

        Returns the number of characters matched. (Does not update
        self.offset)

        r   ú]r   r   zEncountered unmatched '[' in %s)r   r   r8   Úfindr   r   r!   )r   Úrightr   r   r   r%   |   s     zTokenizer._parse_bracketsc             C   s¢   |  j  ƒ  |  j |  j d k s& t ‚ |  j |  j d … } xC |  j D]8 } | j | ƒ rF |  j j t j	 | ƒ ƒ t
 | ƒ SqF Wt d |  j |  j f ƒ ‚ d S)zÃ
        Consume the text following a '#' as an error.

        Looks for a match in self.ERROR_CODES and returns the number of
        characters matched. (Does not update self.offset)

        r   Nz)Invalid error code at position %d in '%s')r7   r   r   r8   ÚERROR_CODESÚ
startswithr   r!   r"   r<   r/   r   )r   Z
subformulaÚerrr   r   r   r&   Œ   s    
zTokenizer._parse_errorc             C   s^   |  j  |  j d k s t ‚ |  j j t d t j ƒ ƒ |  j j |  j  |  j d … ƒ j	 ƒ  S)z†
        Consume a string of consecutive spaces.

        Returns the number of spaces found. (Does not update self.offset).

        r    N)
r   r   r8   r   r!   r"   ÚWSPACEÚ	WSPACE_REr:   Úend)r   r   r   r   r'   Ÿ   s    zTokenizer._parse_whitespacec             C   s`  |  j  |  j |  j d … d	 k rZ |  j j t |  j  |  j |  j d … t j ƒ ƒ d S|  j  |  j } | d k s| t ‚ | d k r t d t j ƒ } n¯ | d k r¾ t | t j ƒ } nŽ |  j sÜ t | t j ƒ } np |  j d
 } | j	 t j
 k p| j t j k p| j t j k } | r:t | t j ƒ } n t | t j ƒ } |  j j | ƒ d S)zœ
        Consume the characters constituting an operator.

        Returns the number of charactes consumed. (Does not update
        self.offset)

        é   ú>=ú<=ú<>z
%*/^&=><+-ú%z*/^&=><r   )rJ   rK   rL   éÿÿÿÿ)r   r   r   r!   r"   ÚOP_INr8   ÚOP_POSTÚOP_PREr?   ÚCLOSEÚtypeÚOPERAND)r   r4   r   ÚprevZis_infixr   r   r   r(   ª   s,    #	zTokenizer._parse_operatorc             C   sÂ   |  j  |  j d k s t ‚ |  j  |  j d k rN |  j ƒ  t j d ƒ } nP |  j r d j |  j ƒ d } |  j d d … =t j | ƒ } n t j d ƒ } |  j j	 | ƒ |  j
 j	 | ƒ d S)zˆ
        Consumes a ( or { character.

        Returns the number of charactes consumed. (Does not update
        self.offset)

        ú(ú{Ú Nr   )rV   rW   )r   r   r8   r7   r"   Úmake_subexpr   Újoinr   r!   r   )r   r   Ztoken_valuer   r   r   r)   Ï   s    
	zTokenizer._parse_openerc             C   st   |  j  |  j d k s t ‚ |  j j ƒ  j ƒ  } | j |  j  |  j k r` t d |  j  ƒ ‚ n  |  j j	 | ƒ d S)zˆ
        Consumes a } or ) character.

        Returns the number of charactes consumed. (Does not update
        self.offset)

        ú)ú}zMismatched ( and { pair in '%s'r   )r[   r\   )
r   r   r8   r   ÚpopÚ
get_closerÚvaluer   r   r!   )r   r   r   r   r   r*   å   s    zTokenizer._parse_closerc             C   sÂ   |  j  |  j } | d k s" t ‚ | d k r@ t j d ƒ } nn y |  j d j } Wn$ t k
 rz t d t j ƒ } Yn4 X| t j	 k rŸ t d t j ƒ } n t j d ƒ } |  j
 j | ƒ d S)zˆ
        Consumes a ; or , character.

        Returns the number of charactes consumed. (Does not update
        self.offset)

        ú;ú,r   )r`   ra   rN   )r   r   r8   r"   Úmake_separatorr   rS   Ú
IndexErrorrO   ÚPARENr   r!   )r   r4   r   Ztop_typer   r   r   r+   õ   s    zTokenizer._parse_separatorc             C   sv   |  j  |  j } | d k rr t |  j ƒ d k rr |  j j d j |  j ƒ ƒ rr |  j j | ƒ |  j d 7_ d Sd S)z¾
        Consumes a + or - character if part of a number in sci. notation.

        Returns True if the character was consumed and self.offset was
        updated, False otherwise.

        z+-r   rX   TF)r   r   r/   r   ÚSN_REr:   rZ   r!   )r   r4   r   r   r   r0     s    z#Tokenizer.check_scientific_notationc             C   s,   |  j  r( t d |  j |  j f ƒ ‚ n  d S)zØ
        Ensure that there's no token currently being parsed.

        If there are unconsumed token contents, it means we hit an unexpected
        token transition. In this case, we raise a TokenizerError

        z+Unexpected character at position %d in '%s'N)r   r   r   r   )r   r   r   r   r7     s    	zTokenizer.assert_empty_tokenc             C   sE   |  j  rA |  j j t j d j |  j  ƒ ƒ ƒ |  j  d d … =n  d S)z9If there's a token being parsed, add it to the item list.rX   N)r   r   r!   r"   r<   rZ   )r   r   r   r   r2   ,  s    	%zTokenizer.save_tokenc             C   sU   |  j  s d S|  j  d j t j k r4 |  j  d j Sd d j d d „  |  j  Dƒ ƒ S)z+Convert the parsed tokens back to a string.rX   r   r   c             s   s   |  ] } | j  Vq d  S)N)r_   )Ú.0r   r   r   r   ú	<genexpr>8  s    z#Tokenizer.render.<locals>.<genexpr>)r   rS   r"   r#   r_   rZ   )r   r   r   r   Úrender2  s
    	zTokenizer.renderN)r   r   r   r   r   r   r   r   )r   r   r   r   ÚreÚcompilere   rG   r9   rC   r1   r   r   r$   r%   r&   r'   r(   r)   r*   r+   r0   r7   r2   rh   r   r   r   r   r	      s.    	'%r	   c               @   s÷   e  Z d  Z d Z d d d g Z d Z d Z d Z d Z d	 Z	 d
 Z
 d Z d Z d Z d Z d d d „ Z d Z d Z d Z d Z d Z d d „  Z e d d „  ƒ Z d Z d Z e d d d „ ƒ Z d  d! „  Z d" Z d# Z e d$ d% „  ƒ Z d& S)'r"   a)  
    A token in an Excel formula.

    Tokens have three attributes:

    * `value`: The string value parsed that led to this token
    * `type`: A string identifying the type of token
    * `subtype`: A string identifying subtype of the token (optional, and
                 defaults to "")

    r_   rS   r?   r#   rT   ÚFUNCÚARRAYrd   ÚSEPzOPERATOR-PREFIXzOPERATOR-INFIXzOPERATOR-POSTFIXzWHITE-SPACErX   c             C   s   | |  _  | |  _ | |  _ d  S)N)r_   rS   r?   )r   r_   Útype_r?   r   r   r   r   V  s    		zToken.__init__ÚTEXTÚNUMBERÚLOGICALÚERRORÚRANGEc             C   s   d j  |  j |  j |  j ƒ S)Nz{0} {1} {2}:)ÚformatrS   r?   r_   )r   r   r   r   Ú__repr__h  s    zToken.__repr__c             C   s–   | j  d ƒ r |  j } nh | j  d ƒ r6 |  j } nM | d k rN |  j } n5 y t | ƒ |  j } Wn t k
 r‚ |  j } Yn X|  | |  j | ƒ S)zCreate an operand token.r
   r   ÚTRUEÚFALSE)zTRUEzFALSE)	rD   ro   rr   rq   Úfloatrp   Ú
ValueErrorrs   rT   )Úclsr_   r?   r   r   r   r<   k  s    
zToken.make_operandÚOPENrR   Fc             C   s§   | d
 d k s t  ‚ | r@ t j d | ƒ s4 t  ‚ t j } n9 | d k rX t j } n! | d k rp t j } n	 t j } | d	 k rŽ |  j n |  j } |  | | | ƒ S)z•
        Create a subexpression token.

        `value`: The value of the token
        `func`: If True, force the token to be of type FUNC

        r   rW   r\   rV   r[   z.+\(|\)z{}z()z)}rN   )rW   r\   rV   r[   )	r8   ri   r:   r"   rk   rl   rd   rR   r{   )rz   r_   Úfuncrn   r?   r   r   r   rY   ˆ  s    		zToken.make_subexpc             C   s|   |  j  |  j |  j |  j f k s' t ‚ |  j |  j k s? t ‚ |  j  |  j k rW d n d } |  j | d |  j  |  j k ƒS)z6Return a closing token that matches this token's type.r\   r[   r|   )rS   rk   rl   rd   r8   r?   r{   rY   )r   r_   r   r   r   r^   ž  s    'zToken.get_closerÚARGÚROWc             C   sC   | d k s t  ‚ | d k r' |  j n |  j } |  | |  j | ƒ S)zCreate a separator tokenra   r`   )ra   r`   )r8   r}   r~   rm   )rz   r_   r?   r   r   r   rb   °  s    zToken.make_separatorN)r   r   r   r   Ú	__slots__r#   rT   rk   rl   rd   rm   rQ   rO   rP   rF   r   ro   rp   rq   rr   rs   ru   Úclassmethodr<   r{   rR   rY   r^   r}   r~   rb   r   r   r   r   r"   ;  s8   r"   )r   ri   Ú	Exceptionr   Úobjectr	   r"   r   r   r   r   Ú<module>   s
   ÿ ,