3 \A@sBdZddlmZddlmZddlmZddlmZddlZyddlm Z m Z Wn$e k rtddl m Z m Z YnXydd l m Z Wne k rddlZ YnXd d Zd Zd eddeddedZdZdZdZdZdZdZdZdZeedZdedZeedZedZdedZdZd Z d!Z!d"Z"d#Z#d$Z$d%d&Z%ej&d'Z'd(d)Z(Gd*d+d+e)Z*Gd,d-d-e*Z+Gd.d/d/e*Z,Gd0d1d1e*Z-Gd2d3d3e-Z.Gd4d5d5e*Z/Gd6d7d7e-Z0Gd8d9d9e*Z1Gd:d;d;e*Z2Gdd?d?e2Z4Gd@dAdAe4Z5GdBdCdCe*Z6GdDdEdEe*Z7dS)Fa INLINE PATTERNS ============================================================================= Inline patterns such as *emphasis* are handled by means of auxiliary objects, one per pattern. Pattern objects must be instances of classes that extend markdown.Pattern. Each pattern object uses a single regular expression and needs support the following methods: pattern.getCompiledRegExp() # returns a regular expression pattern.handleMatch(m) # takes a match object and returns # an ElementTree element or just plain text All of python markdown's built-in patterns subclass from Pattern, but you can add additional patterns that don't. Also note that all the regular expressions used by inline must capture the whole block. For this reason, they all start with '^(.*)' and end with '(.*)!'. In case with built-in expression Pattern takes care of adding the "^(.*)" and "(.*)!". Finally, the order in which regular expressions are applied is very important - e.g. if we first replace http://.../ links with tags and _then_ try to replace inline html, we would end up with a mess. So, we apply the expressions in the following order: * escape and backticks have to go before everything else, so that we can preempt any markdown patterns by escaping them. * then we handle auto-links (must be done before inline html) * then we handle inline HTML. At this point we will simply replace all inline HTML strings with a placeholder and add the actual HTML to a hash. * then inline images (must be done before links) * then bracketed links, first regular then reference-style * finally we apply strong and emphasis )absolute_import)unicode_literals)util)odictN)urlparse urlunparse)entitiescKstj}tt|d<tt||d<tt||d<tt ||d<t t ||d<t t ||d<tt||d<tt||d<tt||d <ttd |d <|jdkrtt||d <tt||d <tt|d<ttd|d<ttd|d<ttd|d<|j rtt!d|d<ntt"d|d<|S)z8 Build the default set of inline patterns for Markdown. Zbacktickescape referencelinkZ image_linkZimage_referenceZshort_referenceZautolinkZautomailbrZ linebreakhtmlentityZ not_strongz strong,emZ strong_emZstrongZemZemphasisZ emphasis2)#r OrderedDictBacktickPattern BACKTICK_RE EscapePattern ESCAPE_REReferencePattern REFERENCE_RE LinkPatternLINK_RE ImagePattern IMAGE_LINK_REImageReferencePatternIMAGE_REFERENCE_RE SHORT_REF_REAutolinkPattern AUTOLINK_REAutomailPattern AUTOMAIL_RESubstituteTagPattern LINE_BREAK_REsafeMode HtmlPatternHTML_RE ENTITY_RESimpleTextPattern NOT_STRONG_REDoubleTagPattern STRONG_EM_RESimpleTagPattern STRONG_RE EMPHASIS_REsmart_emphasisSMART_EMPHASIS_RE EMPHASIS_2_RE) md_instancekwargsinlinePatternsr5$build/lib/markdown/inlinepatterns.pybuild_inlinepatterns;s,   r7z[^\]\[]*z\[(z(\[z\])*z)\]z(?|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\12\s*)?\)z\!z%\s*\((<.*?>|([^")]+"[^"]*"|[^\)]*))\)z\s?\[([^\]]*)\]z \[([^\]]+)\]z((^| )(\*|_)( |$))z*<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^>]*)>z<([^> \!]*@[^> ]*)>z"(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)z(&[\#a-zA-Z0-9]*;)z \ncCs<|jdr|jds(|jdr4|jdr4|ddS|SdS)z#Remove quotes from around a string."'rN) startswithendswith)stringr5r5r6dequote|s r?z\{@([^\}]*)=([^\}]*)}csfdd}tj||S)zDSet values of an element based on attribute definitions ({@id=123}).cs$j|jd|jdjdddS)Nr  )setgroupreplace)match)parentr5r6attributeCallbacksz+handleAttributes..attributeCallback)ATTR_REsub)textrGrHr5)rGr6handleAttributess rLc@s:eZdZdZd ddZddZddZd d Zd d ZdS)Patternz*Base class that inline patterns subclass. NcCs4||_tjd|tjtjB|_d|_|r0||_dS)z Create an instant of an inline pattern. Keyword arguments: * pattern: A regular expression that matches a pattern z^(.*?)%s(.*?)$FN)patternrecompileDOTALLUNICODE compiled_re safe_modemarkdown)selfrNmarkdown_instancer5r5r6__init__s  zPattern.__init__cCs|jS)z' Return a compiled regular expression. )rS)rVr5r5r6getCompiledRegExpszPattern.getCompiledRegExpcCsdS)zReturn a ElementTree element from the given match. Subclasses should override this method. Keyword arguments: * m: A re match object containing a match of the pattern. Nr5)rVmr5r5r6 handleMatchs zPattern.handleMatchcCs|jjS)z+ Return class name, to define pattern type ) __class____name__)rVr5r5r6typesz Pattern.typec sPy|jjdjWntk r&|SXfddfdd}tjj||S)z> Return unescaped text given text with an inline placeholder. inlinec3sf|j}t|tj r |dk r dS|jr.|jVx2|D]*}x|D] }|VqBW|jr4|jVq4WdS)z8 Reimplement Element.itertext for older python versions N)tag isinstancer string_typerKtail)elr`es)itertextr5r6rgs  z"Pattern.unescape..itertextcs>|jd}|kr:j|}t|tjr,|Sdj|SdS)Nr)rDgetrarrbjoin)rZidvalue)rgstashr5r6 get_stashs    z#Pattern.unescape..get_stash)rUtreeprocessors stashed_nodesKeyErrorrINLINE_PLACEHOLDER_RErJ)rVrKrnr5)rgrmr6unescapes  zPattern.unescape)N) r] __module__ __qualname____doc__rXrYr[r^rsr5r5r5r6rMs   rMc@seZdZdZddZdS)r(z0 Return a simple text of group(2) of a Pattern. cCs|jd}|tjkrdS|S)Nr@)rDrINLINE_PLACEHOLDER_PREFIX)rVrZrKr5r5r6r[s  zSimpleTextPattern.handleMatchN)r]rtrurvr[r5r5r5r6r(sr(c@seZdZdZddZdS)rz Return an escaped character. cCs4|jd}||jjkr,dtjt|tjfSdSdS)Nr@z%s%s%s)rDrU ESCAPED_CHARSrSTXordETX)rVrZcharr5r5r6r[s  zEscapePattern.handleMatchN)r]rtrurvr[r5r5r5r6rsrc@s eZdZdZddZddZdS)r,z[ Return element of type `tag` with a text attribute of group(3) of a Pattern. cCstj||||_dS)N)rMrXr`)rVrNr`r5r5r6rXs zSimpleTagPattern.__init__cCstjj|j}|jd|_|S)N)retreeElementr`rDrK)rVrZrdr5r5r6r[s zSimpleTagPattern.handleMatchN)r]rtrurvrXr[r5r5r5r6r,sr,c@seZdZdZddZdS)r"z3 Return an element of type `tag` with no children. cCstjj|jS)N)rr~rr`)rVrZr5r5r6r[sz SubstituteTagPattern.handleMatchN)r]rtrurvr[r5r5r5r6r"sr"c@s eZdZdZddZddZdS)rz9 Return a `` element containing the matching text. cCstj||d|_dS)Ncode)rMrXr`)rVrNr5r5r6rXs zBacktickPattern.__init__cCs(tjj|j}tj|jdj|_|S)Nr})rr~rr` AtomicStringrDstriprK)rVrZrdr5r5r6r[ szBacktickPattern.handleMatchN)r]rtrurvrXr[r5r5r5r6rsrc@seZdZdZddZdS)r*zfReturn a ElementTree element nested in tag2 nested in tag1. Useful for strong emphasis etc. cCs:|jjd\}}tjj|}tjj||}|jd|_|S)N,r})r`splitrr~r SubElementrDrK)rVrZZtag1Ztag2Zel1Zel2r5r5r6r[s   zDoubleTagPattern.handleMatchN)r]rtrurvr[r5r5r5r6r*sr*c@s eZdZdZddZddZdS)r%z1 Store raw inline html and return a placeholder. cCs"|j|jd}|jjj|}|S)Nr@)rsrDrU htmlStashstore)rVrZZrawhtmlZ place_holderr5r5r6r[szHtmlPattern.handleMatchc sDyjjdjWntk r&|SXfdd}tjj||S)z> Return unescaped text given text with an inline placeholder. r_c s<|jd}j|}|dk r8y jj|Sd|SdS)Nrz\%s)rDrirU serializer)rZrkrl)rVrmr5r6rn*s   z'HtmlPattern.unescape..get_stash)rUrorprqrrrrJ)rVrKrnr5)rVrmr6rs$s  zHtmlPattern.unescapeN)r]rtrurvr[rsr5r5r5r6r%sr%c@s eZdZdZddZddZdS)rz- Return a link element from the given match. cCstjjd}|jd|_|jd}|jd}|rf|ddkrH|dd }|jd|j|j|jn |jdd |rt |j|}|jd ||S) Nar@ r), we whitelist known safe url formats. Most urls contain a network location, however some are known not to (i.e.: mailto links). Script urls do not contain a location. Additionally, for `javascript:...`, the scheme would be "javascript" but some aliases will appear to `urlparse()` to have no scheme. On top of that relative links (i.e.: "foo/bar.html") have no scheme. Therefore we must check "path", "parameters", "query" and "fragment" for any literal colons. We don't check "scheme" for colons because it *should* never have any and "netloc" must allow the form: `username:password@host:port`. rhmailtoZnewsZhttpZhttpsZftpZftpsr@N:)rUr$r ValueErrorr) rVurlschemeZnetlocpathZparamsZqueryZfragmentZlocless_schemesZallowed_schemespartr5r5r6rJs  zLinkPattern.sanitize_urlN)r]rtrurvr[rr5r5r5r6r6src@seZdZdZddZdS)rz, Return a img element from the given match. cCstjjd}|jdj}|rd|d}|ddkrJ|d dkrJ|dd}|jd|j|j|n |jddt|dkr|jd t |jd j |dd|j j rt |jd |}n |jd }|jd |j||S)Nimgrrrr>srcrhrrBr@altr;r;)rr~rrDrrCrrslenr?rjrUenable_attributesrL)rVrZrdZ src_partsrZtruealtr5r5r6r[ys    $ zImagePattern.handleMatchN)r]rtrurvr[r5r5r5r6rwsrc@s.eZdZdZejdejZddZddZ dS)rz6 Match to a stored reference and return link element. z[ ]?\nc Csy|jdj}Wntk r*d}YnX|s>|jdj}|jjd|}||jjkr\dS|jj|\}}|jd}|j|||S)Nrr@rB)rDlower IndexErrorNEWLINE_CLEANUP_RErJrU referencesmakeTag)rVrZrkrrrKr5r5r6r[s   zReferencePattern.handleMatchcCs8tjjd}|jd|j||r.|jd|||_|S)Nrrr)rr~rrCrrK)rVrrrKrdr5r5r6rs   zReferencePattern.makeTagN) r]rtrurvrOrP MULTILINErr[rr5r5r5r6rsrc@seZdZdZddZdS)rz5 Match to a stored reference and return img element. cCsVtjjd}|jd|j||r.|jd||jjr@t||}|jd|j||S)Nrrrr) rr~rrCrrUrrLrs)rVrrrKrdr5r5r6rs   zImageReferencePattern.makeTagN)r]rtrurvrr5r5r5r6rsrc@seZdZdZddZdS)rzC Return a link Element given an autolink (``). cCs:tjjd}|jd|j|jdtj|jd|_|S)Nrrr@)rr~rrCrsrDrrK)rVrZrdr5r5r6r[s zAutolinkPattern.handleMatchN)r]rtrurvr[r5r5r5r6rsrc@seZdZdZddZdS)r zT Return a mailto link Element given an automail link (``). cstjjd}|j|jd}|jdr6|tdd}ddfdd|D}tjdj||_ d|}djd d|D}|j d ||S) Nrr@zmailto:cSs0tjj|}|rdtj|fSdtj|fSdS)z=Return entity definition by code, or the code if not defined.z%s%s;z%s#%d;N)r codepoint2namerirAMP_SUBSTITUTE)rrr5r5r6rs z3AutomailPattern.handleMatch..codepoint2namecsg|]}t|qSr5)rz).0letter)rr5r6 sz/AutomailPattern.handleMatch..rhcSsg|]}tjdt|qS)z#%d;)rrrz)rrr5r5r6rsr) rr~rrsrDr<rrrjrKrC)rVrZrdemailZlettersrr5)rr6r[s     zAutomailPattern.handleMatchN)r]rtrurvr[r5r5r5r6r sr )8rv __future__rrrhrrrOZ urllib.parserr ImportErrorrr Zhtmlentitydefsr7Z NOBRACKETZBRKZNOIMGrrr.r-r+r0r1rrrrrr)rr!r&r'r#r?rPrIrLobjectrMr(rr,r"rr*r%rrrrrr r5r5r5r6*sj    !$     G   A#