HEX
Server: Apache
System: Linux nc-ph-4101.simplemoneygoals.com 5.14.0-503.21.1.el9_5.x86_64 #1 SMP PREEMPT_DYNAMIC Sun Jan 12 09:45:05 EST 2025 x86_64
User: dailygoldindex (1004)
PHP: 8.1.33
Disabled: NONE
Upload Files
File: //lib/python3.9/site-packages/html2text/__pycache__/__init__.cpython-39.pyc
a

!q ^a��@s�dZddlZddlZddlZddlmZddlm	Z	ddl
mZmZm
Z
mZmZddlmZddlmZmZddl
mZdd	lmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#d
Z$Gdd�dej%j&�Z'de(e(e
e)e(d�dd�Z*dS)z>html2text: Turn HTML into equivalent Markdown-structured text.�N)�wrap)�Dict�List�Optional�Tuple�Union�)�config)�
AnchorElement�ListElement)�OutCallback)
�dumb_css_parser�
element_style�	escape_md�escape_md_section�google_fixed_width_font�google_has_height�google_list_style�google_text_emphasis�hn�list_numbering_start�pad_tables_in_text�skipwrap�unifiable_n)i�r�cs�eZdZddejfeeeedd��fdd�
Z	edd��fdd�Z
eed�d	d
�Zedd�dd
�Zed�dd�Z
edd�dd�Zedd�dd�Zeeeeeefdd�dd�Zedd�dd�Zeeeefeed�dd�Zeeeefeeefdd�d d!�Zeeeeefedd"�d#d$�Zdd�d%d&�Zdd�d'd(�Zdd�d)d*�Zd=eeeeefdd,�d-d.�Zd>eedd/�d0d1�Zeed2�d3d4�Zeed�d5d6�Z eeefed7�d8d9�Z!eed:�d;d<�Z"�Z#S)?�	HTML2TextN�)�out�baseurl�	bodywidth�returncs
t�jdd�d|_d|_d|_tj|_tj|_	tj
|_||_tj
|_tj|_tj|_tj|_tj|_tj|_tj|_tj|_tj|_tj|_ tj!|_"tj#|_$d|_%d|_&d|_'d|_(tj)|_*tj+|_,d|_-tj.|_/tj0|_1tj2|_3tj4|_5tj6|_7d|_8tj9|_:tj;|_<|du�r|j=|_>n||_>g|_?d|_@d|_Ad|_Bd|_Cd|_Dg|_Eg|_Fd|_Gd|_HtI�Jd	�|_Kd|_Lg|_Md|_Nd|_Od|_Pd|_Qd|_Rd
|_Sd|_Td|_Ud|_Vi|_Wg|_Xd|_Yd|_Zd|_[d|_\d|_]i|_^||__d|_`d|_ad
|_bd
|_cdtjdd<dS)
z�
        Input parameters:
            out: possible custom replacement for self.outtextf (which
                 appends lines of text).
            baseurl: base URL of the document we process
        F)Zconvert_charrefsr�*�_z**NTz^[a-zA-Z+]+://r�&nbsp_place_holder;�nbsp)e�super�__init__�
split_next_td�td_count�table_startr	ZUNICODE_SNOB�unicode_snobZESCAPE_SNOB�escape_snobZLINKS_EACH_PARAGRAPH�links_each_paragraph�
body_widthZSKIP_INTERNAL_LINKS�skip_internal_linksZINLINE_LINKS�inline_linksZ
PROTECT_LINKS�
protect_linksZGOOGLE_LIST_INDENT�google_list_indentZIGNORE_ANCHORS�ignore_linksZ
IGNORE_IMAGES�
ignore_imagesZIMAGES_AS_HTML�images_as_htmlZ
IMAGES_TO_ALT�
images_to_altZIMAGES_WITH_SIZE�images_with_sizeZIGNORE_EMPHASIS�ignore_emphasisZ
BYPASS_TABLES�
bypass_tablesZ
IGNORE_TABLES�
ignore_tables�
google_doc�ul_item_mark�
emphasis_mark�strong_markZSINGLE_LINE_BREAK�single_line_breakZUSE_AUTOMATIC_LINKS�use_automatic_links�hide_strikethroughZ	MARK_CODE�	mark_codeZWRAP_LIST_ITEMS�wrap_list_itemsZ
WRAP_LINKS�
wrap_linksZ
PAD_TABLES�
pad_tablesZDEFAULT_IMAGE_ALT�default_image_alt�tag_callbackZ
OPEN_QUOTE�
open_quoteZCLOSE_QUOTE�close_quote�outtextfr�outtextlist�quiet�p_p�outcount�start�space�a�astack�maybe_automatic_link�
empty_link�re�compile�absolute_url_matcher�acount�list�
blockquote�pre�startpre�code�quote�	br_toggle�	lastWasNL�lastWasList�style�	style_def�	tag_stack�emphasis�drop_white_space�inheader�
abbr_title�	abbr_data�	abbr_listr�stressed�preceding_stressed�preceding_data�current_tag�	UNIFIABLE)�selfrrr��	__class__��6/usr/lib/python3.9/site-packages/html2text/__init__.pyr&%s�

�zHTML2Text.__init__)�datar cs|�dd�}t��|�dS)Nz</' + 'script>z	</ignore>)�replacer%�feed)rortrprrrsrv�szHTML2Text.feedcCs8|�|�|�d�|�|���}|jr0t|�S|SdS)Nr)rv�optwrap�finishrDr)rortZmarkdownrrrrrs�handle�s

zHTML2Text.handle)�sr cCs"|j�|�|r|ddk|_dS)N����
)rJ�appendr_)rorzrrrrrsrI�szHTML2Text.outtextf)r cCsX|��|��|jddd�d�|j�}|jr>tjjd}nd}|�	d|�}g|_|S)Nr�end��forceznbsp;� r#)
�close�pbr�o�joinrJr*�html�entities�html5ru)roZouttextr$rrrrrsrx�szHTML2Text.finish)�cr cCs|�|�|�d�dS�NT)�handle_data�charref)ror�rrrrrs�handle_charref�szHTML2Text.handle_charrefcCs|�|�}|r|�|d�dSr�)�	entityrefr�)ror��refrrrrrs�handle_entityref�s
zHTML2Text.handle_entityref)�tag�attrsr cCs|j|t|�dd�dS)NT�rN)�
handle_tag�dict)ror�r�rrrrrs�handle_starttag�szHTML2Text.handle_starttag)r�r cCs|j|idd�dS)NFr�)r�)ror�rrrrrs�
handle_endtag�szHTML2Text.handle_endtag)r�r cCs�d|vrdSd}t|j�D]p\}}d|jvr~|jd|dkr~d|jvsPd|vrzd|jvr~d|vr~|jd|dkr~d}nd}|r|SqdS)z�
        :type attrs: dict

        :returns: The index of certain set of attributes (of a link) in the
        self.a list. If the set of attributes is not found, returns None
        :rtype: int
        �hrefNF�titleT)�	enumeraterPr�)ror��match�irPrrrrrs�
previousIndex�s"���
zHTML2Text.previousIndex)rN�	tag_style�parent_styler cCs�t|�}t|�}d|vo|j}d}tjD]}||vo:||v}|r(qFq(d|voTd|v}	t|�ont|�on|j}
|�r|s�|	s�|
r�|jd7_|r�|jd7_|	r�|�|j	�|j
d7_
|r�|�|j�|j
d7_
|
�r�|�d�|j
d7_
d|_n�|�s|	�s|
�r*|jd8_d|_
|
�rX|j
�rH|j
d8_
n
|�d�d|_|�r�|j
�rv|j
d8_
n|�|j�|	�r�|j
�r�|j
d8_
n|�|j	�|�s�|	�r�|j�s�|�d�|�r�|jd8_dS)	z/
        Handles various text emphases
        zline-throughF�italicr�`Tr�N)rr@r	ZBOLD_TEXT_STYLE_VALUESrrZrdrKr�r<rer=r\rO)rorNr�r�Ztag_emphasisZparent_emphasisZ
strikethroughZboldZbold_markerr��fixedrrrrrs�handle_emphasis�sb
��


zHTML2Text.handle_emphasis)r�r�rNr cCs�||_|jdur(|�||||�dur(dS|rb|jdurb|dvrb|dksL|jrb|�d�d|_d|_|jr�i}|r�|jr�|jdd}t||j	|�}|j�
|||f�n4|jr�|j��ndiif\}}}|jr�|jdd}t|��r|�
�|�rd|_|�t|�dd	�n
d|_dS|d
v�rl|j�rP|�rFt|��rF|�
�n|��n|j�rd|dk�rdn|�
�|dk�r�|�r�|jd
k�r�|�d�n
|�d�|dk�r�|�r�|�
�|�d�|�
�|dv�r�|�r�|jd7_n|jd8_|dk�r$|�r|jd7_n|jd8_|dv�r4d
|_|dk�r�|�rp|�
�|jddd�d|_|jd7_n|jd8_|�
�ttd�dd�}|dv�r�|j�s�|�r�||��r�d	|j}n|j}|�|�|�r�d|_|dv�r*|j�s*|�r||��rd	|j}	n|j}	|�|	�|�r*d|_|dv�rd|�rJ||��rJd}
nd }
|�|
�|�rdd|_|j�r�|j�s�|�|||�|d!v�r�|j�s�|�d"�|j|_|d#k�r|�r�d|_d$|_ d%|v�r|d%|_n6|jdu�r
|j du�s�J�|j|j!|j <d|_d|_ |d&k�rF|j"�s0|�|j#�n|�|j$�|j"|_"dVtt%t%dd'�d(d)�}|d*k�r�|j&�s�|�r�d+|v�r�|d+du�r�|j'�r�|d+�(d��s�|j�
|�|d+|_d|_|j)�r�d,|d+d-|d+<n|j�
d�n�|j�r�|j��}|j�r|j�sd|_n�|�r�|d+du�s.J�|j�rL|�d�d|_d|_|j*�r~|�+d%��pbd$}
t,|
�}
|||d+|
�nb|�-|�}|du�r�|j.|}n*|j/d7_/t0||j/|j1�}|j.�
|�|�d.t%|j2�d/�|dk�r4|�r4|j�s4d0|v�r4|d0du�sJ�|j3�s(|d0|d+<|�+d1��p8|j4}|j5�s^|j6�r�d2|v�s^d3|v�r�|�d4|d0d5�d2|v�r�|d2du�s�J�|�d6|d2d5�d3|v�r�|d3du�s�J�|�d7|d3d5�|�r�|�d8|d5�|�d9�dS|jdu�rj|j}|j3�rTt,|�|k�rT|j7�8|��rT|�d,t,|�d-�d|_dS|�d�d|_d|_|j3�r�|�t,|��n�|�d:t,|�d/�|j*�r�|�+d+��p�d$}|�d;t,t9�:|j;|��d<�nb|�-|�}|du�r�|j.|}n*|j/d7_/t0||j/|j1�}|j.�
|�|�dt%|j2�d/�|d=k�rL|�rL|�
�|d>k�rd|�sd|�<�|d?k�r~|�r~|�d@�|d?k�r�|�s�|�<�|dAv�	r$|j=�s�|j>�s�|�
�|�r�|j�r�t?|�}n|}t@|�}|j=�
tA||��n,|j=�	r|j=��|j�	s|j=�	s|�dB�d|_>nd|_>|dCk�	r�|�<�|�	r�|j=�	rV|j=d}n
tAdDd
�}|j�	rt|�B|�}n
tC|j=�}|�dE|�|jDdDk�	r�|�|jEd	�n.|jDdFk�	r�|jFd7_F|�t%|jF�dG�d|_|dHv�r�|jG�
r|dIk�r�|�
rn|��n�n�|jH�
r�|�
r&|��|dJv�
rZ|�
rH|�dK�I|��n|�dL�I|��n(|�
rr|�dM�I|��n|�dN�I|���n|dOk�
r�|�
r�d|_J|jK�
r�|�d,tLjMd-�|�d�n&|jK�
r�|�dPtLjMd-�|�d�|dJv�r|�r|jN�r|�dQ�d|_N|dIk�r(|�r(d
|_O|dIk�rF|�sFd|_N|��|dIk�r�|�s�|jJ�r�|�dR�PdSg|jO��|��d|_J|dJv�r�|�r�|jOd7_O|dTk�r�|�r�d|_Qd|_nd|_|jR�r�|�SdU�|�
�dS)WNT)�p�divra�dl�dtZimg�[Fr{��#r�)r�r�r��brrz  
> �  
�hrz* * *)�headraZscriptrra)�bodyrY�> r)ror cSst|jot�d|jd��S)Nz[^\s]r{)�boolrlrTr��rorrrrrs�no_preceding_space�s�z0HTML2Text.handle_tag.<locals>.no_preceding_space)Zemr��u)�strong�b)�del�strikerzz ~~z~~)Zkbdr\�ttr��abbrrr��q)ro�linkr�r cSs@t�|j|�}|��r d�|�nd}|�djt|�|d��dS)Nz "{}"rz]({url}{title}))�urlr�)�urlparse�urljoinr�strip�formatr�r)ror�r�r�rrrrrs�link_url�sz&HTML2Text.handle_tag.<locals>.link_urlrPr��<�>z][�]�src�alt�widthZheightz
<img src='z' zwidth='zheight='zalt='z/>z![�(�)r�r��dd�    )�ol�ulr|�lir��  r�z. )�table�tr�td�thr�)r�r�z<{}>

z
</{}>z<{}>z</{}>r�z</z| �|z---rZz
[/code])r)TrmrFrRr3r�rSr:rcrrbr}�poprr�rfr�soft_brrQrYrKrarNrr�r7r<rjr=r�rZr\rgrhrir]rGrH�strr2r.�
startswithr0r/�getrr�rPrWr
rM�countr5rEr4r6rVr�r�r�rr�rXr`rrr�google_nest_count�len�namer;Znumr9r8r�r)rDr	ZTABLE_MARKER_FOR_PADr'r(r�r[rAr)ror�r�rNr�r�Zdummyr�rdr�r�r�rPr�r�Za_propsr�r�Z
list_styleZnumbering_startr��
nest_countrrrrrsr�)sH
�����
�



















�
���





���


�
�
�
�


















zHTML2Text.handle_tagcCs|jdkrd|_dS)zPretty print has a line breakrrN)rLr�rrrrrsr��s
z
HTML2Text.pbrcCs|jr
dnd|_dS)z Set pretty print to 1 or 2 linesrr�N)r>rLr�rrrrrsr��szHTML2Text.pcCs|��d|_dS)zSoft breaksr�N)r�r^r�rrrrrsr��szHTML2Text.soft_brF)rt�puredatar�r c
	Cs2|jdur|j|7_|j�s.|jrR|��}|jrD|jsD|jsD|}|dkrRd|_|r�|js�t�dd|�}|r�|ddkr�d|_	|dd�}|s�|s�dS|j
r�|�d�s�|�d	�s�d|}|jr�|�
d
�d|_d|j}|r�|r�|ddks�|jr�|d7}|j�r6|j�s|d7}|dt|j�7}|�dd|�}|j
�rVd
|_
|j�rV|�d�}|j�rpd
|_	d|_d
|_|dk�r�d|_|�
d�d
|_	|j�r�|�
|jd||j�d
|_	d|_|j	�r�|j�s�|�
d�d
|_	|j�r�|jdk�r�|j�s|dk�r�|dk�r|�
d�g}|jD]�}|j|jk�r�|�
dt|j�dt�|j|jd��d|jv�r�|jddu�s�J�|�
d|jdd�|�
d�n
|�|��q"|j|k�r�|�
d�||_|j �r|dk�r|j �!�D]$\}}	|�
d|d|	d��q�d|_|�
|�|jd7_dS)z6
        Deal with indentation and whitespace
        Nrrz\s+r�Trr|z
z
[code]r�r�Fr~r�z   [z]: r�r�z (r�z  *[)"rhrKr:�lstriprerZr\rT�subrOr[r�rArrLrYrXr�rurNr^r_rPr,rMr�r�r�r�rr�r}ri�items)
rortr�r�Zlstripped_dataZbqZnewar�r�Z
definitionrrrrrsr��s�







���


����

zHTML2Text.o)rt�entity_charr cCs|sdS|jr$|��}d|_d|_n8|jr\t�d|d�rVt|j�sV|jdvrVd|}d|_|jrr|j�	t
|��|jdur�|j}||kr�|j�|�r�|j
r�|�d|d�d|_dS|�d	�d|_d|_|js�|js�|s�t||jd
�}||_|j|dd�dS)NFTz[^\s.!?]r)rPr\rZr�r�r�r�)Zsnob)r�)rjr�rkrTr�rrmrarb�updater
rRrVr?r�rSr\rZrr+rl)rortr�r�rrrrrsr�!sF���
�
��
zHTML2Text.handle_data)r�r cCsb|ddvr t|dd�d�}nt|�}|js>|tvr>t|Sz
t|�WSty\YdS0dS)Nr)�x�Xrrr)�intr*r�chr�
ValueError)ror�r�rrrrrsr�Ls
zHTML2Text.charrefcCsd|js|tjvrtj|Sztjj|d}WntyLd|dYS0|dkr`tj|S|S)N�;�&r$)r*r	rnr�r�r��KeyError)ror�Zchrrrrrsr�Zs
zHTML2Text.entityref)rar cCs*d}d|vr&t|ddd��|j}|S)zq
        Calculate the nesting count of google doc lists

        :type style: dict

        :rtype: int
        rzmargin-leftN���)r�r1)rorar�rrrrrsr�cszHTML2Text.google_nest_count)�textr cCs|js
|Sd}d}|jsd|_|�d�D]�}t|�dkr�t||j|j�s�d}|�d|j�rbd}n|�d�rpd}t	||jd|d�}|d�
|�7}|�d�r�|d	7}d
}q�|r�|d7}d
}q�|d7}d}q�tj
�|�s�||d7}d
}q(|dkr(|d7}|d
7}q(|S)
zi
        Wrap all paragraphs in the provided text.

        :type text: str

        :rtype: str
        rrFr|r�r�r�)Zbreak_long_words�subsequent_indentr�rz

r�)r-rCr/�splitr�rrBr�r;rr��endswithr	ZRE_SPACEr�)ror��result�newlinesZpara�indent�wrappedrrrrrsrwqsH
�

zHTML2Text.optwrap)FF)F)$�__name__�
__module__�__qualname__r	�
BODY_WIDTHrrr�r�r&rvryrIrxr�r�rrr�r�rr�r�r�r�r�r�r�rr�r�r�r�r�rw�
__classcell__rrrrrprsr$sN��d	" �J�{��q+	rr)r�rrr cCs$|durtj}t||d�}|�|�S)N)rr)r	r�rry)r�rr�hrrrrrs�	html2text�sr�)rN)+�__doc__Z
html.entitiesr�Zhtml.parserrT�urllib.parse�parser��textwrapr�typingrrrrrrr	�elementsr
rrZutilsr
rrrrrrrrrrrr�__version__�parserZ
HTMLParserrr�r�r�rrrrrrrs�<module>s(<