;ò
~$„@c           @   sB   d  k  Z  d  k Z d  k Z d  k Z d k Td f  d „  ƒ  YZ d  S(   N(   s   *s	   Tokenizerc        †   B   s  t  Z d „  Z d d „ Z h  d d <d d <d d	 <d
 d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d  d! <d" d# <d$ d% <d& d' <d( d) <d* d+ <d, d- <d. d/ <d0 d1 <d2 d3 <d4 d5 <d6 d7 <d8 d9 <d: d; <d< d= <d> d? <d@ dA <dB dC <dD dE <dF dG <dH dI <dJ dK <dL dM <dN dO <dP dQ <dR dS <dT dU <dV dW <dX dY <dZ d[ <d\ d] <d^ d_ <d` da <Z h  d db <d dc <d dd <d
 de <d df <d dg <d dh <d di <d dj <dk dl <d dm <d dn <d do <d dp <d dq <d  dr <d" ds <d$ dt <d& du <d( dv <d* dw <dx dw <d, dy <d. dz <d0 d{ <d2 d| <d4 d} <d~ d <d6 d€ <d8 d <d‚ d <d: dƒ <d< d„ <d… d† <d> d‡ <d@ dˆ <dB d‰ <dD dŠ <dF d‹ <dH dŒ <dJ d <dL dŽ <d dŽ <dN d <dP d‘ <dR d’ <dT d“ <d” d• <dV d– <dX d— <d˜ d— <dZ d™ <d\ dš <d^ d› <d` dœ <d dž <dŸ d{ <d  d¡ <d¢ d£ <d¤ d¥ <d¦ d§ <d¨ d© <dª d« <d¬ d­ <d® d¯ <d° do <d± d² <d³ d´ <dµ d¶ <d· d¸ <d¹ dº <d» d¼ <d½ d¾ <d¿ dÀ <Z dÁ dÂ dÃ dÄ dÅ dÆ dÇ dÈ dÉ dÊ dË dÌ dÍ dÎ dÏ dÐ dÑ dÒ dÓ dÔ dÕ dÖ d× dØ dÙ dÚ dÛ dÜ dÝ dÞ dß dà dá dâ dã dä då dæ dç dè dé dê dë dì dí dî dï dð dñ dò dó dô dõ dö d÷ dø dù dú dû dü dý dþ dÿ d ddddddddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFg† Z RS(G  Nc         C   s   d  S(   N(    (   s   self(    (    sN   C:\Documents and Settings\Administrator\Desktop\pyCSVoice\omcsnet\Tokenizer.pys   __init__   s    i    c         C   sœ  d | d } d d d d d d d d	 d
 d d d d d d d d d d d d d d g }	 d d d d d g } x+ |	 D]# } t i | | d | d ƒ } qu Wt i | ƒ } xt t	 | ƒ ƒ D]} | | } | i ƒ  |  i j o q¾ n t i d | ƒ }
 t |
 ƒ t j o q¾ n t i d | ƒ }
 t |
 ƒ t j o; |
 i d  ƒ d j o | | d!  d d | | <q¾ q¾ n x3 | D]+ } t i | | | d | d ƒ | | <q~Wt i | | ƒ | | <q¾ Wt i | d ƒ } | o |  i } n
 |  i } d" } x" | i ƒ  D] } | | d 7} q
W| d!  } | d# 7} d$ } xÖ | oÎ d% } t i | t i | ƒ ƒ }
 t |
 ƒ t j o– | |
 i d& ƒ } | |
 i  d' ƒ t i! j o | d% i" ƒ  | d$ } n | d% i ƒ  | d$ } | |
 i  d' ƒ  | | |
 i# d( ƒ } d$ } q?q?Wt i | d) d* ƒ } t i | d+ d, ƒ } | o t i | d- d. ƒ } n t i | d- d/ ƒ } | o t i | d0 d1 ƒ } n | Sd  S(2   Ns    s   `s   ^s   *s   =s   +s   |s   \s   [s   ]s   }s   {s   ,s   !s   ?s   #s   &s   (s   )s   "s   >s   <s   ~s   ;s   .s   @s   /s   -s   :s   ^([A-Z][.])+$s,   ^[$][0-9]{1,3}[.][0-9][0-9](?P<period>[.]?)$s   periodiÿÿÿÿs    (?P<begin>)(?P<word>s   )(?P<end>) i   i    s   words   begins   ends   's s    's s   'd s    'd s   'll s    will s    'll s    i s    I ($   s   sentences   punctuations   special_punctuations   ps   strings   replaces   splits   tokss   ranges   lens   is   toks   lowers   selfs   common_abbrev_and_acros   res   searchs   ms   types   NoneTypes   groups   strips   joins   expand_contractions_ps   contractions_unwounds   contractionss   contractions_separateds   regexps   keyss   words   dirtyBits   replace_withs   starts	   uppercases   uppers   end(   s   selfs   sentences   expand_contractions_ps   tokss   contractionss   words   regexps   special_punctuations   is   punctuations   ms   replace_withs   ps   dirtyBits   tok(    (    sN   C:\Documents and Settings\Administrator\Desktop\pyCSVoice\omcsnet\Tokenizer.pys   tokenize	   sl    K ! 
 )	 

 (s   ain'ts   ai n'ts   aren'ts   are n'ts   isn'ts   is n'ts   wasn'ts   was n'ts   weren'ts   were n'ts   didn'ts   did n'ts   doesn'ts   does n'ts   don'ts   do n'ts   hadn'ts   had n'ts   hasn'ts   has n'ts   haven'ts   have n'ts   can'ts   ca n'ts   couldn'ts	   could n'ts   needn'ts   need n'ts	   shouldn'ts
   should n'ts   shan'ts   sha n'ts   won'ts   wo n'ts   wouldn'ts	   would n'ts   i'ms   i 'ms   you'res   you 'res   he'ss   he 'ss   she'ss   she 'ss   it'ss   it 'ss   we'res   we 'res   they'res   they 'res   i'ves   i 'ves   you'ves   you 'ves   we'ves   we 'ves   they'ves   they 'ves   who'ves   who 'ves   what'ves   what 'ves   when'ves   when 'ves   where'ves	   where 'ves   why'ves   why 'ves   how'ves   how 'ves   i'ds   i 'ds   you'ds   you 'ds   he'ds   he 'ds   she'ds   she 'ds   we'ds   we 'ds   they'ds   they 'ds   i'lls   i 'lls   you'lls   you 'lls   he'lls   he 'lls   she'lls   she 'lls   we'lls   we 'lls   they'lls   they 'lls   ai nots   are nots   is nots   was nots   were nots   did nots   does nots   do nots   had nots   haftas   have tos   has nots   have nots   can nots	   could nots   need nots
   should nots	   shall nots   will nots	   would nots   I ams   you ares   u'res   he iss   she iss   it iss   we ares   they ares   y'all'res   you all ares   I haves   you haves   u'ves   we haves	   they haves   y'all'ves   you all haves   who haves	   what haves	   when haves
   where haves   why haves   how haves   I woulds	   you woulds   u'ds   he woulds	   she woulds   we woulds
   they woulds   y'all'ds   you all woulds   I wills   you wills   u'lls   he wills   she wills   we wills	   they wills   y'all'lls   you all wills   'tiss   'twass   it wass   'tweres	   they weres   'twoulds   it woulds   y'alls   you alls   i'd'ves   I would haves   aints   am nots   aintchas   are you nots   c'mons   come ons   cannots   dunnos   do not knows   gimmes   give mes   gonnas   going tos   gottas   got tos   oughtas   ought tos   wannas   want tos   yas   yous   urs   yours   mr.s   mrs.s   ms.s   sr.s   esq.s   jr.s   dr.s   s.b.s   ph.d.s   m.d.s   m.eng.s   m.f.a.s   d.d.s.s   sc.d.s   b.s.s   b.sc.s   b.a.s   a.b.s   m.a.s   c.p.a.s   prof.s   capt.s   col.s   gen.s   sgt.s   lt.s   priv.s   ft.s   nav.s   a.f.s   u.s.a.f.s
   a.f.b.i.e.s   etc.s   e.g.s   c.f.s   p.s.s   q.e.d.s   i.s   ii.s   iii.s   iv.s   v.s   vi.s   vii.s   viii.s   ix.s   x.s   a.m.s   p.m.s   morn.s   eve.s   corp.s   inc.s   co.s   ltd.s   reg.s   u.p.s.s   u.s.p.s.s   fedex.s   i.b.m.s   a.o.l.s   jan.s   feb.s   febr.s   mar.s   apr.s   may.s   jun.s   jul.s   aug.s   sep.s   sept.s   oct.s   nov.s   dec.s   ala.s   ariz.s   ark.s   calif.s   colo.s   conn.s   del.s   d.c.s   fla.s   ga.s   ill.s   ind.s   kans.s   ky.s   la.s   md.s   mass.s   mich.s   minn.s   miss.s   mo.s   nebr.s   nev.s   n.h.s   n.j.s   n.m.s   n.y.s   n.c.s   n.d.s   oklas   ore.s   pa.s   p.r.s   r.i.s   s.c.s   s.d.s   tenn.s   tex.s   vt.s   va.s   v.i.s   wash.s   w.va.s   wis.s   wyo.s   v.c.r.s   v.h.s.s   d.v.d.s   v.c.d.s   c.d.s   tele.s   tv.s   t.v.s   p.c.s   d.s.l.s   a.s.a.p.s   r.s.v.p.s   n.y.c.s   c.o.d.(   s   __name__s
   __module__s   __init__s   tokenizes   contractions_separateds   contractions_unwounds   common_abbrev_and_acro(    (    (    sN   C:\Documents and Settings\Administrator\Desktop\pyCSVoice\omcsnet\Tokenizer.pys	   Tokenizer   s   	6ÿ ®1ÿ ÿ ¢L(   s   syss   strings   oss   res   typess	   Tokenizer(   s   syss   res   oss   strings	   Tokenizer(    (    sN   C:\Documents and Settings\Administrator\Desktop\pyCSVoice\omcsnet\Tokenizer.pys   ?   s   $