;ò
x$„@c           @   s—  d  Z  d Z d k Z d k Z d k Z d k Td k Z d k Z d k Z d k	 Z	 d k
 Z
 d f  d „  ƒ  YZ e d j od e i j p d e i j o d	 GHe i d
 ƒ n d e i j oN e d
 ƒ Z x? n o3 e i i ƒ  Z d e i e i e ƒ ƒ GHd GHq× Wn d e i j o
 d Z n d
 Z d e i j o
 d Z n d
 Z d e i j o
 d Z n d
 Z d GHe o	 d GHn e o	 d GHn e o	 d GHn e e ƒ Z d GHd GHd GHd GHy£ xœ n o” d Z y e d ƒ Z Wn
 ‚  n Xe i ƒ  Z e o d e i e ƒ GHn d e i e d
 e ƒ GHe i ƒ  Z d Ge e e e d ƒ ƒ Gd GHqÑWWq“e j
 o d GHe i d
 ƒ q“Xn d S(    s   Hugo Liu <hugo@media.mit.edu>s   1.3N(   s   *s   MontyTaggerc           B   sb   t  Z e Z e Z e Z e Z d Z d d „ Z d d d „ Z	 d d „ Z
 d d „ Z d „  Z RS(   Ni    c         C   s~   | |  _  t i ƒ  |  _ g  } t i | ƒ |  _ t | ƒ d j o t i ƒ  |  _ n t	 i	 |  i ƒ |  _
 t i ƒ  |  _ d  S(   Ni    (   s   trace_ps   selfs	   Tokenizers   theTokenizers   notifys   LexiconFasts
   theLexicons   lens   LexiconEfficients   LexicalRuleParsers   theLRPs   ContextualRuleParsers   theCRP(   s   selfs   trace_ps   notify(    (    s   ./omcsnet/MontyTagger.pys   __init__   s    	c         C   s/   |  i i | | ƒ } |  i | | ƒ } | Sd  S(   N(	   s   selfs   theTokenizers   tokenizes   texts   expand_contractions_ps	   tokenizeds   tag_tokenizeds	   all_pos_ps   output(   s   selfs   texts   expand_contractions_ps	   all_pos_ps	   tokenizeds   output(    (    s   ./omcsnet/MontyTagger.pys   tag   s    c         C   s>  g  } t i | ƒ } xÒ | D]Ê } d | j o* | | i d ƒ i ƒ  | | i d ƒ j o" | i d ƒ \ } } | g }	 n |  i i	 | ƒ }	 |	 g  j o d } |	 i d ƒ n |	 d } | i h  d | <d | <d |	 <ƒ q Wh  d d <d d <d g  <} | i d | i ƒ  ƒ | i | i ƒ  ƒ |  i o d	 G|  i | ƒ GHn x£ t t | ƒ ƒ D] } | | }
 |
 d d j o qgn |
 d d t i j o d
 | | d <n d | | d <|  i i | | ƒ d | | d g | | d <qgW|  i o d G|  i | ƒ GHn |  i i | ƒ |  i | | ƒ Sd  S(   Ns   /s   UNKi    s   words   poss   all_poss	   S-T-A-R-Ts   STAARTs(   TRACE: [output after lexicon lookup]:
  s   NNPs   NNs4   TRACE: [output after lexical rules were applied]:
  (   s   text_arrs   strings   splits   texts   tokss   words   indexs   uppers   apriori_poss   all_poss   selfs
   theLexicons   poss   appends   boundary_tokens   inserts   copys   trace_ps   form_outputs   ranges   lens   is	   word_dicts	   uppercases   theLRPs   apply_all_ruless   theCRPs   apply_rules_to_all_words_brills	   all_pos_p(   s   selfs   texts	   all_pos_ps   tokss   words   boundary_tokens   text_arrs   is   poss   all_poss	   word_dicts   apriori_pos(    (    s   ./omcsnet/MontyTagger.pys   tag_tokenized   s@     :
,!
 
 
c         C   sÞ   d }
 x¾ | d d !D]¯ } | d } | d } | ox | d } g  }	 x, | D]$ } | | j o |	 i	 | ƒ qL qL W|	 } | g | } |
 | d t i | d ƒ d 7}
 q |
 | d | d 7}
 q Wt i |
 ƒ }
 |
 Sd  S(	   Ns    i   iÿÿÿÿs   words   poss   all_poss   /s    (   s   outputs   text_arrs	   word_dicts   words   theposs	   all_pos_ps   all_poss   filtereds   ps   appends   pos_arrs   strings   joins   strip(   s   selfs   text_arrs	   all_pos_ps   words   all_poss   pos_arrs   theposs   ps	   word_dicts   filtereds   output(    (    s   ./omcsnet/MontyTagger.pys   form_output@   s$     


 &c   
      C   s2  g  } t i | ƒ } xŒ | D]„ } t i | d ƒ } | d } | d } |  i
 i | ƒ }	 |	 g  j o |	 i d ƒ n | i h  d | <d | <d |	 <ƒ q Wh  d d <d d	 <d g  <} | i d | i ƒ  ƒ | i | i ƒ  ƒ |  i o d
 G|  i | ƒ GHn |  i i | ƒ |  i | t ƒ Sd  S(   Ns   /i    i   s   UNKs   words   poss   all_poss	   S-T-A-R-Ts   STAARTs   TRACE: [inputted as]:
  (   s   text_arrs   strings   splits   taggeds   tokss   word_poss   word_pos_splits   words   poss   selfs
   theLexicons   all_poss   appends   boundary_tokens   inserts   copys   trace_ps   form_outputs   theCRPs   apply_rules_to_all_words_brills	   all_pos_p(
   s   selfs   taggeds   text_arrs   word_poss   words   boundary_tokens   tokss   poss   word_pos_splits   all_pos(    (    s   ./omcsnet/MontyTagger.pys   verify_and_repairS   s$     

,!
(   s   __name__s
   __module__s   Nones   theTokenizers
   theLexicons   theLRPs   theCRPs   trace_ps   __init__s   tags   tag_tokenizeds   form_outputs   verify_and_repair(    (    (    s   ./omcsnet/MontyTagger.pys   MontyTagger   s   
#s   __main__s   /?s   -?s6  
        USAGE: >> python MontyTagger.py [-trace] [-allpos] [-repair]
        -trace   shows intermediary steps and debug messages
        -allpos  displays all plausible POS tags, ranked
        -repair  in repair mode, enter tagged text at the
                 prompt, monty will attempt to fix the tags
    i    s
   -noverbosei   s   
s   --

s   -traces   -allposs   -repairs   
***** INITIALIZING ******s   TRACE is on!s   ALL POS is on!s   REPAIR MODE is on!s   *************************
s   MontyTagger v1.2s*   --send bug reports to hugo@media.mit.edu--s    s   > s   
REPAIRED: s   -- monty tooki   s   seconds. --
s   
-- monty says goodbye! --(   s
   __author__s   __version__s   syss   strings   times   typess	   Tokenizers   LexiconEfficients   LexicalRuleParsers   ContextualRuleParsers   LexiconFasts   MontyTaggers   __name__s   argvs   exits   ms   stdins   readlines   sentences   strips   tags   trace_ps	   all_pos_ps   repair_ps	   raw_inputs   time1s   verify_and_repairs   time2s   strs   rounds   KeyboardInterrupt(   s   sentences	   all_pos_ps   time1s   time2s   __version__s   trace_ps   strings
   __author__s   syss   MontyTaggers   LexicalRuleParsers   repair_ps	   Tokenizers   ms   LexiconEfficients   ContextualRuleParsers   LexiconFasts   time(    (    s   ./omcsnet/MontyTagger.pys   ?   sl   -`  


 	 	 	 )