# Pantera - Web Pen-Test Proxy
#
# FILENAME      : panteraSnitch.py
# CODER         : Simon Roses Femerling
# DATE          : 08/11/2006
# LAST UPDATE   : 08/1/2006
# ABSTRACT      : Python Web Pen-Test Proxy :)
#                 Pantera "The Snitch"
#
# - Roses Labs Innovations (RL+I)
# Roses Labs
# http://www.roseslabs.com
#
# Copyright (c) 2003-2006 Roses Labs.
#
# You may not distribute, transmit, repost this software for commercial 
# purposes without Roses Labs written permission. 
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, publish,
# distribute the Software, and to permit persons to whom the Software 
# is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#

'''
@author:       Simon Roses Femerling
@license:      GNU General Public License 2.0 or later
@contact:      pantera.proxy@gmail.com
@organization: OWASP / Roses Labs
'''

import panterautils
import os
import sys
import cPickle
import re
import sets
import threading
import Queue

#############################################################################################
# Our Functions
#############################################################################################

class PanteraSnithStore:
    '''
    Container tying a link ID to its URLs, comments and scripts.

    The class only stores and hands back whatever it was given; it does
    no parsing or validation of its own.
    '''

    def __init__(self):
        '''
        Create an empty store.

        The ID starts out as -1 and the three collections start out as
        empty strings; they keep those values until InsertStore() is
        called.
        '''
        self._id, self._urls, self._comment, self._script = -1, '', '', ''

    def InsertStore(self, id, u, c, s):
        '''
        Fill the store, replacing any values it held before.

        @type id: int
        @param id: Link ID
        @type u: list
        @param u: URLs belonging to the link
        @type c: list
        @param c: Comments belonging to the link
        @type s: list
        @param s: Scripts belonging to the link
        '''
        # The arguments are stored by reference, not copied.
        self._id, self._urls, self._comment, self._script = id, u, c, s

    def ReturnURLS(self):
        '''
        Give back the stored URLs.

        @return: Whatever was passed as C{u} to InsertStore(), or an
                 empty string if nothing has been inserted yet.
        '''
        return self._urls

    def ReturnComment(self):
        '''
        Give back the stored comments.

        @return: Whatever was passed as C{c} to InsertStore(), or an
                 empty string if nothing has been inserted yet.
        '''
        return self._comment

    def ReturnScript(self):
        '''
        Give back the stored scripts.

        @return: Whatever was passed as C{s} to InsertStore(), or an
                 empty string if nothing has been inserted yet.
        '''
        return self._script

    def ReturnID(self):
        '''
        Give back the stored link ID.

        @return: The ID passed to InsertStore(), or -1 if nothing has
                 been inserted yet.
        '''
        return self._id
    
#############################################################################################
# FUNC     : 
# PARAMS   : 
# RETURN   : 
# ABSTRACT : 
class PanteraSnitch:
    '''
    Pantera Snitch Class.

    Scans a piece of text for absolute http/https URLs, <script> elements
    and HTML comments, and accumulates the matches in sets so duplicates
    are dropped.
    '''

    # The patterns are compiled once, when the class is defined, instead of
    # on every BeginSnitch() call. They stay reachable through the instance
    # as self.url_re, self.script_re and self.comment_re.

    # Absolute http/https URL (case-insensitive scheme); the match stops at
    # the first quote, angle bracket or whitespace character.
    url_re = re.compile(r"""(?:https?)://(?:[^"'><\s]*)""",re.IGNORECASE)

    # Whole <script ...>...</script> element. The pattern has two capturing
    # groups, so findall() yields (whole_element, tag_attributes) tuples.
    script_re = re.compile('(<script(.*?)>.*?</script>)',re.IGNORECASE | re.VERBOSE | re.DOTALL)

    # HTML comment whose body is built from non-dash characters or a single
    # dash followed by a non-dash, i.e. a body that never contains "--".
    comment_re = re.compile('<!--(?:(?:[^-]|(?:-[^-]))*)-->', re.IGNORECASE | re.VERBOSE | re.DOTALL)

    def __init__(self,data):
        '''
        Init class.

        @type data: string
        @param data: Data to be parsed by snitch.
        '''
        self._data = data
        # Built-in sets replace the deprecated sets.Set type.
        self._urls = set()
        self._comment = set()
        self._script = set()

    def ReturnURLS(self):
        '''
        Return URLS.

        @return: Set of URL strings gathered so far.
        '''
        return self._urls

    def ReturnComment(self):
        '''
        Return Comments.

        @return: Set of HTML comment strings gathered so far.
        '''
        return self._comment

    def ReturnScript(self):
        '''
        Return Script.

        @return: Set of (whole_element, tag_attributes) tuples gathered
                 so far, one per distinct <script> element.
        '''
        return self._script

    def BeginSnitch(self):
        '''
        Begin parsing data.

        Matches are added to the existing result sets, so calling this
        more than once on the same data leaves the results unchanged.

        @return: Return 0 on success. Empty or missing data (None) is
                 treated as "nothing to gather" and also returns 0.
        '''
        data = self._data
        if not data:
            # Nothing to scan; also avoids the TypeError that findall()
            # raises when handed None.
            return 0

        # Links gathering
        self._urls.update(self.url_re.findall(data))

        # Script gathering
        self._script.update(self.script_re.findall(data))

        # Comment gathering
        self._comment.update(self.comment_re.findall(data))

        return 0

# RL+I EOF