Files
2014-09-23 19:35:48 +02:00

666 lines
23 KiB
Python

#!~/.wine/drive_c/Python25/python.exe
# -*- coding: utf-8 -*-
# Process memory finder
# Copyright (c) 2009-2014, Mario Vilas
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice,this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Process memory search.
@group Memory search:
Search,
Pattern,
BytePattern,
TextPattern,
RegExpPattern,
HexPattern
"""
__revision__ = "$Id$"
__all__ = [
'Search',
'Pattern',
'BytePattern',
'TextPattern',
'RegExpPattern',
'HexPattern',
]
from winappdbg.textio import HexInput
from winappdbg.util import StaticClass, MemoryAddresses
from winappdbg import win32
import warnings
try:
# http://pypi.python.org/pypi/regex
import regex as re
except ImportError:
import re
#==============================================================================
class Pattern (object):
"""
Base class for search patterns.
The following L{Pattern} subclasses are provided by WinAppDbg:
- L{BytePattern}
- L{TextPattern}
- L{RegExpPattern}
- L{HexPattern}
@see: L{Search.search_process}
"""
def __init__(self, pattern):
"""
Class constructor.
The only mandatory argument should be the pattern string.
This method B{MUST} be reimplemented by subclasses of L{Pattern}.
"""
raise NotImplementedError()
def __len__(self):
"""
Returns the maximum expected length of the strings matched by this
pattern. Exact behavior is implementation dependent.
Ideally it should be an exact value, but in some cases it's not
possible to calculate so an upper limit should be returned instead.
If that's not possible either an exception must be raised.
This value will be used to calculate the required buffer size when
doing buffered searches.
This method B{MUST} be reimplemented by subclasses of L{Pattern}.
"""
raise NotImplementedError()
def read(self, process, address, size):
"""
Reads the requested number of bytes from the process memory at the
given address.
Subclasses of L{Pattern} tipically don't need to reimplement this
method.
"""
return process.read(address, size)
def find(self, buffer, pos = None):
"""
Searches for the pattern in the given buffer, optionally starting at
the given position within the buffer.
This method B{MUST} be reimplemented by subclasses of L{Pattern}.
@type buffer: str
@param buffer: Buffer to search on.
@type pos: int
@param pos:
(Optional) Position within the buffer to start searching from.
@rtype: tuple( int, int )
@return: Tuple containing the following:
- Position within the buffer where a match is found, or C{-1} if
no match was found.
- Length of the matched data if a match is found, or undefined if
no match was found.
"""
raise NotImplementedError()
def found(self, address, size, data):
"""
This method gets called when a match is found.
This allows subclasses of L{Pattern} to filter out unwanted results,
or modify the results before giving them to the caller of
L{Search.search_process}.
If the return value is C{None} the result is skipped.
Subclasses of L{Pattern} don't need to reimplement this method unless
filtering is needed.
@type address: int
@param address: The memory address where the pattern was found.
@type size: int
@param size: The size of the data that matches the pattern.
@type data: str
@param data: The data that matches the pattern.
@rtype: tuple( int, int, str )
@return: Tuple containing the following:
* The memory address where the pattern was found.
* The size of the data that matches the pattern.
* The data that matches the pattern.
"""
return (address, size, data)
#------------------------------------------------------------------------------
class BytePattern (Pattern):
"""
Fixed byte pattern.
@type pattern: str
@ivar pattern: Byte string to search for.
@type length: int
@ivar length: Length of the byte pattern.
"""
def __init__(self, pattern):
"""
@type pattern: str
@param pattern: Byte string to search for.
"""
self.pattern = str(pattern)
self.length = len(pattern)
def __len__(self):
"""
Returns the exact length of the pattern.
@see: L{Pattern.__len__}
"""
return self.length
def find(self, buffer, pos = None):
return buffer.find(self.pattern, pos), self.length
#------------------------------------------------------------------------------
# FIXME: case insensitive compat.unicode searches are probably buggy!
class TextPattern (BytePattern):
"""
Text pattern.
@type isUnicode: bool
@ivar isUnicode: C{True} if the text to search for is a compat.unicode string,
C{False} otherwise.
@type encoding: str
@ivar encoding: Encoding for the text parameter.
Only used when the text to search for is a Unicode string.
Don't change unless you know what you're doing!
@type caseSensitive: bool
@ivar caseSensitive: C{True} of the search is case sensitive,
C{False} otherwise.
"""
def __init__(self, text, encoding = "utf-16le", caseSensitive = False):
"""
@type text: str or compat.unicode
@param text: Text to search for.
@type encoding: str
@param encoding: (Optional) Encoding for the text parameter.
Only used when the text to search for is a Unicode string.
Don't change unless you know what you're doing!
@type caseSensitive: bool
@param caseSensitive: C{True} of the search is case sensitive,
C{False} otherwise.
"""
self.isUnicode = isinstance(text, compat.unicode)
self.encoding = encoding
self.caseSensitive = caseSensitive
if not self.caseSensitive:
pattern = text.lower()
if self.isUnicode:
pattern = text.encode(encoding)
super(TextPattern, self).__init__(pattern)
def read(self, process, address, size):
data = super(TextPattern, self).read(address, size)
if not self.caseSensitive:
if self.isUnicode:
try:
encoding = self.encoding
text = data.decode(encoding, "replace")
text = text.lower()
new_data = text.encode(encoding, "replace")
if len(data) == len(new_data):
data = new_data
else:
data = data.lower()
except Exception:
data = data.lower()
else:
data = data.lower()
return data
def found(self, address, size, data):
if self.isUnicode:
try:
data = compat.unicode(data, self.encoding)
except Exception:
## traceback.print_exc() # XXX DEBUG
return None
return (address, size, data)
#------------------------------------------------------------------------------
class RegExpPattern (Pattern):
"""
Regular expression pattern.
@type pattern: str
@ivar pattern: Regular expression in text form.
@type flags: int
@ivar flags: Regular expression flags.
@type regexp: re.compile
@ivar regexp: Regular expression in compiled form.
@type maxLength: int
@ivar maxLength:
Maximum expected length of the strings matched by this regular
expression.
This value will be used to calculate the required buffer size when
doing buffered searches.
Ideally it should be an exact value, but in some cases it's not
possible to calculate so an upper limit should be given instead.
If that's not possible either, C{None} should be used. That will
cause an exception to be raised if this pattern is used in a
buffered search.
"""
def __init__(self, regexp, flags = 0, maxLength = None):
"""
@type regexp: str
@param regexp: Regular expression string.
@type flags: int
@param flags: Regular expression flags.
@type maxLength: int
@param maxLength: Maximum expected length of the strings matched by
this regular expression.
This value will be used to calculate the required buffer size when
doing buffered searches.
Ideally it should be an exact value, but in some cases it's not
possible to calculate so an upper limit should be given instead.
If that's not possible either, C{None} should be used. That will
cause an exception to be raised if this pattern is used in a
buffered search.
"""
self.pattern = regexp
self.flags = flags
self.regexp = re.compile(regexp, flags)
self.maxLength = maxLength
def __len__(self):
"""
Returns the maximum expected length of the strings matched by this
pattern. This value is taken from the C{maxLength} argument of the
constructor if this class.
Ideally it should be an exact value, but in some cases it's not
possible to calculate so an upper limit should be returned instead.
If that's not possible either an exception must be raised.
This value will be used to calculate the required buffer size when
doing buffered searches.
"""
if self.maxLength is None:
raise NotImplementedError()
return self.maxLength
def find(self, buffer, pos = None):
if not pos: # make sure pos is an int
pos = 0
match = self.regexp.search(buffer, pos)
if match:
start, end = match.span()
return start, end - start
return -1, 0
#------------------------------------------------------------------------------
class HexPattern (RegExpPattern):
"""
Hexadecimal pattern.
Hex patterns must be in this form::
"68 65 6c 6c 6f 20 77 6f 72 6c 64" # "hello world"
Spaces are optional. Capitalization of hex digits doesn't matter.
This is exactly equivalent to the previous example::
"68656C6C6F20776F726C64" # "hello world"
Wildcards are allowed, in the form of a C{?} sign in any hex digit::
"5? 5? c3" # pop register / pop register / ret
"b8 ?? ?? ?? ??" # mov eax, immediate value
@type pattern: str
@ivar pattern: Hexadecimal pattern.
"""
def __new__(cls, pattern):
"""
If the pattern is completely static (no wildcards are present) a
L{BytePattern} is created instead. That's because searching for a
fixed byte pattern is faster than searching for a regular expression.
"""
if '?' not in pattern:
return BytePattern( HexInput.hexadecimal(pattern) )
return object.__new__(cls, pattern)
def __init__(self, hexa):
"""
Hex patterns must be in this form::
"68 65 6c 6c 6f 20 77 6f 72 6c 64" # "hello world"
Spaces are optional. Capitalization of hex digits doesn't matter.
This is exactly equivalent to the previous example::
"68656C6C6F20776F726C64" # "hello world"
Wildcards are allowed, in the form of a C{?} sign in any hex digit::
"5? 5? c3" # pop register / pop register / ret
"b8 ?? ?? ?? ??" # mov eax, immediate value
@type hexa: str
@param hexa: Pattern to search for.
"""
maxLength = len([x for x in hexa
if x in "?0123456789ABCDEFabcdef"]) / 2
super(HexPattern, self).__init__(HexInput.pattern(hexa),
maxLength = maxLength)
#==============================================================================
class Search (StaticClass):
"""
Static class to group the search functionality.
Do not instance this class! Use its static methods instead.
"""
# TODO: aligned searches
# TODO: method to coalesce search results
# TODO: search memory dumps
# TODO: search non-ascii C strings
@staticmethod
def search_process(process, pattern, minAddr = None,
maxAddr = None,
bufferPages = None,
overlapping = False):
"""
Search for the given pattern within the process memory.
@type process: L{Process}
@param process: Process to search.
@type pattern: L{Pattern}
@param pattern: Pattern to search for.
It must be an instance of a subclass of L{Pattern}.
The following L{Pattern} subclasses are provided by WinAppDbg:
- L{BytePattern}
- L{TextPattern}
- L{RegExpPattern}
- L{HexPattern}
You can also write your own subclass of L{Pattern} for customized
searches.
@type minAddr: int
@param minAddr: (Optional) Start the search at this memory address.
@type maxAddr: int
@param maxAddr: (Optional) Stop the search at this memory address.
@type bufferPages: int
@param bufferPages: (Optional) Number of memory pages to buffer when
performing the search. Valid values are:
- C{0} or C{None}:
Automatically determine the required buffer size. May not give
complete results for regular expressions that match variable
sized strings.
- C{> 0}: Set the buffer size, in memory pages.
- C{< 0}: Disable buffering entirely. This may give you a little
speed gain at the cost of an increased memory usage. If the
target process has very large contiguous memory regions it may
actually be slower or even fail. It's also the only way to
guarantee complete results for regular expressions that match
variable sized strings.
@type overlapping: bool
@param overlapping: C{True} to allow overlapping results, C{False}
otherwise.
Overlapping results yield the maximum possible number of results.
For example, if searching for "AAAA" within "AAAAAAAA" at address
C{0x10000}, when overlapping is turned off the following matches
are yielded::
(0x10000, 4, "AAAA")
(0x10004, 4, "AAAA")
If overlapping is turned on, the following matches are yielded::
(0x10000, 4, "AAAA")
(0x10001, 4, "AAAA")
(0x10002, 4, "AAAA")
(0x10003, 4, "AAAA")
(0x10004, 4, "AAAA")
As you can see, the middle results are overlapping the last two.
@rtype: iterator of tuple( int, int, str )
@return: An iterator of tuples. Each tuple contains the following:
- The memory address where the pattern was found.
- The size of the data that matches the pattern.
- The data that matches the pattern.
@raise WindowsError: An error occurred when querying or reading the
process memory.
"""
# Do some namespace lookups of symbols we'll be using frequently.
MEM_COMMIT = win32.MEM_COMMIT
PAGE_GUARD = win32.PAGE_GUARD
page = MemoryAddresses.pageSize
read = pattern.read
find = pattern.find
# Calculate the address range.
if minAddr is None:
minAddr = 0
if maxAddr is None:
maxAddr = win32.LPVOID(-1).value # XXX HACK
# Calculate the buffer size from the number of pages.
if bufferPages is None:
try:
size = MemoryAddresses.\
align_address_to_page_end(len(pattern)) + page
except NotImplementedError:
size = None
elif bufferPages > 0:
size = page * (bufferPages + 1)
else:
size = None
# Get the memory map of the process.
memory_map = process.iter_memory_map(minAddr, maxAddr)
# Perform search with buffering enabled.
if size:
# Loop through all memory blocks containing data.
buffer = "" # buffer to hold the memory data
prev_addr = 0 # previous memory block address
last = 0 # position of the last match
delta = 0 # delta of last read address and start of buffer
for mbi in memory_map:
# Skip blocks with no data to search on.
if not mbi.has_content():
continue
# Get the address and size of this block.
address = mbi.BaseAddress # current address to search on
block_size = mbi.RegionSize # total size of the block
if address >= maxAddr:
break
end = address + block_size # end address of the block
# If the block is contiguous to the previous block,
# coalesce the new data in the buffer.
if delta and address == prev_addr:
buffer += read(process, address, page)
# If not, clear the buffer and read new data.
else:
buffer = read(process, address, min(size, block_size))
last = 0
delta = 0
# Search for the pattern in this block.
while 1:
# Yield each match of the pattern in the buffer.
pos, length = find(buffer, last)
while pos >= last:
match_addr = address + pos - delta
if minAddr <= match_addr < maxAddr:
result = pattern.found(
match_addr, length,
buffer [ pos : pos + length ] )
if result is not None:
yield result
if overlapping:
last = pos + 1
else:
last = pos + length
pos, length = find(buffer, last)
# Advance to the next page.
address = address + page
block_size = block_size - page
prev_addr = address
# Fix the position of the last match.
last = last - page
if last < 0:
last = 0
# Remove the first page in the buffer.
buffer = buffer[ page : ]
delta = page
# If we haven't reached the end of the block yet,
# read the next page in the block and keep seaching.
if address < end:
buffer = buffer + read(process, address, page)
# Otherwise, we're done searching this block.
else:
break
# Perform search with buffering disabled.
else:
# Loop through all memory blocks containing data.
for mbi in memory_map:
# Skip blocks with no data to search on.
if not mbi.has_content():
continue
# Get the address and size of this block.
address = mbi.BaseAddress
block_size = mbi.RegionSize
if address >= maxAddr:
break;
# Read the whole memory region.
buffer = process.read(address, block_size)
# Search for the pattern in this region.
pos, length = find(buffer)
last = 0
while pos >= last:
match_addr = address + pos
if minAddr <= match_addr < maxAddr:
result = pattern.found(
match_addr, length,
buffer [ pos : pos + length ] )
if result is not None:
yield result
if overlapping:
last = pos + 1
else:
last = pos + length
pos, length = find(buffer, last)
@classmethod
def extract_ascii_strings(cls, process, minSize = 4, maxSize = 1024):
"""
Extract ASCII strings from the process memory.
@type process: L{Process}
@param process: Process to search.
@type minSize: int
@param minSize: (Optional) Minimum size of the strings to search for.
@type maxSize: int
@param maxSize: (Optional) Maximum size of the strings to search for.
@rtype: iterator of tuple(int, int, str)
@return: Iterator of strings extracted from the process memory.
Each tuple contains the following:
- The memory address where the string was found.
- The size of the string.
- The string.
"""
regexp = r"[\s\w\!\@\#\$\%%\^\&\*\(\)\{\}\[\]\~\`\'\"\:\;\.\,\\\/\-\+\=\_\<\>]{%d,%d}\0" % (minSize, maxSize)
pattern = RegExpPattern(regexp, 0, maxSize)
return cls.search_process(process, pattern, overlapping = False)