summaryrefslogtreecommitdiff
path: root/smmap/buf.py
blob: c4d25225167faf9225e6963a111c3a39ea734c7f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
"""Module with a simple buffer implementation using the memory manager"""
from mman import WindowCursor

import sys

__all__ = ["SlidingWindowMapBuffer"]

class SlidingWindowMapBuffer(object):
	"""A buffer like object which allows direct byte-wise object and slicing into 
	memory of a mapped file. The mapping is controlled by the provided cursor.
	
	The buffer is relative, that is if you map an offset, index 0 will map to the 
	first byte at the offset you used during initialization or begin_access
	
	:note: Although this type effectively hides the fact that there are mapped windows
		underneath, it can unfortunately not be used in any non-pure python method which
		needs a buffer or string"""
	__slots__ = (
				'_c',			# our cursor
				'_size', 		# our supposed size
				)
	
	
	def __init__(self, cursor = None, offset = 0, size = sys.maxint, flags = 0):
		"""Initalize the instance to operate on the given cursor.
		:param cursor: if not None, the associated cursor to the file you want to access
			If None, you have call begin_access before using the buffer and provide a cursor 
		:param offset: absolute offset in bytes
		:param size: the total size of the mapping. Defaults to the maximum possible size
			From that point on, the __len__ of the buffer will be the given size or the file size.
			If the size is larger than the mappable area, you can only access the actually available
			area, although the length of the buffer is reported to be your given size.
			Hence it is in your own interest to provide a proper size !
		:param flags: Additional flags to be passed to os.open
		:raise ValueError: if the buffer could not achieve a valid state"""
		self._c = cursor
		if cursor and not self.begin_access(cursor, offset, size, flags):
			raise ValueError("Failed to allocate the buffer - probably the given offset is out of bounds")
		# END handle offset

	def __del__(self):
		self.end_access()
		
	def __len__(self):
		return self._size
		
	def __getitem__(self, i):
		c = self._c
		assert c.is_valid()
		if not c.includes_ofs(i):
			c.use_region(i, 1)
		# END handle region usage
		return c.buffer()[i-c.ofs_begin()]
	
	def __getslice__(self, i, j):
		c = self._c
		# fast path, slice fully included - safes a concatenate operation and 
		# should be the default
		assert c.is_valid()
		if (c.ofs_begin() <= i) and (j < c.ofs_end()):
			b = c.ofs_begin()
			return c.buffer()[i-b:j-b]
		else:
			l = j-i					# total length
			ofs = i
			# Keeping tokens in a list could possible be faster, but the list
			# overhead outweighs the benefits (tested) !
			md = str()
			while l:
				c.use_region(ofs, l)
				d = c.buffer()[:l]
				ofs += len(d)
				l -= len(d)
				md += d
			#END while there are bytes to read
			return md
		# END fast or slow path
	#{ Interface
	
	def begin_access(self, cursor = None, offset = 0, size = sys.maxint, flags = 0):
		"""Call this before the first use of this instance. The method was already
		called by the constructor in case sufficient information was provided.
		
		For more information no the parameters, see the __init__ method
		:param path: if cursor is None the existing one will be used. 
		:return: True if the buffer can be used"""
		if cursor:
			self._c = cursor
		#END update our cursor
		
		# reuse existing cursors if possible
		if self._c is not None and self._c.is_associated():
			res = self._c.use_region(offset, size, flags).is_valid()
			if res:
				# if given size is too large or default, we computer a proper size
				# If its smaller, we assume the combination between offset and size
				# as chosen by the user is correct and use it !
				# If not, the user is in trouble.
				if size > self._c.file_size():
					size = self._c.file_size() - offset
				#END handle size
				self._size = size 
			#END set size
			return res
		return False
		
	def end_access(self):
		"""Call this method once you are done using the instance. It is automatically 
		called on destruction, and should be called just in time to allow system
		resources to be freed.
		
		Once you called end_access, you must call begin access before reusing this instance!"""
		self._size = 0
		if self._c is not None:
			self._c.unuse_region()
		#END unuse region
		
	def cursor(self):
		""":return: the currently set cursor which provides access to the data"""
		return self._c
		
	#}END interface