Couldn't find answer anywhere, so I'm sharing my solution. If you have a better option, please post.
This worked for me (using asus Xtion Pro):
def pixelTo3DPoint(cloud, u, v):
width = cloud.width
height = cloud.height
point_step = cloud.point_step
row_step = cloud.row_step
array_pos = v*row_step + u*point_step
bytesX = [ord(x) for x in cloud.data[array_pos:array_pos+4]]
bytesY = [ord(x) for x in cloud.data[array_pos+4: array_pos+8]]
bytesZ = [ord(x) for x in cloud.data[array_pos+8:array_pos+12]]
byte_format=struct.pack('4B', *bytesX)
X = struct.unpack('f', byte_format)[0]
byte_format=struct.pack('4B', *bytesY)
Y = struct.unpack('f', byte_format)[0]
byte_format=struct.pack('4B', *bytesZ)
Z = struct.unpack('f', byte_format)[0]
return [X, Y, Z]