View Issue Details
| ID | Project | Category | View Status | Date Submitted | Last Update |
|---|---|---|---|---|---|
| 0002178 | GNUnet | GNS | public | 2012-02-25 02:35 | 2012-03-02 20:35 |
| Reporter | Christian Grothoff | Assigned To | schanzen | ||
| Priority | normal | Severity | feature | Reproducibility | N/A |
| Status | closed | Resolution | fixed | ||
| Summary | 0002178: need to choose HTTP proxy to use as a starting point for our own | ||||
| Description | In this bug, we should list some proxies and discuss their advantages/disadvantages. Naturally, any proxy we choose must be free software (GPL, LGPL, Public Domain, Apache, ...). | ||||
| Additional Information | http://www.privoxy.org/ - proxy previously used by Tor, likely not bad for security, clearly has capability of modifying HTML; most likely does not implement caching (which is good, as that's a feature we don't want or need and would have to remove). | ||||
| Tags | No tags attached. | ||||
| Attached Files | proxy.py (4,653 bytes)
#!/usr/bin/python
__doc__ = """Tiny HTTP Proxy.
This module implements GET, HEAD, POST, PUT and DELETE methods
on BaseHTTPServer, and behaves as an HTTP proxy. The CONNECT
method is also implemented experimentally, but has not been
tested yet.
Any help will be greatly appreciated. SUZUKI Hisao
"""
__version__ = "0.2.1"
import BaseHTTPServer, select, socket, SocketServer, urlparse, re
class ProxyHandler (BaseHTTPServer.BaseHTTPRequestHandler):
__base = BaseHTTPServer.BaseHTTPRequestHandler
__base_handle = __base.handle
server_version = "TinyHTTPProxy/" + __version__
rbufsize = 0 # self.rfile Be unbuffered
def handle(self):
(ip, port) = self.client_address
if hasattr(self, 'allowed_clients') and ip not in self.allowed_clients:
self.raw_requestline = self.rfile.readline()
if self.parse_request(): self.send_error(403)
else:
self.__base_handle()
def _connect_to(self, netloc, soc):
i = netloc.find(':')
if i >= 0:
host_port = netloc[:i], int(netloc[i+1:])
else:
host_port = netloc, 80
print "\t" "connect to %s:%d" % host_port
try: soc.connect(host_port)
except socket.error, arg:
try: msg = arg[1]
except: msg = arg
self.send_error(404, msg)
return 0
return 1
def do_CONNECT(self):
soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
if self._connect_to(self.path, soc):
self.log_request(200)
self.wfile.write(self.protocol_version +
" 200 Connection established\r\n")
self.wfile.write("Proxy-agent: %s\r\n" % self.version_string())
self.wfile.write("\r\n")
self._read_write(soc, 300)
finally:
print "\t" "bye"
soc.close()
self.connection.close()
def do_GET(self):
(scm, netloc, path, params, query, fragment) = urlparse.urlparse(
self.path, 'http')
if scm != 'http' or fragment or not netloc:
self.send_error(400, "bad url %s" % self.path)
return
soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
if self._connect_to(netloc, soc):
self.log_request()
soc.send("%s %s %s\r\n" % (
self.command,
urlparse.urlunparse(('', '', path, params, query, '')),
self.request_version))
self.headers['Connection'] = 'close'
del self.headers['Proxy-Connection']
for key_val in self.headers.items():
soc.send("%s: %s\r\n" % key_val)
soc.send("\r\n")
self._read_write(soc)
finally:
print "\t" "bye"
soc.close()
self.connection.close()
def _read_write(self, soc, max_idling=20):
iw = [self.connection, soc]
ow = []
count = 0
msg = ''
while 1:
count += 1
(ins, _, exs) = select.select(iw, ow, iw, 3)
if exs:
break
if ins:
for i in ins:
if i is soc:
out = self.connection
else:
out = soc
data = i.recv(8192)
if data:
data = re.sub(r'(a href="http://(\w+\.)*)(\+)',
r'\1gnunet', data)
out.send(data)
count = 0
else:
print "\t" "idle", count
print msg
if count == max_idling: break
do_HEAD = do_GET
do_POST = do_GET
do_PUT = do_GET
do_DELETE=do_GET
class ThreadingHTTPServer (SocketServer.ThreadingMixIn,
BaseHTTPServer.HTTPServer): pass
if __name__ == '__main__':
from sys import argv
if argv[1:] and argv[1] in ('-h', '--help'):
print argv[0], "[port [allowed_client_name ...]]"
else:
if argv[2:]:
allowed = []
for name in argv[2:]:
client = socket.gethostbyname(name)
allowed.append(client)
print "Accept: %s (%s)" % (client, name)
ProxyHandler.allowed_clients = allowed
del argv[2:]
else:
print "Any clients will be served..."
BaseHTTPServer.test(ProxyHandler, ThreadingHTTPServer)
proxy-gns0.patch (1,495 bytes)
--- proxy.py 2012-02-29 12:18:45.325986699 +0100
+++ Download/proxy.py 2012-02-29 11:29:16.532743821 +0100
@@ -20,7 +20,6 @@
server_version = "TinyHTTPProxy/" + __version__
rbufsize = 0 # self.rfile Be unbuffered
- host_port = ()
def handle(self):
(ip, port) = self.client_address
@@ -33,11 +32,11 @@
def _connect_to(self, netloc, soc):
i = netloc.find(':')
if i >= 0:
- self.host_port = netloc[:i], int(netloc[i+1:])
+ host_port = netloc[:i], int(netloc[i+1:])
else:
- self.host_port = netloc, 80
- print "\t" "connect to %s:%d" % self.host_port
- try: soc.connect(self.host_port)
+ host_port = netloc, 80
+ print "\t" "connect to %s:%d" % host_port
+ try: soc.connect(host_port)
except socket.error, arg:
try: msg = arg[1]
except: msg = arg
@@ -103,9 +102,8 @@
out = soc
data = i.recv(8192)
if data:
- if (re.match("(\w+\.)*gnunet", self.host_port[0])):
- data = re.sub(r'(a href="http://(\w+\.)*)(\+)', r'\1'+self.host_port[0], data)
- print data
+ data = re.sub(r'(a href="http://(\w+\.)*)(\+)',
+ r'\1gnunet', data)
out.send(data)
count = 0
else:
| ||||
|
|
http://www.pps.jussieu.fr/~jch/software/polipo/ --- claims to be 'small', with support for IPv4 and IPv6; does support caching and other HTTP-optimizations which are likely not needed (not so good), also not clear if it has built-in support for modifying HTML |
|
|
http://www.acme.com/software/micro_proxy/ --- HTTP/HTTPs IPv6-capable proxy in 320 lines of code? Won't have an HTML parser, but for that we could likely use libtidy or something like that.... |
|
|
http://www.membrane-soa.org/esb/ --- proxy for URL rewriting (plus some other features we don't need, but no caching). |
|
|
http://swiftsurf.sourceforge.net/index-eng.html --- another proxy advertising URL rewriting. |
|
|
http://webcleaner.sourceforge.net/ --- another proxy advertising URL rewriting. |
|
|
https://banu.com/tinyproxy/ --- mentions that it is small, says nothing about modifying the HTML. |
|
|
Ok, I've gone over those above, and while far from 'perfect', I think among those the only one I'd consider as a starting point is the 'micro_proxy'. Some of the others FTBFS or are far more complex (ESB, cough, cough) without offering significantly more of what we'd want. Still, we should probably keep looking a bit more... |
|
|
http://www.oki-osk.jp/esc/python/proxy/ --- python, simple script, no ssl |
|
|
http://www-scf.usc.edu/~csci571/Special/HTTP/proxy.pl --- same as above, in perl |
|
|
I think we have to define exactly what we want (SSL, IPv6, etc.) first. If we limit the HTTP usage to no SSL and no IPv6, then I see it as a clear disadvantage, since "normal" browsing shouldn't be affected by a GNS deployment. Writing this makes me wonder whether a browser plugin is a "nicer" way to make this work. EDIT: I think for a proof of concept we should use a REALLY simple proxy (no HTTPS or any other fancy stuff) and modify it so it makes GNS work. This is the least amount of code and it shows that it works. In the end, though, I think browser plugins for the common browsers (Chrome, Firefox) would be a better way to integrate GNS (usability, features). All other browsers will still be able to use the proxy with reduced functionality but working GNS resolution. |
|
|
http://code.google.com/p/linktweak/source/browse/trunk/scripts/Content.js?r=2 - example for chrome plugin that rewrites specific a hrefs (i think) |
|
|
Proof of concept. modified python script. line 105 data = re.sub(r'(a href="http://(\w+\.)*)(\+)', r'\1gnunet', data) does the magic for a href's. Tested here: http://home.in.tum.de/~schanzen/ EDIT line above is not escaped see source EDIT2 patch will replace only on .gnunet hosts and not only .gnunet but the whole domain. (used patch incorrectly... see file) |
|
|
SSL-enabled proxy http://www.thoughtcrime.org/software/sslsniff/ [^] |
|
|
We'll use the python script for the prototype and will likely use SSLsniff for the production system. |
| Date Modified | Username | Field | Change |
|---|---|---|---|
| 2012-02-25 02:35 | Christian Grothoff | New Issue | |
| 2012-02-25 02:36 | Christian Grothoff | Note Added: 0005510 | |
| 2012-02-25 02:41 | Christian Grothoff | Note Added: 0005511 | |
| 2012-02-25 02:44 | Christian Grothoff | Note Added: 0005512 | |
| 2012-02-25 02:46 | Christian Grothoff | Note Added: 0005513 | |
| 2012-02-25 02:46 | Christian Grothoff | Note Added: 0005514 | |
| 2012-02-25 02:50 | Christian Grothoff | Note Added: 0005515 | |
| 2012-02-25 22:02 | Christian Grothoff | Note Added: 0005518 | |
| 2012-02-28 20:09 | Christian Grothoff | Status | new => feedback |
| 2012-02-28 20:10 | Christian Grothoff | Assigned To | => schanzen |
| 2012-02-28 20:35 | schanzen | Note Added: 0005534 | |
| 2012-02-28 20:40 | schanzen | Note Added: 0005535 | |
| 2012-02-28 20:44 | schanzen | Note Added: 0005536 | |
| 2012-02-28 20:53 | schanzen | Note Edited: 0005536 | |
| 2012-02-28 21:01 | schanzen | Note Added: 0005537 | |
| 2012-02-29 11:24 | schanzen | Note Added: 0005539 | |
| 2012-02-29 11:24 | schanzen | File Added: proxy.py | |
| 2012-02-29 11:28 | schanzen | Note Edited: 0005539 | |
| 2012-02-29 12:20 | schanzen | File Added: proxy-gns0.patch | |
| 2012-02-29 12:22 | schanzen | Note Edited: 0005539 | |
| 2012-03-01 11:42 | Christian Grothoff | Note Added: 0005549 | |
| 2012-03-01 11:42 | Christian Grothoff | Status | feedback => assigned |
| 2012-03-01 19:24 | Christian Grothoff | Note Added: 0005551 | |
| 2012-03-01 19:24 | Christian Grothoff | Status | assigned => resolved |
| 2012-03-01 19:24 | Christian Grothoff | Fixed in Version | => 0.9.3 |
| 2012-03-01 19:24 | Christian Grothoff | Resolution | open => fixed |
| 2012-03-01 19:25 | Christian Grothoff | Fixed in Version | 0.9.3 => |
| 2012-03-02 20:35 | Christian Grothoff | Status | resolved => closed |
| 2012-03-02 20:35 | Christian Grothoff | Product Version | Git master => |