View Issue Details

IDProjectCategoryView StatusLast Update
0002178GNUnetGNSpublic2012-03-02 20:35
ReporterChristian Grothoff Assigned Toschanzen  
PrioritynormalSeverityfeatureReproducibilityN/A
Status closedResolutionfixed 
Summary0002178: need to choose HTTP proxy to use as a starting point for our own
DescriptionIn this bug, we should list some proxies and discuss their advantages/disadvantages. Naturally, any proxy we choose must be free software (GPL, LGPL, Public Domain, Apache, ...).
Additional Informationhttp://www.privoxy.org/ - proxy previously used by Tor, likely not bad for security, clearly has capability of modifying HTML; most likely does not implement caching (which is good, as that's a feature we don't want or need and would have to remove).
TagsNo tags attached.
Attached Files
proxy.py (4,653 bytes)   
#!/usr/bin/python

# Module documentation. It is bound to __doc__ with an explicit assignment
# instead of being written as a bare leading string literal.
__doc__ = """Tiny HTTP Proxy.

This module implements GET, HEAD, POST, PUT and DELETE methods
on BaseHTTPServer, and behaves as an HTTP proxy.  The CONNECT
method is also implemented experimentally, but has not been
tested yet.

Any help will be greatly appreciated.   SUZUKI Hisao
"""

# Version of this script; ProxyHandler appends it to its server_version string.
__version__ = "0.2.1"

import BaseHTTPServer, select, socket, SocketServer, urlparse, re

class ProxyHandler (BaseHTTPServer.BaseHTTPRequestHandler):
    __base = BaseHTTPServer.BaseHTTPRequestHandler
    __base_handle = __base.handle

    server_version = "TinyHTTPProxy/" + __version__
    rbufsize = 0                        # self.rfile Be unbuffered

    def handle(self):
        (ip, port) =  self.client_address
        if hasattr(self, 'allowed_clients') and ip not in self.allowed_clients:
            self.raw_requestline = self.rfile.readline()
            if self.parse_request(): self.send_error(403)
        else:
            self.__base_handle()

    def _connect_to(self, netloc, soc):
        i = netloc.find(':')
        if i >= 0:
            host_port = netloc[:i], int(netloc[i+1:])
        else:
            host_port = netloc, 80
        print "\t" "connect to %s:%d" % host_port
        try: soc.connect(host_port)
        except socket.error, arg:
            try: msg = arg[1]
            except: msg = arg
            self.send_error(404, msg)
            return 0
        return 1

    def do_CONNECT(self):
        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._connect_to(self.path, soc):
                self.log_request(200)
                self.wfile.write(self.protocol_version +
                                 " 200 Connection established\r\n")
                self.wfile.write("Proxy-agent: %s\r\n" % self.version_string())
                self.wfile.write("\r\n")
                self._read_write(soc, 300)
        finally:
            print "\t" "bye"
            soc.close()
            self.connection.close()

    def do_GET(self):
        (scm, netloc, path, params, query, fragment) = urlparse.urlparse(
            self.path, 'http')
        if scm != 'http' or fragment or not netloc:
            self.send_error(400, "bad url %s" % self.path)
            return
        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._connect_to(netloc, soc):
                self.log_request()
                soc.send("%s %s %s\r\n" % (
                    self.command,
                    urlparse.urlunparse(('', '', path, params, query, '')),
                    self.request_version))
                self.headers['Connection'] = 'close'
                del self.headers['Proxy-Connection']
                for key_val in self.headers.items():
                    soc.send("%s: %s\r\n" % key_val)
                soc.send("\r\n")
                self._read_write(soc)
        finally:
            print "\t" "bye"
            soc.close()
            self.connection.close()

    def _read_write(self, soc, max_idling=20):
        iw = [self.connection, soc]
        ow = []
        count = 0
        msg = ''
        while 1:
            count += 1
            (ins, _, exs) = select.select(iw, ow, iw, 3)
            if exs:
              break
            if ins:
                for i in ins:
                    if i is soc:
                        out = self.connection
                    else:
                        out = soc
                    data = i.recv(8192)
                    if data:
                        data = re.sub(r'(a href="http://(\w+\.)*)(\+)',
                        r'\1gnunet', data)
                        out.send(data)
                        count = 0
            else:
                print "\t" "idle", count
                print msg
            if count == max_idling: break

    do_HEAD = do_GET
    do_POST = do_GET
    do_PUT  = do_GET
    do_DELETE=do_GET

class ThreadingHTTPServer (SocketServer.ThreadingMixIn,
                           BaseHTTPServer.HTTPServer):
    """HTTPServer combined with SocketServer.ThreadingMixIn."""

if __name__ == '__main__':
    from sys import argv
    if argv[1:] and argv[1] in ('-h', '--help'):
        print argv[0], "[port [allowed_client_name ...]]"
    else:
        if argv[2:]:
            allowed = []
            for name in argv[2:]:
                client = socket.gethostbyname(name)
                allowed.append(client)
                print "Accept: %s (%s)" % (client, name)
            ProxyHandler.allowed_clients = allowed
            del argv[2:]
        else:
            print "Any clients will be served..."
        BaseHTTPServer.test(ProxyHandler, ThreadingHTTPServer)

proxy.py (4,653 bytes)   
proxy-gns0.patch (1,495 bytes)   
--- proxy.py	2012-02-29 12:18:45.325986699 +0100
+++ Download/proxy.py	2012-02-29 11:29:16.532743821 +0100
@@ -20,7 +20,6 @@
 
     server_version = "TinyHTTPProxy/" + __version__
     rbufsize = 0                        # self.rfile Be unbuffered
-    host_port = ()
 
     def handle(self):
         (ip, port) =  self.client_address
@@ -33,11 +32,11 @@
     def _connect_to(self, netloc, soc):
         i = netloc.find(':')
         if i >= 0:
-            self.host_port = netloc[:i], int(netloc[i+1:])
+            host_port = netloc[:i], int(netloc[i+1:])
         else:
-            self.host_port = netloc, 80
-        print "\t" "connect to %s:%d" % self.host_port
-        try: soc.connect(self.host_port)
+            host_port = netloc, 80
+        print "\t" "connect to %s:%d" % host_port
+        try: soc.connect(host_port)
         except socket.error, arg:
             try: msg = arg[1]
             except: msg = arg
@@ -103,9 +102,8 @@
                         out = soc
                     data = i.recv(8192)
                     if data:
-                        if (re.match("(\w+\.)*gnunet", self.host_port[0])):
-                            data = re.sub(r'(a href="http://(\w+\.)*)(\+)', r'\1'+self.host_port[0], data)
-                        print data
+                        data = re.sub(r'(a href="http://(\w+\.)*)(\+)',
+                        r'\1gnunet', data)
                         out.send(data)
                         count = 0
             else:
proxy-gns0.patch (1,495 bytes)   

Activities

Christian Grothoff

2012-02-25 02:36

manager   ~0005510

http://www.pps.jussieu.fr/~jch/software/polipo/ --- claims to be 'small', with support for IPv4 and IPv6; does support caching and other HTTP optimizations which are likely not needed (not so good); also not clear if it has built-in support for modifying HTML

Christian Grothoff

2012-02-25 02:41

manager   ~0005511

http://www.acme.com/software/micro_proxy/ --- HTTP/HTTPs IPv6-capable proxy in 320 lines of code? Won't have an HTML parser, but for that we could likely use libtidy or something like that....

Christian Grothoff

2012-02-25 02:44

manager   ~0005512

http://www.membrane-soa.org/esb/ --- proxy for URL rewriting (plus some other features we don't need, but no caching).

Christian Grothoff

2012-02-25 02:46

manager   ~0005513

http://swiftsurf.sourceforge.net/index-eng.html --- another proxy advertising URL rewriting.

Christian Grothoff

2012-02-25 02:46

manager   ~0005514

http://webcleaner.sourceforge.net/ --- another proxy advertising URL rewriting.

Christian Grothoff

2012-02-25 02:50

manager   ~0005515

https://banu.com/tinyproxy/ --- mentions that it is small, says nothing about modifying the HTML.

Christian Grothoff

2012-02-25 22:02

manager   ~0005518

Ok, I've gone over those above, and while far from 'perfect', I think among those the only one I'd consider as a starting point is the 'micro_proxy'. Some of the others FTBFS or are far more complex (ESB, cough, cough) without offering significantly more of what we'd want. Still, we should probably keep looking a bit more...

schanzen

2012-02-28 20:35

administrator   ~0005534

http://www.oki-osk.jp/esc/python/proxy/ --- python, simple script, no ssl

schanzen

2012-02-28 20:40

administrator   ~0005535

http://www-scf.usc.edu/~csci571/Special/HTTP/proxy.pl --- same as above, in perl

schanzen

2012-02-28 20:44

administrator   ~0005536

Last edited: 2012-02-28 20:53

I think we have to define exactly what we want (SSL/IPV6) etc first.
If we limit the HTTP usage to no SSL and no IPv6, then I see that as a clear disadvantage, since "normal" browsing shouldn't be affected by a GNS deployment. Writing this makes me wonder whether a browser plugin is a "nicer" way to make this work.

EDIT:
I think for a proof of concept we should use a REALLY simple proxy (no https or any other fancy stuff) and modify it so it makes gns work. This is the least amount of code and it shows that it works.

In the end though I think browser plugins for the common browsers (chrome, ff) would be a better way to integrate gns (usability, features). All other browsers will still be able to use the proxy with reduced functionalities but working gns resolution.

schanzen

2012-02-28 21:01

administrator   ~0005537

http://code.google.com/p/linktweak/source/browse/trunk/scripts/Content.js?r=2 - example for chrome plugin that rewrites specific a hrefs (i think)

schanzen

2012-02-29 11:24

administrator   ~0005539

Last edited: 2012-02-29 12:22

Proof of concept. modified python script.

line 105
data = re.sub(r'(a href="http://(\w+\.)*)(\+)', r'\1gnunet', data)

does the magic for a href's.

Tested here: http://home.in.tum.de/~schanzen/

EDIT
line above is not escaped see source

EDIT2
patch will replace only on .gnunet hosts and not only .gnunet but the whole domain.
(used patch incorrectly... see file)

Christian Grothoff

2012-03-01 11:42

manager   ~0005549

SSL-enabled proxy http://www.thoughtcrime.org/software/sslsniff/ [^]

Christian Grothoff

2012-03-01 19:24

manager   ~0005551

We'll use the python script for the prototype and will likely use SSLsniff for the production system.

Issue History

Date Modified Username Field Change
2012-02-25 02:35 Christian Grothoff New Issue
2012-02-25 02:36 Christian Grothoff Note Added: 0005510
2012-02-25 02:41 Christian Grothoff Note Added: 0005511
2012-02-25 02:44 Christian Grothoff Note Added: 0005512
2012-02-25 02:46 Christian Grothoff Note Added: 0005513
2012-02-25 02:46 Christian Grothoff Note Added: 0005514
2012-02-25 02:50 Christian Grothoff Note Added: 0005515
2012-02-25 22:02 Christian Grothoff Note Added: 0005518
2012-02-28 20:09 Christian Grothoff Status new => feedback
2012-02-28 20:10 Christian Grothoff Assigned To => schanzen
2012-02-28 20:35 schanzen Note Added: 0005534
2012-02-28 20:40 schanzen Note Added: 0005535
2012-02-28 20:44 schanzen Note Added: 0005536
2012-02-28 20:53 schanzen Note Edited: 0005536
2012-02-28 21:01 schanzen Note Added: 0005537
2012-02-29 11:24 schanzen Note Added: 0005539
2012-02-29 11:24 schanzen File Added: proxy.py
2012-02-29 11:28 schanzen Note Edited: 0005539
2012-02-29 12:20 schanzen File Added: proxy-gns0.patch
2012-02-29 12:22 schanzen Note Edited: 0005539
2012-03-01 11:42 Christian Grothoff Note Added: 0005549
2012-03-01 11:42 Christian Grothoff Status feedback => assigned
2012-03-01 19:24 Christian Grothoff Note Added: 0005551
2012-03-01 19:24 Christian Grothoff Status assigned => resolved
2012-03-01 19:24 Christian Grothoff Fixed in Version => 0.9.3
2012-03-01 19:24 Christian Grothoff Resolution open => fixed
2012-03-01 19:25 Christian Grothoff Fixed in Version 0.9.3 =>
2012-03-02 20:35 Christian Grothoff Status resolved => closed
2012-03-02 20:35 Christian Grothoff Product Version Git master =>