diff options
authorAlin Năstac <>2005-09-19 05:35:35 +0000
committerAlin Năstac <>2005-09-19 05:35:35 +0000
commit5f91651a0543561aa0a96d28fc256ee38327132d (patch)
tree60ba8357366a4b961d164008c6f205e4b80b277c /net-proxy
parentInitial commit. Ebuild by Robin H. Johnson <>. (diff)
initial import from bug #103947
(Portage version:
Diffstat (limited to 'net-proxy')
9 files changed, 650 insertions, 0 deletions
diff --git a/net-proxy/bfilter/ChangeLog b/net-proxy/bfilter/ChangeLog
new file mode 100644
index 000000000000..f8b2952d0d9a
--- /dev/null
+++ b/net-proxy/bfilter/ChangeLog
@@ -0,0 +1,12 @@
+# ChangeLog for net-proxy/bfilter
+# Copyright 1999-2005 Gentoo Foundation; Distributed under the GPL v2
+# $Header: /var/cvsroot/gentoo-x86/net-proxy/bfilter/ChangeLog,v 1.1 2005/09/19 05:35:35 mrness Exp $
+*bfilter-0.9.4 (18 Sep 2005)
+ 18 Sep 2005; Alin Nastac <>
+ +files/bfilter-0.9.4-droppriv.patch, +files/bfilter.8,
+ +files/bfilter.conf, +files/bfilter.init, +metadata.xml,
+ +bfilter-0.9.4.ebuild:
+ Initial import from bug #103947, thanks to Alan Swanson <>.
diff --git a/net-proxy/bfilter/Manifest b/net-proxy/bfilter/Manifest
new file mode 100644
index 000000000000..00405c2a5fd4
--- /dev/null
+++ b/net-proxy/bfilter/Manifest
@@ -0,0 +1,6 @@
+MD5 900e9cb9e13a3514e2ac4fded18839e6 bfilter-0.9.4.ebuild 1155
+MD5 59c8af85f08eaafc38f6c3421caa8ca3 files/bfilter-0.9.4-droppriv.patch 5108
+MD5 e6e2c311139ed184131d6a945b12a3fb files/digest-bfilter-0.9.4 66
+MD5 51acec84c4acfc995ab2a02fd8a3c7fe files/bfilter.8 9512
+MD5 540f7cf4785103e470ac58cd7d998a00 files/bfilter.conf 275
+MD5 270e205b4f910fe004f69609fad563dc files/bfilter.init 395
diff --git a/net-proxy/bfilter/bfilter-0.9.4.ebuild b/net-proxy/bfilter/bfilter-0.9.4.ebuild
new file mode 100644
index 000000000000..1c3343874c6b
--- /dev/null
+++ b/net-proxy/bfilter/bfilter-0.9.4.ebuild
@@ -0,0 +1,53 @@
+# Copyright 1999-2005 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Header: /var/cvsroot/gentoo-x86/net-proxy/bfilter/bfilter-0.9.4.ebuild,v 1.1 2005/09/19 05:35:35 mrness Exp $
+inherit eutils
+DESCRIPTION="An ad-filtering web proxy featuring an effective heuristic ad-detection algorithm"
+IUSE="X debug"
+ dev-libs/popt
+ =dev-libs/libsigc++-1.2*
+ X? ( =dev-cpp/gtkmm-2.2* )"
+ dev-util/pkgconfig"
+src_unpack() {
+ unpack ${A}
+ # Provide user, group and chroot privilege lowering
+ epatch ${FILESDIR}/${P}-droppriv.patch
+src_compile() {
+ econf `use_enable debug` `use_with X gui` || die "econf failed"
+ emake || die "emake failed"
+src_install() {
+ make DESTDIR="${D}" install || die "make install failed"
+ # This is also created by openssh for privilege separation
+ keepdir /var/empty
+ doman ${FILESDIR}/bfilter.8
+ dodoc AUTHORS ChangeLog
+ dohtml doc/*.png doc/*.html
+ newinitd ${FILESDIR}/bfilter.init bfilter
+ newconfd ${FILESDIR}/bfilter.conf bfilter
+pkg_preinst() {
+ enewgroup bfilter
+ enewuser bfilter -1 -1 -1 bfilter
diff --git a/net-proxy/bfilter/files/bfilter-0.9.4-droppriv.patch b/net-proxy/bfilter/files/bfilter-0.9.4-droppriv.patch
new file mode 100644
index 000000000000..923988cfd0f9
--- /dev/null
+++ b/net-proxy/bfilter/files/bfilter-0.9.4-droppriv.patch
@@ -0,0 +1,170 @@
+diff -ur bfilter-0.9.4/main/main_unix.cpp bfilter-0.9.4-privdrop/main/main_unix.cpp
+--- bfilter-0.9.4/main/main_unix.cpp 2003-12-19 19:27:32.000000000 +0000
++++ bfilter-0.9.4-privdrop/main/main_unix.cpp 2005-08-27 19:43:13.000000000 +0100
+@@ -26,13 +26,16 @@
+ #include <popt.h>
+ #include "network.h"
+ #include <ipcportal.h>
++#include <sys/stat.h>
++#include <pwd.h>
++#include <grp.h>
+ // some older versions of popt don't define POPT_TABLEEND
+ #define POPT_TABLEEND { NULL, '\0', 0, 0, 0, NULL, NULL }
+ #endif
+-int main_unix_standalone(const std::string& confdir, bool nodaemon);
++int main_unix_standalone(const std::string& confdir, bool nodaemon, char *chroot, uid_t user, gid_t group);
+ int main_unix_backend(Network::Socket csock, IPCPortal* portal);
+ int main(int argc, char *argv[])
+@@ -40,16 +43,27 @@
+ enum {
+ };
+ bool backend = false;
+ bool nodaemon = false;
+ char *cdir = 0;
++	char *chroot = 0;	// --chroot argument; stays NULL when the option is absent
++	char *user = 0;	// --user argument; stays NULL when the option is absent
++	char *group = 0;	// --group argument; stays NULL when the option is absent
++	uid_t uid = 0;	// id looked up for "user"; 0 leaves the user unchanged
++	gid_t gid = 0;	// id looked up for "group"; 0 leaves the group unchanged
+ std::string confdir = CONFDIR;
+ struct poptOption options[] = {
+ { "version", 'v', POPT_ARG_NONE, NULL, ARG_VERSION, "Print version and exit" },
+ { "confdir", 'c', POPT_ARG_STRING, &cdir, ARG_CONFDIR, "Set custom config directory", "dir" },
++ { "chroot", 'r', POPT_ARG_STRING, &chroot, ARG_CHROOT, "Set chroot directory", "dir" },
++ { "user", 'u', POPT_ARG_STRING, &user, ARG_USER, "Set unprivileged user", "name" },
++ { "group", 'g', POPT_ARG_STRING, &group, ARG_GROUP, "Set unprivileged group", "name" },
+ { "nodaemon", 'n', POPT_ARG_NONE, NULL, ARG_NODAEMON, "Disable background daemon mode" },
+@@ -78,13 +92,49 @@
+ << ": " << poptStrerror(arg) << std::endl;
+ return 1;
+ }
++	// Privilege lowering was requested: validate the request and resolve the
++	// user/group names to ids before main_unix_standalone() is called.
++	if (!backend && (chroot || user || group)) {
++		struct stat stat_r;
++		if (getuid()) {
++			std::cerr << "Cannot lower privileges, not running as root" << std::endl;
++			return 1;
++		}
++		// stat() returns 0 on success, so the path is rejected when the
++		// call fails or when it names something that is not a directory.
++		if (chroot && (stat(chroot, &stat_r) != 0 || !S_ISDIR(stat_r.st_mode))) {
++			std::cerr << "Cannot lower privileges, chroot directory does not exist" << std::endl;
++			return 1;
++		}
++		if (user) {
++			struct passwd *user_r = getpwnam(user);
++			if (user_r)
++				uid = user_r->pw_uid;
++			else {
++				std::cerr << "Cannot lower privileges, unknown user" << std::endl;
++				return 1;
++			}
++		}
++		if (group) {
++			struct group *group_r = getgrnam(group);
++			if (group_r)
++				gid = group_r->gr_gid;
++			else {
++				std::cerr << "Cannot lower privileges, unknown group" << std::endl;
++				return 1;
++			}
++		}
++	}
+ poptFreeContext(context);
+ if (backend) {
+ IPCPortal portal(0, 1);
+ return main_unix_backend(3, &portal);
+ } else {
+- return main_unix_standalone(confdir, nodaemon);
++ return main_unix_standalone(confdir, nodaemon, chroot, uid, gid);
+ }
+ }
+diff -ur bfilter-0.9.4/main/main_unix_standalone.cpp bfilter-0.9.4-privdrop/main/main_unix_standalone.cpp
+--- bfilter-0.9.4/main/main_unix_standalone.cpp 2003-12-11 03:34:51.000000000 +0000
++++ bfilter-0.9.4-privdrop/main/main_unix_standalone.cpp 2005-08-28 13:03:29.000000000 +0100
+@@ -26,6 +26,9 @@
+ #include "state.h"
+ #include <ipcportal.h>
+ #include "syscall.h"
++#include <pwd.h>
++#include <grp.h>
++#include <resolv.h>
+ class StandaloneState : public State
+ {
+@@ -175,7 +178,34 @@
+ return strm.str();
+ }
+-int main_unix_standalone(const std::string& confdir, bool nodaemon)
++// Chroot into dir (if non-NULL) and switch to gid/uid (0 means "keep").
++// Returns 0 on success and 1 as soon as any step fails.
++static int drop_privileges(char *dir, uid_t uid, gid_t gid)
++{
++	if (dir) {
++		// Resolve a name before chroot() so that etc/resolv.conf and the
++		// libnss* libraries are not needed inside the (empty) directory.
++		// Needed in every forked child; looking up localhost does not work.
++		gethostbyname("");
++		if (chroot(dir) || chdir("/")) {
++			std::cerr << "Cannot lower privileges, chroot directory no longer exists" << std::endl;
++			return 1;
++		}
++	}
++	// Only root can clear the supplementary groups; the parent may already have dropped.
++	if (gid && ((geteuid() == 0 && setgroups(0, NULL)) || setgid(gid))) {
++		std::cerr << "Cannot lower privileges, unable to change group" << std::endl;
++		return 1;
++	}
++	if (uid && setuid(uid)) {
++		std::cerr << "Cannot lower privileges, unable to change user" << std::endl;
++		return 1;
++	}
++	return 0;
++}
++int main_unix_standalone(const std::string& confdir, bool nodaemon, char *chroot, uid_t uid, gid_t gid)
+ {
+ Network::Socket serv_sock = Network::INVALID_SOCK;
+ Network::Socket clnt_sock = Network::INVALID_SOCK;
+@@ -249,6 +279,9 @@
+ daemon(1, 0);
+ }
+ setup_parent_signals();
++	// No chroot requested: the parent can give up root too, and must not go on if that fails.
++	if (!chroot && drop_privileges(NULL, uid, gid))
++		return 1;
+ while (true) {
+ clnt_sock = Network::tcpServerWaitConn(serv_sock, &client);
+@@ -261,6 +294,10 @@
+ setup_child_signals();
+ Network::closeSocket(serv_sock);
+ Network::sockSetNodelay(clnt_sock, true);
++ if (drop_privileges(chroot, uid, gid)) {
++ Network::disconnectAndCloseSocket(clnt_sock, 10);
++ return 1;
++ }
+ BFilter filter(clnt_sock, &state);
+ Network::disconnectAndCloseSocket(clnt_sock, 10);
diff --git a/net-proxy/bfilter/files/bfilter.8 b/net-proxy/bfilter/files/bfilter.8
new file mode 100644
index 000000000000..3c37a1447daf
--- /dev/null
+++ b/net-proxy/bfilter/files/bfilter.8
@@ -0,0 +1,366 @@
+.\" Man Page for BFILTER
+.\" groff -man -Tascii bfilter.8
+.TH BFILTER 8 "August 2005"
+bfilter \- An ad-filtering web proxy using heuristic ad-detection algorithms
+.B bfilter
+[-u USER]
+[-g GROUP]
+.B bfilter
+is a web proxy that uses effective heuristic ad-detection algorithms to remove
+banner adverts, popups and webbugs from web pages. The traditional blocklist
+based approach is also implemented, but it is mostly used for dealing with false
+positives. Unlike other tools that require constant updates of their
+blocklists, bfilter manages to remove over 90% of adverts even with an empty
+blocklist.
+All processing is done on the fly, it doesn't load the whole page or image
+before processing. It uses heuristic and regex-based approaches to detect
+adverts and webbugs. It also uses a Javascript engine to combat Javascript
+generated adverts and popups.
+The web proxy supports the following features:
+.B o
+HTTP/0.9 - HTTP/1.1 support
+.B o
+Persistent connections (HTTP/1.1 only)
+.B o
+Pipelining (HTTP/1.1 only)
+.B o
+HTTP compression
+.B o
+Forwarding to another proxy
+However, it does
+.B not
+support CONNECT requests typically used for HTTPS.
+.B -c, --confdir DIRECTORY
+Set custom config directory
+.B -r, --chroot DIRECTORY
+Set chroot directory
+.B -u, --user USER
+Set unprivileged user
+.B -g, --group GROUP
+Set unprivileged group
+.B -n, --nodaemon
+Disable background daemon mode
+.B -h, --help
+Show help
+.B -v, --version
+Print version
+.B /etc/bfilter/config
+.I listen_address = host:port
+The address to bind the proxy to. If unspecified, bind to all interfaces.
+.I client_compression = yes | no
+If set to yes, all the textual data with "Content-Type: text/*" will be
+compressed before sending it to the client. This option can be useful if you
+are on a slow connection and you set up bfilter somewhere on a fast connection.
+In other cases, setting this option to yes will just introduce additional
+latency to the loading process.
+.I ad_border = rrggbb | none
+The default behavior is to draw borders around removed adverts. You may want
+to change the border color or turn the borders off.
+.I no_flash = yes | no
+This option is for people who don't want to install a Flash plugin and don't
+want to be constantly prompted to do so. Setting it to yes will cause all
+Flash objects to be replaced with transparent GIF's. (You can't use rules to
+achieve the same effect because a Flash advert is normally replaced with a
+blank Flash object that loads the original into itself when you click on it.)
+.I use_proxy = yes | no
+.I proxy_host = host
+.I proxy_port = port
+When use_proxy is set to yes, you may specify a proxy for bfilter to forward
+requests onto.
+.I no_proxy_for = host, host, host
+When use_proxy is set to yes, you may specify some hosts to be contacted
+directly. The separator may be either a comma or a semicolon. If a host starts
+or ends with a dot it is assumed that any prefix or suffix can be appended to
+it, so for example "no_proxy_for = .example.com, 192.168."). Note however
+that .example.com won't cover example.com itself but only its subdomains.
+(When matching no_proxy_for hosts, no DNS queries are being made. That means
+127.0.0.1 won't act as localhost or the other way around.)
+.B /etc/bfilter/rules
+.I filter=0|1
+Enable filtering.
+0: Serve the page as is
+1: (Default) Check for ads and apply the appropriate transformations
+.I ad=0|1|2
+Advert detection options.
+0: (Default) Standard procedure for is_ad decision
+1: Force negative is_ad decision
+2: Force positive is_ad decision
+.I scripts=0|1|2|3|4|5|6|7
+Javascript filtering options. The default value of 3 is effective against
+js-generated ads, but breaks some sites which are too much dependent on
+Javascript. Fortunately, the built-in Javascript engine mostly solves this
+problem.
+0: Leave as is
+1: Remove 3rd party scripts except in header
+2: Remove 3rd party scripts from everywhere
+3: (Default) Only allow scripts in header and those 1st party scripts that
+don't contain ".write"
+4: Only allow scripts in header and those 1st party scripts that contain
+"function "
+5: Only allow scripts in header
+6: Only allow 1st party scripts and only in header
+7: Remove all scripts
+.I jsengine=0|1
+Enable Javascript engine. When the Javascript engine is used, the scripts
+parameter is ignored. The output of a script (generated by document.write or
+writeln) is directed to the standard advert detector. If it detects an advert,
+the script gets removed.
+0: Don't use
+1: (Default) Use if possible
+.I target_blank=0|1
+New window attribute for link option. A link may be marked to be opened in a new
+window if target="_blank" is specified as attribute of an <A> tag.
+0: (Default) Leave as is
+1: Remove attribute
+.I [regex]
+For applying specific options to specific sites. Used after defaults have been
+setup. See the
+.B RULES
+section for further information.
+.B /etc/bfilter/rules.local
+For local rules and redefining the global parameters. Uses the same syntax as
+for the global rules file.
+Rules are used for blocking ads which aren't automatically detected and/or for
+dealing with false positives. The rule format is:
+The regex gets converted to "^http://"+regex+"$" and uses the POSIX extended
+syntax. For those inexperienced with regular expressions, a few explanations:
+.B .
+means any character
+.B \e.
+means the "." character
+.B \e?
+means the "?" character
+.B .*
+means any number of any characters including none
+.B (this|that)
+means "this" or "that"
+.B (something)?
+means "something" or nothing
+You may use any of the global parameters such as filter, ad, scripts or jsengine
+in rules. The parameters you don't specify are implicitly set to the
+corresponding default value.
+It is possible to have several rules match a single url. In this case the lowest
+values for each parameter are used. That is, the values for different parameters
+may be taken from different rules.
+.B Question:
+What is the relationship between rules and rules.local files? Do records in
+rules.local override the ones in rules or supplement them?
+.B Answer:
+It's a rather complex relationship which will be shown in the following
+example.
+Suppose the rules file looks like this:
+# Other parameters are omitted
+And the rules.local file looks like this:
+First of all, the default
+.I filter=1
+parameter from rules is also implicitly present in rules.local as it's not
+overridden there. Then, although only one parameter is associated with each
+regex in this example, all of the other parameters are also implicitly
+associated with them and their values are taken from defaults of the
+corresponding file. So in reality the [regex1] record also contains
+.I jsengine=1
+and the [regex2] record also contains
+.I jsengine=0.
+Now suppose we want to get the jsengine parameter for a URL that matches
+regex1. First we look for a matching regex in rules.local. Having found none
+we continue to look in rules where we find the [regex1] record that matches the
+given URL. This record has an implicit
+.I jsengine=1
+parameter which we were looking for. If our URL doesn't match any of the
+regexes, we take the default parameter from rules.local which is
+.I jsengine=0
+.B 1)
+All images from hosts or paths with standard advert hostnames or paths are
+classified as adverts and filtered.
+.B 2)
+Allow images from the distributed content provider Akamai.
+.B 3)
+Disable the Javascript engine for the Hitweb tracker and use the scripts rule
+setting for filtering instead.
+.B 4)
+Allow images used to count page views for projects hosted on SourceForge.
+Restart bfilter to reload configuration files.
+Sending a
+to all bfilter processes will cause the child processes only to exit after
+handling their last request.
+If the HTML processor is in doubt about an image or a Flash file, it defers
+the decision until the browser has requested that file. The response is then
+analyzed (redirects, cookies) as well as the file itself. For an image, the
+analyzer checks its dimensions and whether it's animated or not. For Flash
+files, the analyzer is trying to find a button that covers most of the object's
+area and has a getURL action associated with it. Depending on the results,
+the object is either forwarded to the client, or substituted with a generated
+replacement. (Unfortunately, analyzing objects that are placed with Javascript
+doesn't work, as their URLs in javascript source cannot be altered.)
+Please report any bugs you may find to:
+Joseph Artsimovich <>
diff --git a/net-proxy/bfilter/files/bfilter.conf b/net-proxy/bfilter/files/bfilter.conf
new file mode 100644
index 000000000000..284edb47cf97
--- /dev/null
+++ b/net-proxy/bfilter/files/bfilter.conf
@@ -0,0 +1,4 @@
+# Config file for /etc/init.d/bfilter
+# See the bfilter(8) man page for possible options to put here.
+BFILTER_OPTS="-u bfilter -g bfilter -r /var/empty"
diff --git a/net-proxy/bfilter/files/bfilter.init b/net-proxy/bfilter/files/bfilter.init
new file mode 100644
index 000000000000..81ebb686e0f1
--- /dev/null
+++ b/net-proxy/bfilter/files/bfilter.init
@@ -0,0 +1,20 @@
+# Copyright 1999-2005 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Header: /var/cvsroot/gentoo-x86/net-proxy/bfilter/files/bfilter.init,v 1.1 2005/09/19 05:35:35 mrness Exp $
+depend() {
+ need net
+start() {
+ ebegin "Starting bfilter"
+ start-stop-daemon --start --quiet --exec /usr/bin/bfilter -- ${BFILTER_OPTS}
+ eend $?
+stop() {
+ ebegin "Stopping bfilter"
+ start-stop-daemon --stop --quiet --exec /usr/bin/bfilter
+ eend $?
diff --git a/net-proxy/bfilter/files/digest-bfilter-0.9.4 b/net-proxy/bfilter/files/digest-bfilter-0.9.4
new file mode 100644
index 000000000000..6d53abbd4195
--- /dev/null
+++ b/net-proxy/bfilter/files/digest-bfilter-0.9.4
@@ -0,0 +1 @@
+MD5 72ca85565bd4c556b06e3a264c0c24f1 bfilter-0.9.4.tar.gz 1246053
diff --git a/net-proxy/bfilter/metadata.xml b/net-proxy/bfilter/metadata.xml
new file mode 100644
index 000000000000..3add9801350f
--- /dev/null
+++ b/net-proxy/bfilter/metadata.xml
@@ -0,0 +1,18 @@
+<?xml version = '1.0' encoding = 'UTF-8'?>
+<!DOCTYPE pkgmetadata SYSTEM "">
+ <herd>net-proxy</herd>
+ <longdescription>BFilter is a filtering web proxy. It was originally intended for removing banner ads only, but at some point
+it has been extended to remove popups and webbugs. It can't be used as a general purpose filtering proxy
+because it was never intended this way.
+For example you can't just block an arbitrary object, you can only hint the ad detector in its decision making.
+The main advantage BFilter has over the similar tools is its heuristic ad detection algorithm. The traditional
+blocklist-based approach is also implemented, but it's mostly used for dealing with false positives. Unlike
+other tools that require constant updates of their blocklists, BFilter manages to remove over 90% of ads
+even with an empty blocklist!
+The javascript generated ads are not a problem for BFilter, as it has a javascript engine to combat them.
+BFilter is expected to work with any browser that supports proxies (nearly any browser does), and can forward
+requests to another HTTP proxy.</longdescription>