Learn to write DSLs the right way


by Ara T. Howard

I was working with a certain popular Ruby testing library today to prepare a pull request and hit an issue I've hit many times: tests passing and/or failing because the testing framework itself pollutes the hell out of the global namespaces. Don't believe it? Check this out:


polluted_by('net-http-digest_auth') #=> 0
polluted_by('curb') #=> 0
polluted_by('xpath') #=> 0
polluted_by('mysql') #=> 0
polluted_by('wrap') #=> 0
polluted_by('database_cleaner') #=> 0
polluted_by('will_paginate') #=> 0
polluted_by('debugger-linecache') #=> 0
polluted_by('debugger-ruby_core_source') #=> 0
polluted_by('mustache') #=> 0
polluted_by('watirgrid') #=> 0
polluted_by('diff-lcs') #=> 0
polluted_by('warden') #=> 0
polluted_by('url_mount') #=> 0
polluted_by('ejs') #=> 0
polluted_by('multipart-post') #=> 0
polluted_by('erubis') #=> 0
polluted_by('multi_json') #=> 0
polluted_by('nokogiri') #=> 0
polluted_by('capybara') #=> 0
polluted_by('unf_ext') #=> 0
polluted_by('open4') #=> 0
polluted_by('tilt') #=> 0
polluted_by('orm_adapter') #=> 0
polluted_by('cancan') #=> 0
polluted_by('pg') #=> 0
polluted_by('fastthread') #=> 0
polluted_by('thor') #=> 0
polluted_by('ffi') #=> 0
polluted_by('temple') #=> 0
polluted_by('flock') #=> 0
polluted_by('tagz') #=> 0
polluted_by('forkify') #=> 0
polluted_by('formatador') #=> 0
polluted_by('columnize') #=> 0
polluted_by('syntax') #=> 0
polluted_by('fssm') #=> 0
polluted_by('subexec') #=> 0
polluted_by('rack') #=> 0
polluted_by('grb') #=> 0
polluted_by('spork') #=> 0
polluted_by('slop') #=> 0
polluted_by('slim') #=> 0
polluted_by('rack-cache') #=> 0
polluted_by('shoulda-matchers') #=> 0
polluted_by('mixlib-cli') #=> 0
polluted_by('hashie') #=> 0
polluted_by('rack-protection') #=> 0
polluted_by('mime-types') #=> 0
polluted_by('hike') #=> 0
polluted_by('hiredis') #=> 0
polluted_by('http_parser.rb') #=> 0
polluted_by('rails_current') #=> 0
polluted_by('ruby_gntp') #=> 0
polluted_by('i18n') #=> 0
polluted_by('ruby_cli') #=> 0
polluted_by('journey') #=> 0
polluted_by('rails_default_url_options') #=> 0
polluted_by('ruby-hmac') #=> 0
polluted_by('middleman-livereload') #=> 0
polluted_by('rspec-mocks') #=> 0
polluted_by('rmagick') #=> 0
polluted_by('libv8') #=> 0
polluted_by('libwebsocket') #=> 0
polluted_by('linguistics') #=> 0
polluted_by('listen') #=> 0
polluted_by('rails_helper') #=> 0
polluted_by('coderay') #=> 0
polluted_by('benchmark-ips') #=> 0
polluted_by('bigdecimal') #=> 0
polluted_by('redis-namespace') #=> 0
polluted_by('redis') #=> 0
polluted_by('redcarpet') #=> 0
polluted_by('rdoc') #=> 0
polluted_by('map') #=> 0
polluted_by('rbtree') #=> 0
polluted_by('rb-fsevent') #=> 0
polluted_by('childprocess') #=> 0
polluted_by('bson') #=> 0
polluted_by('rails_nav') #=> 0
polluted_by('builder') #=> 0
polluted_by('method_source') #=> 0
polluted_by('chunky_png') #=> 1
polluted_by('albino') #=> 1
polluted_by('webrobots') #=> 1
polluted_by('http_router') #=> 1
polluted_by('sanitize') #=> 1
polluted_by('net-http-persistent') #=> 1
polluted_by('faraday') #=> 1
polluted_by('shoulda') #=> 1
polluted_by('excon') #=> 1
polluted_by('simple_oauth') #=> 1
polluted_by('faraday_middleware') #=> 1
polluted_by('posix-spawn') #=> 1
polluted_by('shoulda-context') #=> 1
polluted_by('arrayfields') #=> 2
polluted_by('mini_magick') #=> 2
polluted_by('daemon_controller') #=> 2
polluted_by('fastercsv') #=> 2
polluted_by('unicode-display_width') #=> 3
polluted_by('eventmachine') #=> 3
polluted_by('rack-ssl') #=> 3
polluted_by('em-websocket') #=> 3
polluted_by('narray') #=> 3
polluted_by('babosa') #=> 3
polluted_by('rest-client') #=> 3
polluted_by('ruby-prof') #=> 3
polluted_by('launchy') #=> 4
polluted_by('ruby_ngrams') #=> 4
polluted_by('domain_name') #=> 4
polluted_by('unf') #=> 4
polluted_by('fukung') #=> 4
polluted_by('txt2img') #=> 5
polluted_by('hashr') #=> 5
polluted_by('io-console') #=> 23
polluted_by('arel') #=> 102
polluted_by('mysql2') #=> 102
polluted_by('logrotate') #=> 102
polluted_by('compass') #=> 103
polluted_by('highline') #=> 103
polluted_by('compass-rails') #=> 103
polluted_by('net-ssh-gateway') #=> 104
polluted_by('net-sftp') #=> 104
polluted_by('net-ssh') #=> 104
polluted_by('selenium-webdriver') #=> 104
polluted_by('net-scp') #=> 107
polluted_by('capistrano-file_db') #=> 108
polluted_by('capistrano') #=> 108
polluted_by('daemons') #=> 115
polluted_by('tzinfo') #=> 115
polluted_by('sqlite3') #=> 116
polluted_by('uuidtools') #=> 116
polluted_by('chronic') #=> 117
polluted_by('coerce') #=> 117
polluted_by('rack-test') #=> 118
polluted_by('thin') #=> 119
polluted_by('sinatra') #=> 119
polluted_by('ansi') #=> 119
polluted_by('amalgalite') #=> 120
polluted_by('mechanize') #=> 134
polluted_by('polyglot') #=> 276
polluted_by('metaclass') #=> 276
polluted_by('haml') #=> 278
polluted_by('uuid') #=> 279
polluted_by('systemu') #=> 279
polluted_by('threadify') #=> 279
polluted_by('macaddr') #=> 279
polluted_by('treetop') #=> 282
polluted_by('mongo') #=> 283
polluted_by('uglifier') #=> 283
polluted_by('coffee-script') #=> 283
polluted_by('execjs') #=> 283
polluted_by('moped') #=> 290
polluted_by('sass') #=> 380
polluted_by('ruby-growl') #=> 395
polluted_by('locomotive_liquid') #=> 402
polluted_by('pygments.rb') #=> 551
polluted_by('rubypython') #=> 551
polluted_by('blankslate') #=> 551
polluted_by('friendly_id') #=> 552
polluted_by('w3c_validators') #=> 554
polluted_by('json') #=> 554
polluted_by('rspec-expectations') #=> 555
polluted_by('dragonfly') #=> 556
polluted_by('rake') #=> 563
polluted_by('pry') #=> 582
polluted_by('pry-nav') #=> 582
polluted_by('mongoid-sequence') #=> 832
polluted_by('awesome_print') #=> 860
polluted_by('debugger') #=> 1387
polluted_by('maruku') #=> 1401
polluted_by('fattr') #=> 1662
polluted_by('guard-livereload') #=> 1762
polluted_by('guard') #=> 1762
polluted_by('guard-spork') #=> 1762
polluted_by('guard-rspec') #=> 1762
polluted_by('RedCloth') #=> 1762
polluted_by('unidecoder') #=> 1763
polluted_by('sprockets') #=> 1765
polluted_by('multi_xml') #=> 1775
polluted_by('httparty') #=> 1776
polluted_by('fog') #=> 1776
polluted_by('bundler') #=> 1783
polluted_by('main') #=> 1783
polluted_by('rspec-core') #=> 1936
polluted_by('faker') #=> 2042
polluted_by('mail') #=> 2064
polluted_by('factory_girl') #=> 2244
polluted_by('rubyforge') #=> 2333
polluted_by('rspec') #=> 2490
polluted_by('carrierwave') #=> 2502
polluted_by('redis-persistence') #=> 2876
polluted_by('grid') #=> 2901
polluted_by('origin') #=> 3179
polluted_by('rego') #=> 3443
polluted_by('aws-s3') #=> 3453
polluted_by('padrino-core') #=> 3758
polluted_by('routing-filter') #=> 4224
polluted_by('coffee-rails') #=> 5389
polluted_by('mocha') #=> 5627
polluted_by('middleman-blog') #=> 7412
polluted_by('middleman-core') #=> 7412
polluted_by('padrino-helpers') #=> 8555
polluted_by('turn') #=> 9110
polluted_by('rails-backbone') #=> 9289
polluted_by('rails-i18n') #=> 9289
polluted_by('seo_meta') #=> 9289
polluted_by('rails') #=> 9289
polluted_by('flash_cookie_session') #=> 9289
polluted_by('factory_girl_rails') #=> 10398
polluted_by('middleman') #=> 11376
polluted_by('middleman-sprockets') #=> 11376
polluted_by('middleman-more') #=> 11376
polluted_by('sass-rails') #=> 13315
polluted_by('devise') #=> 14956
polluted_by('kaminari') #=> 15248
polluted_by('globalize3') #=> 16173
polluted_by('acts_as_indexed') #=> 16173
polluted_by('whenever') #=> 19247
polluted_by('tinder') #=> 19248
polluted_by('redmon') #=> 19530
polluted_by('refinerycms-core') #=> 20593
polluted_by('refinerycms-dashboard') #=> 20593
polluted_by('refinerycms-codemirror') #=> 20593
polluted_by('refinerycms-copywriting') #=> 20593
polluted_by('refinerycms-i18n') #=> 20593
polluted_by('refinerycms-page-images') #=> 20633
polluted_by('refinerycms-testing') #=> 20869
polluted_by('refinerycms-videojs') #=> 20872
polluted_by('refinerycms-images') #=> 20872
polluted_by('refinerycms-resources') #=> 20872
polluted_by('refinerycms-pages') #=> 21147
polluted_by('refinerycms-authentication') #=> 21162
polluted_by('refinerycms') #=> 21443
polluted_by('custom_fields') #=> 21756
polluted_by('carrierwave-mongoid') #=> 21756
polluted_by('mongoid-bolt') #=> 27588
polluted_by('mongoid-lock') #=> 27588
polluted_by('mongoid') #=> 27588
polluted_by('mongoid-grid_fs') #=> 27588
view raw polluted.rb.txt hosted with ❤ by GitHub

the results were produced using this script


# Driver: measure every gem named on the command line, or every locally
# installed gem when no arguments are given, then print the results in
# ascending order of pollution count.
targets = ARGV.empty? ? all_gems : ARGV

# forkify comes from the 'forkify' gem required in the BEGIN block; the 16 is
# presumably the number of parallel workers -- confirm against that gem's docs.
results = targets.forkify(16) do |lib|
  puts "#{ lib }..."
  [lib, polluted_by(lib)]
end

results.sort_by { |(_lib, count)| count }.each do |lib, count|
  puts "polluted_by('#{ lib }') #=> #{ count }"
end
BEGIN {
require 'forkify'
STDOUT.sync = true
# Lists the names of the locally installed gems.
#
# Parses the output of `gem list --local`, keeping the first
# whitespace-delimited token of every line. Blank lines (which have no token)
# and tokens that cannot be gem names (such as a "***" banner) are dropped so
# that callers never receive nil or junk entries.
#
# Returns an Array of unique gem name Strings, minus anything listed by
# stupidly_loading_gems.
def all_gems
  first_tokens = `gem list --local`.split(/\n/).map { |line| line[/\S+/] }
  names = first_tokens.compact.grep(/\A[a-zA-Z0-9][a-zA-Z0-9._-]*\z/)
  names.uniq - stupidly_loading_gems
end
# Names of gems that all_gems should leave out of the default run.
#
# Returns an Array of Strings; the list is currently empty. Add
# whitespace-separated gem names inside the %w[] literal to exclude them.
def stupidly_loading_gems
  %w[]
end
# Measures how many methods requiring +lib+ adds to classes that already
# existed before the require.
#
# lib            - gem name as a String. The name itself, the name with dashes
#                  replaced by underscores, and the name with dashes replaced
#                  by slashes are tried, in that order, until one can be
#                  required.
# silence_stderr - when true, STDERR is pointed at the null device while the
#                  library loads (default: false, i.e. load noise stays
#                  visible).
#
# Returns the Integer computed by Methods::Summary#polluted_by from snapshots
# taken before and after the require, or -1 when none of the candidate names
# could be loaded.
def polluted_by(lib, silence_stderr = false)
  before = Methods.summary

  saved_stderr = STDERR.dup
  begin
    STDERR.reopen(File::NULL, 'w') if silence_stderr

    candidates = [lib, lib.gsub('-', '_'), lib.gsub('-', '/')].uniq
    loaded = candidates.any? do |libname|
      begin
        require(libname)
        true
      rescue Exception
        # Deliberately broad: LoadError is not a StandardError, and a library
        # may raise anything at all while loading. A failure just means "try
        # the next candidate name".
        false
      end
    end

    return -1 unless loaded
  ensure
    # Restore the real STDERR and release the duplicate so repeated calls do
    # not leak a descriptor each time.
    STDERR.reopen(saved_stderr)
    saved_stderr.close
  end

  before.polluted_by(Methods.summary)
end
# Snapshot of the methods visible on one class at the moment of construction.
class Methods
  attr_accessor :klass, :class_methods, :instance_methods

  # klass - the Class to snapshot. Records klass.methods and the instance
  #         methods reported by klass.instance_methods(false).
  def initialize(klass)
    @klass = klass
    @class_methods = klass.methods
    @instance_methods = klass.instance_methods(false)
  end

  # Counts the methods present in +other+ (a later snapshot) that are absent
  # from this one.
  #
  # Returns the number of new class methods plus new instance methods.
  def polluted_by(other)
    new_class_methods = other.class_methods - class_methods
    new_instance_methods = other.instance_methods - instance_methods
    new_class_methods.size + new_instance_methods.size
  end

  # Hash of Class => Methods snapshot.
  class Summary < ::Hash
    # Sums Methods#polluted_by over every class found in both this summary
    # and +other+; classes that appear in only one of the two are ignored.
    def polluted_by(other)
      inject(0) do |total, (klass, snapshot)|
        other.key?(klass) ? total + snapshot.polluted_by(other[klass]) : total
      end
    end
  end

  # Builds a Summary holding a snapshot of every Class that
  # ObjectSpace.each_object(Class) yields.
  def self.summary
    Summary.new.tap do |summary|
      ObjectSpace.each_object(Class) do |klass|
        summary[klass] = Methods.new(klass)
      end
    end
  end
end
}
view raw polluted.rb hosted with ❤ by GitHub

some people have claimed that this will always be a side effect of having nice DSLs, but this is simply not true:


# the entire concept of building a dsl means defining domain terms on an
# object, it's so much simpler to start with the dsl itself being a blank
# slate that simply relays certain methods to a scope
#
# Blank-slate base class for DSLs: nearly every public instance method is
# undefined so that DSL terms defined by subclasses cannot collide with them.
class DSL
  # Keep only methods whose names start with "__" and object_id.
  instance_methods.each do |name|
    next if name.to_s =~ %r/\A__|\Aobject_id\Z/
    undef_method(name)
  end

  # Evaluates +block+ with this DSL instance as self. instance_eval was
  # undefined above, so the implementation is borrowed from Object and bound
  # to this instance.
  def __call__(&block)
    evaluator = Object.instance_method(:instance_eval)
    evaluator.bind(self).call(&block)
  end

  # Allocates a DSL instance (without calling initialize), stores +scope+ in
  # both @object and @scope, then evaluates +block+ against the instance.
  #
  # Returns the value of the block.
  def self.scope(scope, &block)
    instance = allocate
    instance.__call__ { @object = @scope = scope }
    instance.__call__(&block)
  end
end
# because then you can do whatever the hell you want, including catching
# methods defined on the object. you should *never* instance_eval in the
# actual objects for serious dsl's. hook up dsl terms one by one to your
# object... nearly all dsls get this backwards.
#
# DSL whose terms are relayed one by one to the object held in @object
# (set by DSL.scope).
class ArrayDSL < ::DSL
  # Forwards to @object.push and then, whether or not the push raised,
  # prints a line describing what was pushed.
  #
  # Returns whatever @object.push returns.
  def push(*args, &block)
    target = @object
    begin
      target.push(*args, &block)
    ensure
      puts "pushed #{ args.inspect } onto #{ @object.class }(#{ @object.inspect }) via the dsl..."
    end
  end

  # DSL term (never run as a constructor, since DSL.scope uses allocate):
  # empties @object and pushes the given arguments onto it.
  def initialize(*args, &block)
    target = @object
    target.clear
    target.push(*args, &block)
  end
end
# then we can change thinking from "the dsl of this object" to evaluating a
# set of code with an object as the context/scope
#
# Example: evaluate DSL terms with a fresh, empty array as the scope.
ArrayDSL.scope([]) do
  initialize(1, 2, 3)
  push(43) #=> pushed [43] onto Array([1, 2, 3, 43]) via the dsl...
end
# so even for really complex dsl's like testing frameworks we need only
# realize that the *test itself* is the scope and build our fancy pants
# methods on the bloody dsl, not every damn object in ObjectSpace...
#
# this is just an example about how nearly any syntax can be constructed
# without polluting Object using the concepts of scope and a proxied blank
# slate. of course this impl is crap - but it shows that it can easily
# be done.
#
# A deliberately tiny spec framework whose vocabulary (describe, it, value,
# eql) lives on DSL subclasses instead of being added to every object.
class Spec
  # An Array of Test objects plus the stack of name prefixes contributed by
  # the enclosing `describe` calls.
  class Suite < ::Array
    # Runs every collected test in order, printing "<name> #=> <status>" for
    # each one.
    def run
      each do |test|
        status = test.run
        puts "#{ test.name } #=> #{ status }"
      end
    end

    # Stack of prefixes for the `describe` blocks currently being evaluated.
    def prefixes
      @prefixes ||= []
    end

    # Helpers that turn free-form descriptions into path-like test names.
    class Name < ::String
      # Joins the arguments with spaces, extracts the runs of word characters
      # and joins those with underscores.
      def Name.for(*args)
        args.join(' ').scan(/\w+/).join('_')
      end

      # Builds "/<prefix>/.../<name>".
      #
      # The parts are flattened before joining: joining a nested *empty*
      # prefixes array would still emit a separator, which turned the name of
      # an `it` outside any `describe` into "//name" instead of "/name".
      def Name.path_for(prefixes, *args)
        '/' + [prefixes, Name.for(args)].flatten.join('/')
      end
    end

    # One named example: a block that is evaluated with the Test as scope.
    class Test
      attr_accessor :suite, :name, :block

      def initialize(suite, name, &block)
        @suite = suite
        @name = name
        @block = block
      end

      # Evaluates the example's block through Test::DSL.
      #
      # Returns the value of the block, which Suite#run prints as the status.
      def run
        DSL.scope(self, &@block)
      end

      # Wraps the left-hand side of an expectation.
      class Value
        def initialize(lhs)
          @lhs = lhs
        end

        # Applies +condition+ (anything responding to #call) to the wrapped
        # value and returns its result.
        def should(condition)
          condition.call(@lhs)
        end
      end

      # A comparison of a given type against a right-hand side value.
      class Condition
        attr_accessor :type, :rhs

        def initialize(type, rhs)
          @type = type
          @rhs = rhs
        end

        # Returns :success or :failure for types whose name matches /eql/
        # (compared with ==).
        #
        # Raises ArgumentError for any other type.
        def call(lhs)
          case type.to_s
          when /eql/
            lhs == rhs ? :success : :failure
          else
            raise ArgumentError, type.inspect
          end
        end
      end

      # Vocabulary available inside an `it` block.
      class DSL < ::DSL
        def value(value)
          Value.new(value)
        end

        def eql(value)
          Condition.new(:eql, value)
        end
      end
    end

    # Vocabulary available inside Spec.suite and `describe` blocks.
    class DSL < ::DSL
      # Pushes a prefix derived from +args+, evaluates +block+ in this DSL,
      # then pops the prefix again.
      def describe(*args, &block)
        suite = @object
        suite.prefixes.push(Name.for(args))
        # Only the block evaluation is protected, so the pop can never run
        # without a matching push.
        begin
          __call__(&block)
        ensure
          suite.prefixes.pop
        end
      end

      # Registers a Test named after the current prefixes plus +args+.
      def it(*args, &block)
        suite = @object
        name = Name.path_for(suite.prefixes, args)
        test = Test.new(suite, name, &block)
        suite.push(test)
      end
    end
  end

  # Collects the tests declared in +block+ into a new Suite and runs them.
  def Spec.suite(&block)
    suite = Suite.new
    Suite::DSL.scope(suite, &block)
    suite.run
  end
end
# and, even with this hacked together in 10 minutes impl we can easily imagine
# powerful syntaxes that do not hork the global namespace
#
# Example suite: the first example reports success, the second failure.
Spec.suite do
  describe("something important...") do
    it("should use silly english descriptions") do
      value(42).should eql(42.0.to_i)
    end

    it("without fubaring every object's namespace...") do
      value(42).should eql('forty-two')
    end
  end
end
# and here we prove it...
#
# Baseline, taken before the rest of the file is evaluated: the total number
# of class-level methods plus directly defined instance methods across every
# Class yielded by ObjectSpace.
BEGIN {
  baseline = 0
  ObjectSpace.each_object(Class) do |klass|
    baseline += klass.methods.size + klass.instance_methods(false).size
  end
  puts "BEFORE: #{ baseline } methods"
  puts
}
# Final count, taken at interpreter exit using the same measure as the BEGIN
# block: class-level methods plus directly defined instance methods across
# every Class yielded by ObjectSpace. Living inside END means it is still
# reported when the script exits early or dies on an uncaught error.
END {
  n = 0
  ObjectSpace.each_object(Class) do |c|
    n += c.methods.size
    n += c.instance_methods(false).size
  end
  puts
  puts "AFTER: #{ n } methods"
}
__END__
BEFORE: 21280 methods
pushed [43] onto Array([1, 2, 3, 43]) via the dsl...
/something_important/should_use_silly_english_descriptions #=> success
/something_important/without_fubaring_every_object_s_namespace #=> failure
AFTER: 22281 methods
view raw dsl.rb hosted with ❤ by GitHub

so, learn how to write DSLs the right way, people. the right way is any way that doesn't dump methods all over every object and namespace without an extremely good reason.



"it should read nice" is not a good reason.