Question
→ How would you improve this code
- The Error Handling sucks
- Bad Usage of Net::HTTP
- Bad Usage of Classes and Hierachy
- etc
Thanks in advance.
require ‘rubygems’
require ‘hpricot’
require ‘net/http’
class ProxyList < Net::HTTP
URL = 'www.samair.ru'
FILE = File.dirname(__FILE__) + '/proxylist.dat'
IPADDR_AT = 4
@port_values = {}
@proxylist = []
def self.random_useragent
USER_AGENT[rand(USER_AGENT.size - 1)]
end
def self.strip_html(str)
str.gsub(/</?[^>]*>/, ‘’)
end
def self.retreive_proxylist(n = 10)
begin
puts “Retreiving proxylist from server: #{URL}”
http = self.new(URL, self.default_port)
begin
@port_values.clear if @port_values.length > 0
file = File.new(FILE, ‘w’)
1.upto(n) do |i|
page = “/proxy/proxy-%02d.htm” % i
puts “Retreiving #{URL}#{page} from #{URL}”
response = http.get2(page)
document = Hpricot(response.body)
# Hidden Keys?
script =
document.search(‘//head/script[@type=“text/javascript”]’)
script.each do |element|
str = element.inner_html
# /([_a-zA-Z][_a-zA-Z])(?:\s=\s*([^$]+))?;/
str.gsub(/([^;]+)/) {
|match|
var, val = *(match.split(/=/)[0, 2])
@port_values[var.to_sym] = val
}
end
# Proxies
td = document.search(‘//table.tablelist/tr/td’)
td.each_with_index do |element, index|
# Hijos de la riviera maya
if index % IPADDR_AT == 0 then
# wtf!!!
s = element.inner_html
ip, port = *(s.split(/<script/, 2)[0, 2])
next if port == nil
port = port.scan(/(?:\+[a-z])+/).to_s
port.gsub!('+', '')
ip_port = ""
port.each_byte {
|c|
begin
ip_port << @port_values[c.chr.to_sym]
rescue TypeError => e
# do nothing
end
}
@proxylist << "#{ip}:#{ip_port}"
end
end
end
@proxylist.each do |proxy|
file.write(proxy + "\r\n")
end
rescue Exception => e
puts "[file=#{e.inspect}]"
ensure
file.close
end
rescue Exception => e
puts “[http=#{e.inspect}]”
end
end
end