import sys
import datetime
from struct import calcsize, unpack_from
from collections import namedtuple
# One price bar. The field order matches the order in which the values of
# each '<ffffd' record are unpacked by parse_bar_file.
Tick = namedtuple('Tick', 'open close high low time')

# Quality metrics computed for a single asset in a single year.
AssetReport = namedtuple(
    'AssetReport',
    'year asset wick_percentage start_date_ok end_date_ok '
    'num_gaps num_duplicates')

# Day zero for the day counts stored in Tick.time; dt() adds the count to it.
OLE_TIME_ZERO = datetime.datetime(1899, 12, 30)

# Years covered by the report: 2002 through 2013 inclusive.
YEARS = range(2002, 2014)

# Asset symbols; each one is combined with a year to build a .bar file name.
ASSETS = [
    'AUDUSD',
    'EURUSD',
    'EURCHF',
    'GBPUSD',
    'GER30',
    'NAS100',
    'NZDUSD',
    'SPX500',
    'UK100',
    'US30',
    'USDCAD',
    'USDCHF',
    'USDJPY',
    'USOil',
    'XAGUSD',
    'XAUUSD',
]
def dt(oledt):
    """Convert a day count relative to ``OLE_TIME_ZERO`` into a datetime.

    ``oledt`` is coerced to ``float`` and treated as a (possibly fractional)
    number of days to add to ``OLE_TIME_ZERO``.
    """
    days_since_zero = datetime.timedelta(days=float(oledt))
    return OLE_TIME_ZERO + days_since_zero
def parse_bar_file(filename):
    """Read every record of a binary bar file and return them as ``Tick``s.

    Each record is packed as ``'<ffffd'``: four little-endian 32-bit floats
    followed by one 64-bit float, unpacked positionally into ``Tick``
    (open, close, high, low, time). Trailing bytes that do not form a whole
    record are ignored.

    Returns a list of ``Tick`` in the reverse of the order they appear in
    the file (presumably the file is stored newest-first, which would make
    the result chronological -- confirm against the data source).

    Errors raised by ``open()``/``read()`` (e.g. a missing file) propagate
    to the caller.
    """
    record_format = '<ffffd'
    record_size = calcsize(record_format)

    # The context manager closes the handle even if read() raises.
    with open(filename, 'rb') as bar_file:
        raw = bar_file.read()

    # Floor division keeps the count an int on both Python 2 and Python 3.
    num_ticks = len(raw) // record_size
    ticks = [
        Tick(*unpack_from(record_format, raw, record_size * index))
        for index in range(num_ticks)
    ]
    ticks.reverse()
    return ticks
def count_wicks(bars):
    """Return how many bars have a wick on at least one side.

    A bar counts when its high lies above both open and close, or its low
    lies below both open and close.
    """
    wick_count = 0
    for bar in bars:
        upper_wick = bar.high > max(bar.open, bar.close)
        if upper_wick or bar.low < min(bar.open, bar.close):
            wick_count += 1
    return wick_count
def verify_starting_date(bars):
    """Return True when the first bar falls on day 1 through 5 of its month.

    Raises ``IndexError`` if ``bars`` is empty.
    """
    first_day = dt(bars[0].time).day
    return 1 <= first_day <= 5
def verify_end_date(bars):
    """Return True when the last bar's date is acceptable as an end date.

    A last bar dated in the current calendar year is always accepted
    (presumably because that year's data is still incomplete). Otherwise
    the bar must fall on day 26 through 31 of its month.

    Raises ``IndexError`` if ``bars`` is empty.
    """
    last = dt(bars[-1].time)
    in_current_year = last.year == datetime.datetime.now().year
    return in_current_year or 26 <= last.day <= 31
def count_gaps(bars):
    """Return the number of consecutive bar pairs more than 3 days apart.

    Each bar's ``time`` is converted with ``dt`` and compared with the bar
    immediately before it; a difference strictly greater than three days
    counts as one gap.
    """
    threshold = datetime.timedelta(days=3)
    return sum(
        1
        for earlier, later in zip(bars, bars[1:])
        if dt(later.time) - dt(earlier.time) > threshold
    )
def count_duplicates(bars):
    """Return how many bars repeat a ``time`` value seen in an earlier bar.

    Every occurrence after the first of a given timestamp counts once, so
    the result is the number of bars minus the number of distinct times.
    """
    timestamps = [bar.time for bar in bars]
    return len(timestamps) - len(set(timestamps))
def generate_reports():
    """Build the per-year quality reports for every asset in ``ASSETS``.

    For each asset and each year in ``YEARS`` the file
    ``"<asset>_<year>.bar"`` (a relative path) is parsed and summarised
    into an ``AssetReport``. A progress line is written to standard output
    for every asset.

    Returns a dict mapping each asset name to a list with one entry per
    year in ``YEARS``: an ``AssetReport``, or ``None`` when the file could
    not be read (``IOError``) or contained no bars.
    """
    reports = {}
    for asset in ASSETS:
        sys.stdout.write("Generating for %s ...\n" % (asset,))
        year_reports = []
        for year in YEARS:
            # Only the file access sits inside the try, so an IOError from
            # anywhere else is not silently swallowed.
            try:
                bars = parse_bar_file("%s_%d.bar" % (asset, year))
            except IOError:
                # Some years are missing
                bars = []

            if not bars:
                # Missing and empty files are both reported as "no data".
                year_reports.append(None)
                continue

            year_reports.append(AssetReport(
                year=year,
                asset=asset,
                wick_percentage=count_wicks(bars) / float(len(bars)),
                start_date_ok=verify_starting_date(bars),
                end_date_ok=verify_end_date(bars),
                num_gaps=count_gaps(bars),
                num_duplicates=count_duplicates(bars),
            ))
        reports[asset] = year_reports
    return reports
def print_table(reports, title, output_func):
    """Write one asset-by-year table to standard output.

    Parameters:
        reports: mapping of asset name to a list of per-year entries, each
            either a report object or ``None`` for a year without data.
        title: heading written above the table.
        output_func: callable that turns one report into the text of its
            cell. To line up under the year header it should return
            5 characters (the header uses 4-digit years joined by a space).

    The output is the title, a header row listing ``YEARS``, an
    80-character rule, one row per asset, and three trailing newlines.
    """
    write = sys.stdout.write
    # Same width as a regular cell so that columns after a missing year
    # stay aligned with the year header.
    missing_cell = "  -  "

    write("%s\n" % (title,))
    write("%6s | %s\n" % ("Asset", " ".join(map(str, YEARS))))
    write("-" * 80 + "\n")
    for asset, year_reports in reports.items():
        cells = [
            missing_cell if report is None else output_func(report)
            for report in year_reports
        ]
        write("%6s | %s\n" % (asset, "".join(cells)))
    write("\n\n\n")
def format_date_ok(r):
    """Render a report's start/end date checks as a table cell.

    Returns a string of the form ``" Y/N "``: the first letter reflects
    ``r.start_date_ok`` and the second ``r.end_date_ok``.
    """
    start_flag = "Y" if r.start_date_ok else "N"
    end_flag = "Y" if r.end_date_ok else "N"
    return "".join([" ", start_flag, "/", end_flag, " "])
# Build all reports once, then render one table per metric. Each entry pairs
# a table title with the function that formats a single report into a cell.
reports = generate_reports()
for _title, _cell_formatter in (
        ("Wick percentages", lambda r: "%4.1f " % (r.wick_percentage * 100, )),
        ("Num gaps large than 3 days", lambda r: "%4d " % (r.num_gaps, )),
        ("Num duplicates", lambda r: "%4d " % (r.num_duplicates, )),
        ("Start/end dates ok", format_date_ok),
):
    print_table(reports, _title, _cell_formatter)