#!/usr/bin/python # -*- coding: utf-8 -*- import os, re, subprocess, sys def search_substr(f, str): for l in f: if l.find(str) >= 0: return l return None def get_price(str): str = str.replace(', ', ',') money_re = re.compile('^-?\d+,\d\d$') fields = str.split() fields.reverse() for f in fields: if money_re.match(f): return float(f.replace(',', '.')) def next_price(f, str): p = search_substr(f, str) if p: price = get_price(p) print 'next_price(%s) -> line "%s" -> %.2f' % (str, p.strip(), price) return price return None def print_price(p): ks = p.keys() ks.sort() for k in ks: print '%-30s: %10.2f €' % (k, p[k]) if len(sys.argv) != 2: print 'Usage:', sys.argv[0], '' sys.exit(-1) pdfpath = sys.argv[1] txtpath = pdfpath[:-3] + 'txt' # create text file assert subprocess.call(['pdftotext', '-enc', 'UTF-8', '-layout', '-nopgbrk', pdfpath]) == 0 try: f = open(txtpath) finally: os.unlink(txtpath) # so that we don't forget later price = {} common = next_price(f, 'Monatliche Beträge') #other = next_price(f, 'Sonstige Leistungen des Konzerns') #if other: # common = common + other bill_vat = next_price(f, 'Umsatzsteuer 19 %') bill_gross = next_price(f, 'Rechnungsbetrag') bill_item_re = re.compile('^\s*\d+\.\s*.*19') # Telekom assert search_substr(f, 'Summe Monatliche Beträge') assert search_substr(f, '...........') assert search_substr(f, '...........') for l in f: if l.find('..........') >= 0: break if bill_item_re.match(l): assert cur_number p = get_price(l) price[cur_number] = price.setdefault(cur_number, 0) + p print 'adding %.2f to %s for "%s"' % (p, cur_number, l.strip()) if l.find('Summe Verbindungen für oben angegebene Rufnummer') >= 0: cur_number = None if l.find('Rufnummer (') >= 0: cur_number = l.strip().split(' ')[0].strip() print l.strip(), ' -> switching to', cur_number assert search_substr(f, '...........') # other companies if search_substr(f, 'Beträge anderer Anbieter'): for l in f: if l.find('Summe Beträge anderer Anbieter') >= 0: break if bill_item_re.match(l): assert cur_number p = get_price(l) print 'adding %.2f to %s for "%s"' % (p, cur_number, l.strip()) price[cur_number] = price.setdefault(cur_number, 0) + p if l.find('Rufnummer (') >= 0: cur_number = l.strip().split(' ')[0].strip() print l.strip(), ' -> switching to', cur_number print '----------------------------------------' print 'Summen:' print_price(price) print 'Allgemeine Gebühren: %.2f' % common common = common / len(price) sum = 0 for k, v in price.iteritems(): price[k] = v + common sum = sum + price[k] print '----------------------------------------' print 'Verrechnung allgemeine Gebühren (Aufschlag für jeden: %.2f)' % common print_price(price) vat = sum * 0.19 gross = sum + vat for k, v in price.iteritems(): price[k] *= 1.19 print '----------------------------------------' print 'Aufschlag Mehrwertsteuer:' print_price(price) print '----------------------------------------' print 'Berechnete MwST: %.3f, Rechnungs-MwSt: %.2f' % (vat, bill_vat) print 'Berechnete Bruttosume: %.3f, Rechnungs-Bruttosumme: %.2f' % (gross, bill_gross)