import gzip
import os

def parse(path):
  """Yield one evaluated record per line of the gzip file at `path`.

  Each line is passed to `eval` and the resulting object is yielded, so
  every line must be a complete Python expression (the callers in this
  file treat the results as dicts).

  The file is opened lazily on first iteration and is closed when the
  generator is exhausted, explicitly closed, or garbage collected.
  """
  # `with` releases the handle even if the consumer stops early or a line
  # fails to evaluate; previously the file was never closed.
  with gzip.open(path, 'r') as g:
    for l in g:
      # SECURITY: eval() runs arbitrary code contained in the input. Only use
      # this on trusted files; ast.literal_eval is the safe choice when the
      # lines are plain literals.
      yield eval(l)

# descs = {}

# for d in parse("descriptions.json.gz"):
#   descs[d['asin']] = d['description']

# Inputs: every entry of categoryFiles/ whose name contains both "meta_" and
# ".gz", followed by metadata.json.gz in the working directory.
mfiles = ["categoryFiles/" + x for x in os.listdir("categoryFiles") if "meta_" in x and ".gz" in x] + ["metadata.json.gz"]

for m in mfiles:
  # Progress output: name of the file being converted.
  print(m)
  # The output path is the input path with ".gz" removed. `with` closes (and
  # flushes) each output file before the next input is started; previously
  # the handles were left open.
  with open(m.replace(".gz", ''), 'w') as z:
    for d in parse(m):
      if "description" in d:
        try:
          # Drop every character that cannot be represented in ASCII.
          d['description'] = d['description'].encode("ascii", "ignore")
        except Exception as e:
          # Best effort: report the problem and still write the record with
          # its description unchanged.
          print(e)
      # One record per line, using the dict's str() representation.
      z.write(str(d) + '\n')
  # Disabled alternative (kept for reference): skip records whose 'asin' was
  # already seen and take descriptions from a separate `descs` lookup.
  # seen = set()
  #   asin = d['asin']
  #   if asin in seen:
  #     continue
  #   seen.add(asin)
  #   if asin in descs:
  #     d['description'] = descs[asin]
  #   z.write(str(d) + '\n')
