from collections import defaultdict
import sys
import math

import os
import gzip
from collections import defaultdict
import string
from commands import getstatusoutput as gso
#import numpy
#import scipy
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
from matplotlib import mpl
import math
from pylab import *

import locale
import time
import datetime
import sys

from matplotlib import rc
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
rc('text', usetex=True)


#locale.setlocale(locale.LC_ALL, 'en_US')
import matplotlib.font_manager 
prop = matplotlib.font_manager.FontProperties(size=9)

# Load the Yelp review dump and record, for every (user_id, business_id)
# pair, the number of 'useful' votes that review received.
import json  # imported here because only this section parses JSON

cats = set()

helpful = {}
with open("/afs/cs.stanford.edu/u/jmcauley/scratch/beercode/language/yelpExp/language/yelp_phoenix_academic_dataset/yelp_academic_dataset_review.json", 'r') as reviewFile:
  # Iterate the file lazily so the whole dump is never held in memory at once.
  for line in reviewFile:
    if not line.strip():
      continue  # tolerate blank lines
    # Each line is parsed as one JSON object.  json.loads replaces the earlier
    # eval() + true/false text substitution, which broke on `null`, rewrote
    # those words inside review text, and executed file contents as code.
    # NOTE(review): under Python 2 the ids come back as unicode; this assumes
    # they are ASCII so they still match byte-string ids read elsewhere.
    review = json.loads(line)
    helpful[review['user_id'], review['business_id']] = review['votes']['useful']
  
#print helpful

# Average 'useful' vote count per user, taken over all of that user's
# (user, business) entries in `helpful`.
votesPerUser = defaultdict(list)
for (user, business), votes in helpful.items():
  votesPerUser[user].append(votes)

# Kept as a defaultdict(list) whose stored values are the per-user means.
usersByHelpfulness = defaultdict(list)
for user, voteList in votesPerUser.items():
  usersByHelpfulness[user] = float(sum(voteList)) / len(voteList)

# For each value of K, read the file "similarity<K>" (whitespace-separated
# lines: user id, business id, score), group the scores by the review's
# 'useful' vote count, and save a plot of mean score against vote count.

# savefig does not create missing directories, so make sure the target exists.
if not os.path.isdir("plots"):
  os.makedirs("plots")

for K in 5,10,20,50:
  # Start from an empty table for every K.  Previously the table was created
  # once before the loop, so each later plot also averaged in the scores read
  # for all earlier K values.
  helpfulVsimilar = defaultdict(list)

  with open("similarity" + str(K)) as simFile:
    for line in simFile:
      fields = line.split()
      if len(fields) < 3:
        continue  # blank or truncated line
      u, b = fields[0], fields[1]
      score = float(fields[2])
      if (u, b) not in helpful:
        continue  # no review known for this (user, business) pair
      # Keep only scores inside [0, 1]; NaN is rejected explicitly.
      if score < 0 or score > 1 or math.isnan(score):
        continue
      helpfulVsimilar[helpful[u, b]].append(score)

  # (vote count, mean score) pairs in increasing vote-count order.
  points = sorted((h, sum(scores) / len(scores))
                  for h, scores in helpfulVsimilar.items())

  fig = plt.figure(figsize=(2.5,2.5))
  ax = fig.add_subplot(1,1,1)
  fig.subplots_adjust(bottom=0.3,top=0.9,left=0.3,right=0.9)
  plotX = [p[0] for p in points]
  plotY = [p[1] for p in points]
  # Only the 11 lowest distinct vote counts are drawn.
  ax.plot(plotX[:11],plotY[:11],'g',lw=2)
  plt.title("yelp, $K = " + str(K) + "$")
  plt.xticks((0,10))
  ax.yaxis.set_major_locator(MaxNLocator(6))
  plt.xlabel("review's `useful' rating")
  plt.ylabel(r"$d(\theta_i, \vartheta_{u,i})$")
  fig.savefig("plots/yelp" + str(K) + '.pdf')
  # Release the figure; otherwise pyplot keeps every figure alive.
  plt.close(fig)

