import urllib2
import MySQLdb
import simplejson as json
import time
import random
import socket
import sys

con = MySQLdb.connect(host="rocky.stanford.edu", user="jmcauley", passwd="", db="jmcauley")
con.autocommit(True)

cur = con.cursor()

if not "silk" in socket.gethostname():
  while (True):
    x = cur.execute("SELECT COUNT(*) FROM SteamUserQueue")
    uq = cur.fetchone()[0]
    x = cur.execute("SELECT COUNT(*) FROM SteamUserData")
    ug = cur.fetchone()[0]
    print "Steam:", ug, "users crawled", uq, "users queued"
    time.sleep(60)

def wait(wt = 0.1):
  time.sleep(wt)

hostId = int(socket.gethostname().split("silk")[1].split('.')[0]) % 21

amRootNode = (hostId == 0)

#if amRootNode:
if (False):
  try:
    cur.execute("DROP TABLE SteamUserQueue")
  except Exception as e:
    pass
  try:
    cur.execute("DROP TABLE SteamUserGot")
  except Exception as e:
    pass
  try:
    cur.execute("CREATE TABLE SteamUserQueue(steamId VARCHAR(255))")
    cur.execute("CREATE UNIQUE INDEX SteamUserQueue_steamId ON SteamUserQueue(steamId)")
    cur.execute("CREATE TABLE SteamUserGot(steamId VARCHAR(255))")
    cur.execute("CREATE UNIQUE INDEX SteamUserGot_steamId ON SteamUserGot(steamId)")
  except Exception as e:
    print e
    pass
else:
  time.sleep(5)

try:
  cur.execute("CREATE TABLE SteamGamesAll(steamId VARCHAR(255), html MEDIUMBLOB)")
  cur.execute("CREATE UNIQUE INDEX SteamGamesAll_steamId ON SteamGamesAll(steamId)")

  cur.execute("CREATE TABLE SteamReviewsPage(steamId VARCHAR(255), page INT, html MEDIUMBLOB)")
  cur.execute("CREATE UNIQUE INDEX SteamReviewsPage_steamId_page ON SteamReviewsPage(steamId, page)")

  cur.execute("CREATE TABLE SteamUserData(steamId VARCHAR(255), frontPage MEDIUMBLOB, friendsPage MEDIUMBLOB) ROW_FORMAT=COMPRESSED")
  cur.execute("CREATE UNIQUE INDEX SteamUserData_steamId ON SteamUserData (steamId)")
  
  cur.execute("CREATE TABLE SteamBadges(steamId VARCHAR(255), html MEDIUMBLOB)")
  cur.execute("CREATE UNIQUE INDEX SteamBadges_steamId ON SteamBadges(steamId)")
  cur.execute("CREATE TABLE SteamGames(steamId VARCHAR(255), html MEDIUMBLOB)")
  cur.execute("CREATE UNIQUE INDEX SteamGames_steamId ON SteamGames(steamId)")
  cur.execute("CREATE TABLE SteamReviews(steamId VARCHAR(255), html MEDIUMBLOB)")
  cur.execute("CREATE UNIQUE INDEX SteamReviews_steamId ON SteamReviews(steamId)")
  cur.execute("CREATE TABLE SteamGroups(steamId VARCHAR(255), html MEDIUMBLOB)")
  cur.execute("CRAETE UNIQUE INDEX SteamGroups_steamId ON SteamGroups(steamId)")
except Exception as e:
  print e
  pass

def crawlStuff(cursor, userId):
  cursor.execute("SELECT frontPage FROM SteamUserData WHERE steamId='" + userId + "'")
  html = cursor.fetchone()
  if html == None:
    print "No data for user", userId
    return
  html = html[0]
  if html.find(userId + "/games/") != -1:
    try:
      games = getPage("http://steamcommunity.com/" + userId + "/games/")
      cursor.execute("INSERT INTO SteamGames (steamId, html) VALUES(%(steamId)s, %(html)s)", {"steamId":userId, "html":games})
    except Exception as e:
      print e
      pass
  if html.find(userId + "/recommended/") != -1:
    try:
      reviews = getPage("http://steamcommunity.com/" + userId + "/recommended/")
      cursor.execute("INSERT INTO SteamReviews (steamId, html) VALUES(%(steamId)s, %(html)s)", {"steamId":userId, "html":reviews})
    except Exception as e:
      print e
      pass
  if html.find(userId + "/badges/") != -1:
    try:
      badges = getPage("http://steamcommunity.com/" + userId + "/badges/")
      cursor.execute("INSERT INTO SteamBadges (steamId, html) VALUES(%(steamId)s, %(html)s)", {"steamId":userId, "html":badges})
    except Exception as e:
      print e
      pass
  if html.find(userId + "/groups/") != -1:
    try:
      groups = getPage("http://steamcommunity.com/" + userId + "/groups/")
      cursor.execute("INSERT INTO SteamGroups (steamId, html) VALUES(%(steamId)s, %(html)s)", {"steamId":userId, "html":groups})
    except Exception as e:
      print e
      pass

def getPage(url):
  req = urllib2.Request(url, headers={"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/33.0.1750.152 Chrome/33.0.1750.152 Safari/537.36"})
  f = urllib2.urlopen(req, timeout=5)
  s = f.read()
  wait()
  f.close()
  return s

def htmlToFriends(html):
  x = html.split("friendBlockLinkOverlay")[1:]
  friends = [y.split("http://steamcommunity.com/")[1].split('"')[0] for y in x]
  return friends

def getFrontPageFriends(cursor, steamId):
  #print "SELECT FROM SteamUserData"
  cursor.execute("SELECT * FROM SteamUserData WHERE steamId='" + steamId + "'")
  #print "Done"
  rows = cursor.fetchall()
  frontPage, friends = None,None
  if len(rows) == 0:
    try:
      frontPage = getPage("http://steamcommunity.com/" + steamId)
      friendsPage = getPage("http://steamcommunity.com/" + steamId + "/friends")
      
      cursor.execute("INSERT INTO SteamUserData (steamId, frontPage, friendsPage) VALUES(%(steamId)s, %(frontPage)s, %(friendsPage)s)", {"steamId":steamId, "frontPage":frontPage, "friendsPage":friendsPage})
    except Exception as e:
      print e
      return None,[]
  else:
    _, frontPage, friendsPage = rows[0]
  friends = htmlToFriends(friendsPage)
  return frontPage, friends

def gotSomeData(cursor, steamId):
  print "Got", steamId
  try:
    #print "INSERT INTO SteamUserGot (" + steamId + ')'
    cursor.execute("INSERT INTO SteamUserGot (steamId) VALUES(%(steamId)s)", {"steamId":steamId})
    #print "Done"
  except Exception as e:
    pass

def alreadyDone(cursor, steamId):
  #print "SELECT FROM SteamUserGot"
  done = cursor.execute("SELECT * FROM SteamUserGot WHERE steamId='" + steamId + "'")
  #if done: print "Done (already got " + steamId + ')'
  #else: print "Done (need to get " + steamId + ')'
  return done

def enQueueData(cursor, steamId):
  print "Adding", steamId, "to queue"
  try:
    #print "INSERT INTO SteamUserQueue (" + steamId + ')'
    cursor.execute("INSERT INTO SteamUserQueue (steamId) VALUES(%(steamId)s)", {"steamId":steamId})
    #print "Done"
  except Exception as e:
    pass

def deQueueData(cursor):
  #print "SELECT FROM SteamUserQueue"
  cursor.execute("SELECT steamId FROM SteamUserQueue LIMIT 50")
  #print "Done"
  rows = cursor.fetchall()
  steamId = None
  if (len(rows) > 0):
    steamId = random.choice(rows)[0]
  else:
    print "queue is empty"
    return None
  try:
    #print "DELETE FROM SteamUserQueue"
    cursor.execute("DELETE FROM SteamUserQueue WHERE steamId='" + steamId + "'")
    #print "Done"
  except Exception as e:
    #print "DELETE FAILED"
    pass
  print "Pulled", steamId, "from queue"
  return steamId

def crawl():
  if (amRootNode):
    enQueueData(cur, "id/wzx052266")
  else:
    time.sleep(5)
  while(True):
    u = deQueueData(cur)
    if (u == None): break
    if not alreadyDone(cur, u):
      _,friends = getFrontPageFriends(cur, u)
      crawlStuff(cur, u)
      gotSomeData(cur, u)
      for f in set(friends):
        if not alreadyDone(cur, f):
          enQueueData(cur, f)

#crawl()

def allGamesAndPages(cursor):
  moreThan10 = set()
  f = open("/dfs/ilfs2/0/julian/steam/needToGetPages.txt", 'r')
  for l in f.readlines():
    userId, page = l.strip().split()
    moreThan10.add(userId)
    continue
    cursor.execute("SELECT * FROM SteamReviewsPage WHERE steamId='" + userId + "' AND page='" + str(page) + "'")
    if len(cursor.fetchall()) > 0:
      print "Already got", userId, page
      continue
    try:
      rPage = getPage("http://steamcommunity.com/" + userId + "/reviews/?p=" + page)
    except Exception as e:
      print e
      time.sleep(10)
      continue
    cursor.execute("INSERT INTO SteamReviewsPage (steamId, page, html) VALUES(%(steamId)s, %(page)s, %(html)s)", {"steamId":userId, "page":page, "html":rPage})
    print "Inserted", userId, page

  f = open("/dfs/ilfs2/0/julian/steam/needToGetGames.txt", 'r')
  for userId in moreThan10:
    cursor.execute("SELECT * FROM SteamGamesAll WHERE steamId='" + userId + "'")
    if len(cursor.fetchall()) > 0:
      print "Already got", userId
      continue
    try:
      games = getPage("http://steamcommunity.com/" + userId + "/games?tab=all")
    except Exception as e:
      print e
      time.sleep(10)
      continue
    cursor.execute("INSERT INTO SteamGamesAll (steamId, html) VALUES(%(steamId)s, %(html)s)", {"steamId":userId, "html":games})
    print "Inserted", userId

allGamesAndPages(cur)

# Number of views: jsd[0][-1][-1][-1][-1]

