Coverage for IdentifySite.py: 0%

1#!/usr/bin/env python

2# -*- coding: utf-8 -*-

5#This program is free software: you can redistribute it and/or modify

6#it under the terms of the GNU Affero General Public License as published by

7#the Free Software Foundation, version 3 of the License.

9#This program is distributed in the hope that it will be useful,

10#but WITHOUT ANY WARRANTY; without even the implied warranty of

11#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

12#GNU General Public License for more details.

13#

14#You should have received a copy of the GNU Affero General Public License

15#along with this program. If not, see <http://www.gnu.org/licenses/>.

16#In the "official" distribution you can find the license in agpl-3.0.txt.

18from __future__ import print_function

21#import L10n

22#_ = L10n.get_translation()

24import re

25import sys

26import os

27from time import time

28from optparse import OptionParser

29import codecs

31import Database

33import Configuration

34import logging

# xlrd is an optional dependency used only to read the first cell of
# .xls/.xlsx files. When it is not installed the name is bound to None so
# callers can test for its presence.
try:
    import xlrd
except ImportError:
    # Only a missing/unimportable module is expected here; anything else
    # (KeyboardInterrupt, SystemExit, ...) should propagate.
    xlrd = None

# logging has been set up in fpdb.py or HUD_main.py, use their settings:
log = logging.getLogger("parser")

# Per-site patterns, keyed by filter name.
# re_Divider: searched for in the whole file to flag an archive with dividers.
re_Divider = {
    'PokerStars': re.compile(r'^Hand #(\d+)\s*$', re.MULTILINE),
    'Fulltilt': re.compile(r'\*{20}\s#\s\d+\s\*{15,25}\s?', re.MULTILINE),
}
# re_Head: matched against the start of the file to flag an archive header.
re_Head = {
    'Fulltilt': re.compile(r'^((BEGIN)?\n)?FullTiltPoker.+\n\nSeat', re.MULTILINE),
}
# re_XLS: searched for in the header cell text of spreadsheet summaries.
re_XLS = {
    'PokerStars': re.compile(r'Tournaments\splayed\sby\s\'.+?\''),
    'Fulltilt': re.compile(r'Player\sTournament\sReport\sfor\s.+?\s$.*$'),
}

class FPDBFile(object):
    """Record describing one scanned file and what was detected about it.

    Every field has a class-level default; only ``path`` is set by the
    constructor, the remaining fields are filled in by whoever identifies
    the file.
    """

    # Location of the file; overwritten per instance in __init__.
    path = ""
    # Detected file type. Valid: hh, summary, both
    ftype = None
    # Site object the file was attributed to, or None if not identified.
    site = None
    # Name of the codec the file content was decoded with.
    kodec = None

    # Archive flags: the overall flag plus which marker triggered it.
    archive = False
    archiveHead = False
    archiveDivider = False

    # Game type of the file; False until it has been determined.
    gametype = False
    # Hero (player) name; '-' until one is detected.
    hero = '-'

    def __init__(self, path):
        """Create a record for the file located at *path*."""
        self.path = path

class Site(object):
    """Identification and hand-splitting settings for one site.

    Most values are copied from the converter class passed as ``obj``; the
    rest are derived from ``filter_name``.
    """

    def __init__(self, name, hhc_fname, filter_name, summary, obj):
        """Collect the settings for one site.

        name        -- site name
        hhc_fname   -- name of the converter module
        filter_name -- name of the converter class
        summary     -- name of the summary importer (module and class share
                       this name), or a false value when there is none
        obj         -- converter class the regexes and flags are copied from
        """
        self.name = name
        # FIXME: rename filter to hhc_fname
        self.hhc_fname = hhc_fname
        # FIXME: rename filter_name to hhc_type
        self.filter_name = filter_name

        # Settings taken straight from the converter class.
        self.re_SplitHands = obj.re_SplitHands
        self.codepage = obj.codepage
        self.copyGameHeader = obj.copyGameHeader
        self.summaryInFile = obj.summaryInFile
        self.re_Identify = obj.re_Identify

        if not summary:
            self.summary = None
        else:
            self.summary = summary
            # The summary importer class lives in a module of the same name.
            summary_class = getattr(__import__(summary), summary, None)
            self.re_SumIdentify = summary_class.re_Identify

        self.line_delimiter = self.getDelimiter(filter_name)
        self.line_addendum = self.getAddendum(filter_name)
        self.spaces = (filter_name == 'Entraction')
        self.getHeroRegex(obj, filter_name)

    def getDelimiter(self, filter_name):
        """Return the blank-line delimiter for this site, or None."""
        if filter_name == 'PokerStars':
            return '\n\n'
        if filter_name in ('Fulltilt', 'PokerTracker'):
            return '\n\n\n'
        # Otherwise probe the site's split regex with candidate delimiters.
        if self.re_SplitHands.match('\n\n') and filter_name != 'Entraction':
            return '\n\n'
        if self.re_SplitHands.match('\n\n\n'):
            return '\n\n\n'
        return None

    def getAddendum(self, filter_name):
        """Return the site-specific addendum string ('' for most sites)."""
        addenda = {
            'OnGame': '*',
            'Merge': '<',
            'Entraction': '\n\n',
        }
        return addenda.get(filter_name, '')

    def getHeroRegex(self, obj, filter_name):
        """Set the hero-card regex attributes from the converter class."""
        # Bovada and Enet never get a hero regex, even if the class has one.
        wants_hero = (hasattr(obj, 're_HeroCards')
                      and filter_name not in ('Bovada', 'Enet'))
        self.re_HeroCards = obj.re_HeroCards if wants_hero else None
        if filter_name == 'PokerTracker':
            # PokerTracker carries two extra hero patterns.
            self.re_HeroCards1 = obj.re_HeroCards1
            self.re_HeroCards2 = obj.re_HeroCards2

119

class IdentifySite(object):
    """Work out which site, and which kind of file, each scanned file is.

    ``sitelist`` maps a converter's ``siteId`` to a ``Site``; ``filelist``
    maps a file path to the ``FPDBFile`` produced for it by ``idSite``.
    """

    def __init__(self, config, hhcs = None):
        """Store *config* and build the site list.

        hhcs -- mapping of site name to converter settings; when empty or
                None, ``config.hhcs`` is used instead.
        """
        self.config = config
        # Codecs tried, in this order, when reading a text file.
        self.codepage = ("utf8", "utf-16", "cp1252", "ISO-8859-1")
        self.sitelist = {}
        self.filelist = {}
        self.generateSiteList(hhcs)

    def scan(self, path):
        """Identify *path*: a single file, or every file below a directory."""
        if os.path.isdir(path):
            self.walkDirectory(path, self.sitelist)
        else:
            self.processFile(path)

    def get_fobj(self, file):
        """Return the FPDBFile recorded for path *file*, or False if none."""
        return self.filelist.get(file, False)

    def get_filelist(self):
        """Return the path -> FPDBFile mapping (the live dict, not a copy)."""
        return self.filelist

    def clear_filelist(self):
        """Forget every identified file."""
        self.filelist = {}

    def generateSiteList(self, hhcs):
        """Fill ``self.sitelist`` with one Site per converter in *hhcs*.

        Entries are keyed by the converter class's ``siteId``. A converter
        whose Site cannot be built is logged and skipped. The PokerTracker
        identification regexes are loaded as well; ``idSite`` uses them as a
        fallback.
        """
        if not hhcs:
            hhcs = self.config.hhcs
        for site, hhc in hhcs.items():
            converter = hhc.converter
            filter_name = converter.replace("ToFpdb", "")
            summary = hhc.summaryImporter
            mod = __import__(converter)
            obj = getattr(mod, filter_name, None)
            try:
                self.sitelist[obj.siteId] = Site(site, converter, filter_name, summary, obj)
            except Exception as e:
                log.error("Failed to load HH importer: %s. %s" % (filter_name, e))
        self.re_Identify_PT = getattr(__import__("PokerTrackerToFpdb"), "PokerTracker", None).re_Identify
        self.re_SumIdentify_PT = getattr(__import__("PokerTrackerSummary"), "PokerTrackerSummary", None).re_Identify

    def walkDirectory(self, dir, sitelist):
        """Recursively call ``processFile`` on every file below *dir*.

        *sitelist* is only handed on to the recursive calls.
        """
        root = os.path.abspath(dir)
        # os.listdir never yields "." or "..", so no filtering is needed.
        for entry in os.listdir(root):
            full_path = os.path.join(root, entry)
            if os.path.isdir(full_path):
                self.walkDirectory(full_path, sitelist)
            else:
                self.processFile(full_path)

    def __listof(self, x):
        """Return *x* unchanged if it is a list or tuple, else ``[x]``."""
        if isinstance(x, (list, tuple)):
            return x
        return [x]

    def processFile(self, path):
        """Read and identify *path*, recording the result in ``filelist``.

        Paths already in ``filelist`` are skipped. Files that cannot be
        read, are empty, or match no site are not recorded.
        """
        log.debug("process file identify: %s", path)
        if path in self.filelist:
            return
        whole_file, kodec = self.read_file(path)
        log.debug("kodec: %s", kodec)
        if not whole_file:
            return
        fobj = self.idSite(path, whole_file, kodec)
        if fobj is False:  # Site id failed
            log.debug("DEBUG: siteId Failed for: %s", path)
        else:
            self.filelist[path] = fobj

    def read_file(self, in_path):
        """Return ``(text, codec)`` for *in_path*, or ``(None, None)``.

        For ``.xls``/``.xlsx`` paths the text is the string value of cell
        (0, 0) of the first sheet and the codec is reported as 'utf-8';
        without xlrd, or if the workbook cannot be read, the result is
        ``(None, None)``. Any other path is decoded with the first codec in
        ``self.codepage`` that succeeds.
        """
        if in_path.endswith(('.xls', '.xlsx')):
            # Both extensions need xlrd; never fall through to decoding a
            # binary workbook as text.
            if not xlrd:
                return None, None
            try:
                wb = xlrd.open_workbook(in_path)
                header = str(wb.sheet_by_index(0).cell(0, 0).value)
            except Exception:
                # Best effort: any failure reading the workbook means the
                # file is simply not identified.
                return None, None
            return header, 'utf-8'
        for kodec in self.codepage:
            try:
                # The context manager closes the handle even when read()
                # fails because the codec does not fit.
                with codecs.open(in_path, 'r', kodec) as infile:
                    return infile.read(), kodec
            except (IOError, OSError, ValueError):
                # ValueError covers UnicodeError; try the next codec.
                continue
        return None, None

    def idSite(self, path, whole_file, kodec):
        """Identify the site and file type of *whole_file*.

        Tries, in order: the hand-history regex of each known site, the
        summary regex of each site that has a summary importer, then the
        PokerTracker regexes. Returns a populated FPDBFile, or False when
        nothing matches.
        """
        f = FPDBFile(path)
        f.kodec = kodec
        head = whole_file[:5000]

        # 1) Hand histories of the known sites.
        for site in self.sitelist.values():
            if not site.re_Identify.search(head):
                continue
            filter_name = site.filter_name
            if filter_name in ('Fulltilt', 'PokerStars'):
                # These two sites may be archives, flagged by a divider
                # anywhere in the file or by a header at its start.
                if re_Divider[filter_name].search(whole_file.replace('\r\n', '\n')):
                    f.archive = True
                    f.archiveDivider = True
                elif re_Head.get(filter_name) and re_Head[filter_name].match(head.replace('\r\n', '\n')):
                    f.archive = True
                    f.archiveHead = True
            f.site = site
            f.ftype = "hh"
            if site.re_HeroCards:
                h = site.re_HeroCards.search(head)
                if h and 'PNAME' in h.groupdict():
                    f.hero = h.group('PNAME')
            else:
                f.hero = 'Hero'
            return f

        # 2) Summaries of the sites that have a summary importer.
        is_spreadsheet = path.endswith(('.xls', '.xlsx'))
        for site in self.sitelist.values():
            if not site.summary:
                continue
            if is_spreadsheet:
                filter_name = site.filter_name
                matched = (filter_name in ('Fulltilt', 'PokerStars')
                           and re_XLS[filter_name].search(head))
            else:
                matched = site.re_SumIdentify.search(whole_file[:10000])
            if matched:
                f.site = site
                f.ftype = "summary"
                return f

        # 3) PokerTracker fallback.
        m1 = self.re_Identify_PT.search(head)
        m2 = self.re_SumIdentify_PT.search(whole_file[:100])
        if not (m1 or m2):
            return False

        converter = 'PokerTrackerToFpdb'
        filter_name = 'PokerTracker'
        mod = __import__(converter)
        obj = getattr(mod, filter_name, None)
        f.site = Site('PokerTracker', converter, filter_name, 'PokerTrackerSummary', obj)
        if not m1:
            f.ftype = "summary"
            return f

        f.ftype = "hh"
        # Choose the hand splitter from the marker found in the file.
        if re.search(r'\*{2}\sGame\sID\s', m1.group()):
            f.site.line_delimiter = None
            f.site.re_SplitHands = re.compile(r'End\sof\sgame\s\d+')
        elif re.search(r'\*{2}\sHand\s\#\s', m1.group()):
            f.site.line_delimiter = None
            f.site.re_SplitHands = re.compile(r'Rake:\s[^\s]+')
        elif re.search(r'Server\spoker\d+\.ipoker\.com', whole_file[:250]):
            f.site.line_delimiter = None
            f.site.spaces = True
            f.site.re_SplitHands = re.compile(r'GAME\s\#')
        # Hero name: first pattern, then the second as a fallback.
        hero_match = (f.site.re_HeroCards1.search(head)
                      or f.site.re_HeroCards2.search(head))
        if hero_match:
            f.hero = hero_match.group('PNAME')
        return f

    def getFilesForSite(self, sitename, ftype):
        """Return the identified files of site *sitename* with type *ftype*."""
        return [f for f in self.filelist.values()
                if f.ftype is not None
                and f.site.name == sitename
                and f.ftype == ftype]

    def fetchGameTypes(self):
        """Determine and store the game type of every identified "hh" file.

        For each such file the site's converter class is instantiated with
        ``autostart = False``; if its ``readFile()`` succeeds, the result of
        ``determineGameType`` is stored in the file's ``gametype``.
        """
        for name, f in list(self.filelist.items()):
            if f.ftype != "hh":
                continue
            # Only byte paths need decoding; text paths are used as they are.
            if isinstance(name, bytes):
                name = name.decode("utf8", "replace")
            mod = __import__(f.site.hhc_fname)
            obj = getattr(mod, f.site.filter_name, None)
            hhc = obj(self.config, in_path = name, sitename = f.site.hhc_fname, autostart = False)
            if hhc.readFile():
                f.gametype = hhc.determineGameType(hhc.whole_file)

315

def main(argv=None):
    """Identify the regression test files and print a report.

    Loads HUD_config.test.xml, scans the 'regression-test-files' directory
    and prints the scan duration, the known sites and the type and converter
    detected for each file. *argv* defaults to ``sys.argv[1:]`` but is not
    otherwise used.
    """
    if argv is None:
        argv = sys.argv[1:]

    Configuration.set_logfile("fpdb-log.txt")
    config = Configuration.Config(file = "HUD_config.test.xml")
    in_path = os.path.abspath('regression-test-files')
    IdSite = IdentifySite(config)

    started = time()
    IdSite.scan(in_path)
    print('duration', time() - started)

    print("\n----------- SITE LIST -----------")
    for sid, site in list(IdSite.sitelist.items()):
        row = (sid, site.name, site.filter_name, site.summary)
        print("%2d: Name: %s HHC: %s Summary: %s" % row)
    print("----------- END SITE LIST -----------")

    print("\n----------- ID REGRESSION FILES -----------")
    identified = list(IdSite.filelist.items())
    for f, ffile in identified:
        details = ": Type: %s " % ffile.ftype
        if ffile.ftype == "hh":
            details += "Conv: %s" % ffile.site.hhc_fname
        elif ffile.ftype == "summary":
            details += "Conv: %s" % ffile.site.summary
        print(f, details)
    print(len(identified), 'files identified')
    print("----------- END ID REGRESSION FILES -----------")

    print("----------- RETRIEVE FOR SINGLE SITE -----------")
    # The result is discarded; this only exercises the lookup.
    IdSite.getFilesForSite("PokerStars", "hh")
    print("----------- END RETRIEVE FOR SINGLE SITE -----------")

350

# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)