You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

111 lines
3.8KB

  1. #!/usr/bin/env python3
  2. import argparse
  3. import psycopg2 as pg
  4. def get_args():
  5. p = argparse.ArgumentParser()
  6. p.add_argument("-H", "--host",help="Hostname or IP", default="localhost")
  7. p.add_argument("-p", "--port",help="Port of the service", default="5432")
  8. p.add_argument("-d", "--dbname",help="database to connect", default="postgres")
  9. p.add_argument("-U", "--user",help="Username to connect with", required=True)
  10. p.add_argument("-P", "--pass",help="Password to use", required=True)
  11. p.add_argument("-t", "--table", help="Table to delete dupes", required=True)
  12. p.add_argument("-w", "--where", help="Custom where clause")
  13. p.add_argument("--delete", help="Perform the deletion", action='store_true')
  14. return vars(p.parse_args())
  15. def get_dupes_count_query(args, cols):
  16. dupes_count_query = "SELECT COUNT(*) FROM (SELECT "
  17. dupes_count_query += ", ".join(cols['name'])
  18. dupes_count_query += f", count(*) FROM {args['table']}"
  19. dupes_count_query += f" where {args['where']}" if args['where'] else ""
  20. dupes_count_query += " GROUP BY "
  21. dupes_count_query += ", ".join(cols['name'])
  22. dupes_count_query += " having count(*) > 1"
  23. dupes_count_query += ") A"
  24. return dupes_count_query
  25. def gen_coal_values(cols):
  26. coal = []
  27. for i in range(len(cols['type'])):
  28. if cols['type'][i] in ('character varying', 'text'):
  29. coal += [f"COALESCE(a.{cols['name'][i]}, 'NULL')=COALESCE(b.{cols['name'][i]}, 'NULL')"]
  30. elif cols['type'][i] in ('timestamp', 'timestamp without time zone'):
  31. coal += [f"COALESCE(a.{cols['name'][i]}, '1970-01-01 00:00:00')=COALESCE(b.{cols['name'][i]}, '1970-01-01 00:00:00')"]
  32. elif cols['type'][i] in ('integer', 'double precision'):
  33. coal += [f"COALESCE(a.{cols['name'][i]}, -1)=COALESCE(b.{cols['name'][i]}, -1)"]
  34. else:
  35. coal += [f"COALESCE(a.{cols['name'][i]}, -1)=COALESCE(b.{cols['name'][i]}, -1)"]
  36. return coal
  37. def get_del_query(args, cols):
  38. del_query = f"DELETE FROM {args['table']} a using (select min(ctid) as ctid, "
  39. del_query += ", ".join(cols['name'])
  40. del_query += f", count(*) from {args['table']} group by "
  41. del_query += ", ".join(cols['name'])
  42. del_query += " having count(*) > 1) b where "
  43. del_query += " and ".join(cols['coal'])
  44. del_query += " and a.ctid <> b.ctid"
  45. del_query += f" and a.{args['where']}" if args['where'] else ""
  46. return del_query
  47. def main():
  48. args = get_args()
  49. conn = pg.connect(f"host={args['host']} port={args['port']} dbname={args['dbname']} user={args['user']} password={args['pass']}")
  50. cur = conn.cursor()
  51. schema,table = args['table'].split(".")
  52. cur.execute(f"select data_type, column_name from information_schema.columns where table_schema = '{schema}' and table_name = '{table}'")
  53. tmp = cur.fetchall()
  54. cols = {}
  55. cols['type'] = [x[0] for x in tmp]
  56. cols['name'] = [x[1] for x in tmp]
  57. dupes_count_query = get_dupes_count_query(args, cols)
  58. cur.execute(dupes_count_query)
  59. total_dupes = [x[0] for x in cur.fetchall()][0]
  60. print(f"[*] Total dupes: {total_dupes}")
  61. if not args['delete']:
  62. return
  63. if total_dupes == 0:
  64. print("[!] No dupes to delete... Exiting...")
  65. return
  66. cols['coal'] = gen_coal_values(cols)
  67. del_query = get_del_query(args, cols)
  68. cur.execute(del_query)
  69. print("[+] Dupes deleted")
  70. cur.execute(dupes_count_query)
  71. total_dupes = [x[0] for x in cur.fetchall()][0]
  72. print(f"[*] Total dupes: {total_dupes}")
  73. user_input = input("Shall we commit? [Y/n]: ")
  74. if user_input.lower() == 'n':
  75. print("[*] Rollbacking ...")
  76. cur.execute("rollback")
  77. print("[+] Done!")
  78. return
  79. cur.execute("commit")
  80. print("[+] Commited!")
  81. if __name__=='__main__':
  82. exit(main())