Author: Michael R. Crusoe <michael.crusoe@gmail.com>
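Description: Port the evaluation scripts to Python 3 with 2to3
 Converts print statements to the print() function, xrange to range,
 sys.maxint to sys.maxsize, wraps dict .items()/.values() iteration in
 list(), and changes the shebang lines to /usr/bin/python3.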
--- hisat2.orig/evaluation/build_indexes.py
+++ hisat2/evaluation/build_indexes.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os
 use_message = '''
@@ -47,7 +47,7 @@
                 assert False
             else:
                 assert False
-            print >> sys.stderr, cmd
+            print(cmd, file=sys.stderr)
             os.system(cmd)
             os.chdir("..")
 
--- hisat2.orig/evaluation/generate_reads.py
+++ hisat2/evaluation/generate_reads.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os, random
 from argparse import ArgumentParser, FileType
@@ -18,7 +18,7 @@
     assert len(random_list) == len(reads)
     for i in random_list:
         read = reads[random_list[i]]
-        print >> read_file_out, "\n".join(read)
+        print("\n".join(read), file=read_file_out)
     read_file_out.close()
 
 
@@ -73,11 +73,11 @@
     data_dir_base = "../../../data"
 
     def generate_reads(cmd):
-        print >> sys.stderr, cmd
+        print(cmd, file=sys.stderr)
         os.system(cmd)
 
         random.seed(0)
-        print >> sys.stderr, "shuffle reads sim_1.fa and sim_2.fa"
+        print("shuffle reads sim_1.fa and sim_2.fa", file=sys.stderr)
         shuffle_pairs("sim_1.fa", "sim_2.fa")
         shuffle_reads_cmd = " mv sim_1.fa.shuffle sim_1.fa"
         shuffle_reads_cmd += "; mv sim_2.fa.shuffle sim_2.fa"
--- hisat2.orig/evaluation/get_data.py
+++ hisat2/evaluation/get_data.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os
 from argparse import ArgumentParser, FileType
@@ -21,7 +21,7 @@
         if os.path.exists(file):
             continue
         wget_cmd = "wget %s/data/%s" % (data_root, file)
-        print >> sys.stderr, wget_cmd
+        print(wget_cmd, file=sys.stderr)
         os.system(wget_cmd)
     os.chdir("..")
 
@@ -42,7 +42,7 @@
                 continue
             cmd = "wget %s/indexes/%s.tar.gz; tar xvzf %s.tar.gz; rm %s.tar.gz" % \
                 (data_root, aligner_dir, aligner_dir, aligner_dir)
-            print >> sys.stderr, cmd
+            print(cmd, file=sys.stderr)
             os.system(cmd)
     os.chdir("..")
 
@@ -83,7 +83,7 @@
                 continue
             cmd = "wget %s/reads/%s/%s.tar.gz; tar xvzf %s.tar.gz; rm %s.tar.gz" % \
                 (data_root, type, file, file, file)
-            print >> sys.stderr, cmd
+            print(cmd, file=sys.stderr)
             os.system(cmd)
         os.chdir("..")
     
--- hisat2.orig/evaluation/get_programs.py
+++ hisat2/evaluation/get_programs.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os
 use_message = '''
@@ -112,7 +112,7 @@
             cmd += "; source ./source_me.sh; make; cp bin/%s ../bin; cd .." % (program)
         else:
             assert False
-        print >> sys.stderr, cmd
+        print(cmd, file=sys.stderr)
         os.system(cmd)
 
     files = ["hisat2", "hisat2-align-s", "hisat2-build", "hisat2-build-s", "hisat2-inspect", "hisat2-inspect-s", "extract_splice_sites.py", "hisat2_extract_snps_haplotypes_UCSC.py", "hisat2_simulate_reads.py"]
--- hisat2.orig/evaluation/real/calculate_read_cost.py
+++ hisat2/evaluation/real/calculate_read_cost.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os, subprocess, signal
 import multiprocessing
@@ -86,7 +86,7 @@
 
     genome_file.close()
 
-    print >> sys.stderr, "genome is loaded"
+    print("genome is loaded", file=sys.stderr)
     
     return chr_dic
 
@@ -111,7 +111,7 @@
             data = int(data)
         snps[chr].append([snpID, type, int(pos), data])
 
-    print >> sys.stderr, "snp is loaded"
+    print("snp is loaded", file=sys.stderr)
 
     return snps
 
@@ -158,7 +158,7 @@
     gtf_file.close()
     
     # Sort exons and merge where separating introns are <=5 bps
-    for tran, [chrom, strand, exons] in trans.items():
+    for tran, [chrom, strand, exons] in list(trans.items()):
             exons.sort()
             tmp_exons = [exons[0]]
             for i in range(1, len(exons)):
@@ -170,7 +170,7 @@
 
     # Calculate and print the unique junctions
     junctions = set()
-    for chrom, strand, exons in trans.values():
+    for chrom, strand, exons in list(trans.values()):
         for i in range(1, len(exons)):
             junctions.add(to_junction_str([chrom, exons[i-1][1], exons[i][0]]))
 
@@ -411,7 +411,7 @@
             high = mid - 1
 
     snps = []
-    for i in xrange(low, len(chr_snps)):
+    for i in range(low, len(chr_snps)):
         snp = chr_snps[i]
         snpID, type, pos, data = snp
         pos2 = pos
@@ -539,7 +539,7 @@
 
             if cigar_op == "S":
                 if i != 0 and i != len(cigars) - 1:
-                    print >> sys.stderr, "S is located at %dth out of %d %s" % (i+1, len(cigars), cigar_str)
+                    print("S is located at %dth out of %d %s" % (i+1, len(cigars), cigar_str), file=sys.stderr)
 
             if cigar_op in "MS":
                 ref_pos = right_pos
@@ -596,8 +596,8 @@
         if hisat2:
             XM, NM = HISAT2_XM, HISAT2_NM
         if NM < MAX_EDIT:
-            print >> temp_read_file, "%s\t%d\t%s\t%s\t%s\tXM:i:%d\tNM:i:%d" % \
-                  (read_id, flag, chr, pos, cigar_str, XM, NM)
+            print("%s\t%d\t%s\t%s\t%s\tXM:i:%d\tNM:i:%d" % \
+                  (read_id, flag, chr, pos, cigar_str, XM, NM), file=temp_read_file)
 
             found = False
             me = "%s\t%s\t%d" % (read_id, chr, pos)
@@ -611,8 +611,8 @@
                             flag, chr, pos, cigar_str, XM, NM, mate_flag, mate_chr_str, mate_pos, mate_cigar_str, mate_XM, mate_NM = \
                                   mate_flag, mate_chr, mate_pos, mate_cigar_str, mate_XM, mate_NM, flag, chr, pos, cigar_str, XM, NM
 
-                        print >> temp_pair_file, "%s\t%d\t%s\t%d\t%s\tXM:i:%d\tNM:i:%d\t%d\t%s\t%d\t%s\tXM:i:%d\tNM:i:%d" % \
-                              (read_id, mate_flag, mate_chr, mate_pos, mate_cigar_str, mate_XM, mate_NM, flag, chr, pos, cigar_str, XM, NM)
+                        print("%s\t%d\t%s\t%d\t%s\tXM:i:%d\tNM:i:%d\t%d\t%s\t%d\t%s\tXM:i:%d\tNM:i:%d" % \
+                              (read_id, mate_flag, mate_chr, mate_pos, mate_cigar_str, mate_XM, mate_NM, flag, chr, pos, cigar_str, XM, NM), file=temp_pair_file)
                         found = True
                         break
 
@@ -764,7 +764,7 @@
 
 def is_concordantly(read_id, flag, chr, pos, cigar_str, XM, NM, mate_flag, mate_chr, mate_pos, mate_cigar_str, mate_XM, mate_NM):
     concord_length = 1000
-    segment_length = sys.maxint
+    segment_length = sys.maxsize
 
     pairs = {}
     pairs[0] = [flag, chr, pos, cigar_str, XM, NM]
@@ -819,7 +819,7 @@
 
         # check concordantly
         concord_align, segment_len = is_concordantly(read_id, flag, chr, pos, cigar_str, XM, NM, mate_flag, mate_chr, mate_pos, mate_cigar_str, mate_XM, mate_NM)
-        print >> (con_file if concord_align else discon_file), line.strip(), ('none', 'first')[(flag & 0x40 == 0x40)], ('none', 'last')[(mate_flag & 0x80 == 0x80)], segment_len
+        print(line.strip(), ('none', 'first')[(flag & 0x40 == 0x40)], ('none', 'last')[(mate_flag & 0x80 == 0x80)], segment_len, file=(con_file if concord_align else discon_file))
 
         if junction_pair:
             for junction_str, is_gtf_junction in pair_junctions:
@@ -911,7 +911,7 @@
 
 def create_sql_db(sql_db):
     if os.path.exists(sql_db):
-        print >> sys.stderr, sql_db, "already exists!"
+        print(sql_db, "already exists!", file=sys.stderr)
         return
 
     columns = [
@@ -961,14 +961,14 @@
     database_fname = database_name + "_" + paired + ".analysis"
     database_file = open(database_fname, "w")
 
-    print >> database_file, "aligner\tuse_annotation\tend_type\tedit_distance\tmapped_reads\tjunction_reads\tgtf_junction_reads\tjunctions\tgtf_junctions\truntime"
+    print("aligner\tuse_annotation\tend_type\tedit_distance\tmapped_reads\tjunction_reads\tgtf_junction_reads\tjunctions\tgtf_junctions\truntime", file=database_file)
     for aligner in aligners:
         for edit_distance in range(MAX_EDIT):
             sql_row = "SELECT aligner, use_annotation, end_type, edit_distance, mapped_reads, junction_reads, gtf_junction_reads, junctions, gtf_junctions, runtime FROM Mappings"
             sql_row += " WHERE reads = '%s' and aligner = '%s' and edit_distance = %d and end_type = '%s' ORDER BY created DESC LIMIT 1" % (database_name, aligner, edit_distance, paired)
             output = sql_execute(sql_db, sql_row)
             if output:
-                print >> database_file, output
+                print(output, file=database_file)
             
     database_file.close()
 
@@ -1072,7 +1072,7 @@
         gtf_junctions.append(junction)
     gtf_junctions = sorted(gtf_junctions, cmp=junction_cmp)            
 
-    print >> sys.stderr, "aligner\tuse_annotation\tend_type\tedit_distance\tmapped_reads\tjunction_reads\tgtf_junction_reads\tjunctions\tgtf_junctions\truntime"
+    print("aligner\tuse_annotation\tend_type\tedit_distance\tmapped_reads\tjunction_reads\tgtf_junction_reads\tjunctions\tgtf_junctions\truntime", file=sys.stderr)
     
     for paired in [False, True]:
         if not paired and not single_end:
@@ -1219,7 +1219,7 @@
                 if version != "":
                     version = int(version)
                 else:
-                    version = sys.maxint
+                    version = sys.maxsize
 
                 if not RNA:
                     cmd += ["--no-spliced-alignment"]
@@ -1441,7 +1441,7 @@
                         dummy_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../one.fq", "../two.fq", "/dev/null")
                         start_time = datetime.now()
                         if verbose:
-                            print >> sys.stderr, start_time, "\t", " ".join(dummy_cmd)
+                            print(start_time, "\t", " ".join(dummy_cmd), file=sys.stderr)
                         if aligner in ["hisat2", "hisat", "bowtie", "bowtie2", "gsnap", "bwa"]:
                             proc = subprocess.Popen(dummy_cmd, stdout=open("/dev/null", "w"), stderr=subprocess.PIPE)
                         else:
@@ -1451,7 +1451,7 @@
                         duration = finish_time - start_time
                         duration = duration.total_seconds()
                         if verbose:
-                            print >> sys.stderr, finish_time, "duration:", duration
+                            print(finish_time, "duration:", duration, file=sys.stderr)
                         loading_time = duration
 
                 # align all reads
@@ -1459,7 +1459,7 @@
                     sweep_read_cmd = "cat ../%s ../%s > /dev/null" % (type_read1_fname, type_read2_fname)
                 else:
                     sweep_read_cmd = "cat ../%s > /dev/null" % (type_read1_fname)
-                print >> sys.stderr, datetime.now(), "\t", sweep_read_cmd
+                print(datetime.now(), "\t", sweep_read_cmd, file=sys.stderr)
                 os.system(sweep_read_cmd)
 
                 skip_alignment = False
@@ -1470,7 +1470,7 @@
                     aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname)
                     start_time = datetime.now()
                     if verbose:
-                        print >> sys.stderr, start_time, "\t", " ".join(aligner_cmd)
+                        print(start_time, "\t", " ".join(aligner_cmd), file=sys.stderr)
                     if aligner in ["hisat2", "hisat", "bowtie", "bowtie2", "gsnap", "bwa", "vg", "minimap2"]:
                         proc = subprocess.Popen(aligner_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE)
                     else:
@@ -1483,14 +1483,14 @@
                     if duration < 0.1:
                         duration = 0.1
                     if verbose:
-                        print >> sys.stderr, finish_time, "duration:", duration
+                        print(finish_time, "duration:", duration, file=sys.stderr)
 
                     if verbose:
-                        print >> sys.stderr, finish_time, "Memory Usage: %dMB" % (int(mem_usage) / 1024)
+                        print(finish_time, "Memory Usage: %dMB" % (int(mem_usage) / 1024), file=sys.stderr)
 
                     if debug and aligner == "hisat" and type == "x1":
                         os.system("cat metrics.out")
-                        print >> sys.stderr, "\ttime: %.4f" % (duration)
+                        print("\ttime: %.4f" % (duration), file=sys.stderr)
                         # break
 
                 if aligner == "star" and type in ["", "gtf"]:
@@ -1498,7 +1498,7 @@
                 elif aligner in ["hisat2", "hisat"] and type == "x2":
                     aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname, 1)
                     if verbose:
-                        print >> sys.stderr, start_time, "\t", " ".join(aligner_cmd)
+                        print(start_time, "\t", " ".join(aligner_cmd), file=sys.stderr)
                     start_time = datetime.now()
                     proc = subprocess.Popen(aligner_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE)
                     proc.communicate()
@@ -1508,7 +1508,7 @@
                     if duration < 0.1:
                         duration = 0.1
                     if verbose:
-                        print >> sys.stderr, finish_time, "duration:", duration
+                        print(finish_time, "duration:", duration, file=sys.stderr)
                 elif aligner == "star" and type == "x2":
                     assert os.path.exists("SJ.out.tab")
                     os.system("awk 'BEGIN {OFS=\"\t\"; strChar[0]=\".\"; strChar[1]=\"+\"; strChar[2]=\"-\";} {if($5>0){print $1,$2,$3,strChar[$4]}}' SJ.out.tab > SJ.out.tab.Pass1.sjdb")
@@ -1517,18 +1517,18 @@
                             continue
                         os.remove(file)
                     star_index_cmd = "STAR --genomeDir ./ --runMode genomeGenerate --genomeFastaFiles ../../../../data/genome.fa --sjdbFileChrStartEnd SJ.out.tab.Pass1.sjdb --sjdbOverhang 100 --runThreadN %d" % (num_threads)
-                    print >> sys.stderr, "\t", datetime.now(), star_index_cmd
+                    print("\t", datetime.now(), star_index_cmd, file=sys.stderr)
                     os.system(star_index_cmd)
                     if verbose:
-                        print >> sys.stderr, "\t", datetime.now(), " ".join(dummy_cmd)
+                        print("\t", datetime.now(), " ".join(dummy_cmd), file=sys.stderr)
                     proc = subprocess.Popen(dummy_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                     proc.communicate()
                     if verbose:
-                        print >> sys.stderr, "\t", datetime.now(), "finished"
+                        print("\t", datetime.now(), "finished", file=sys.stderr)
                     aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname, 1)
                     start_time = datetime.now()
                     if verbose:
-                        print >> sys.stderr, "\t", start_time, " ".join(aligner_cmd)
+                        print("\t", start_time, " ".join(aligner_cmd), file=sys.stderr)
                     proc = subprocess.Popen(aligner_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                     proc.communicate()
                     finish_time = datetime.now()
@@ -1537,7 +1537,7 @@
                     if duration < 0.1:
                         duration = 0.1
                     if verbose:
-                        print >> sys.stderr, "\t", finish_time, "finished:", duration
+                        print("\t", finish_time, "finished:", duration, file=sys.stderr)
                     os.system("mv Aligned.out.sam %s" % out_fname)
                 elif aligner == "tophat2":
                     os.system("samtools sort -n tophat_out/accepted_hits.bam accepted_hits; samtools view -h accepted_hits.bam > %s" % out_fname)
@@ -1597,8 +1597,8 @@
                             sql_execute("../" + sql_db_name, sql_insert)     
                     
 
-                    print >> sys.stderr, output,
-                    print >> done_file, output
+                    print(output, end=' ', file=sys.stderr)
+                    print(output, file=done_file)
                 else:
                     sum = [0, 0, 0, 0, 0]
                     stat = read_stat(read_sam, gtf_junctions, chr_dic)
@@ -1616,8 +1616,8 @@
                                     (workdir, genome, "single", aligner_name, get_aligner_version(aligner), "no", i, mapped_reads, junction_reads, gtf_junction_reads, num_junctions, num_gtf_junctions, duration, platform.node(), " ".join(aligner_cmd))
                             sql_execute("../" + sql_db_name, sql_insert)                    
                         
-                    print >> sys.stderr, output,
-                    print >> done_file, output
+                    print(output, end=' ', file=sys.stderr)
+                    print(output, file=done_file)
                     
                 done_file.close()
 
--- hisat2.orig/evaluation/real/init.py
+++ hisat2/evaluation/real/init.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os, signal
 import string, re
@@ -51,7 +51,7 @@
                  not os.path.exists(read_dir_base + read_dir + "/1.fq"):
                 continue
 
-        print >> sys.stderr, "Processing", read_dir, "..."
+        print("Processing", read_dir, "...", file=sys.stderr)
 
         os.mkdir(read_dir)
         os.chdir(read_dir)
@@ -74,18 +74,18 @@
 
             if dir_name == "whole":
                 ln_cmd = "ln -s ../../%s%s/%s ." % (read_dir_base, read_dir, fq_1_name)
-                print >> sys.stderr, ln_cmd
+                print(ln_cmd, file=sys.stderr)
                 os.system(ln_cmd)
                 ln_cmd = "ln -s ../../%s%s/%s ." % (read_dir_base, read_dir, fq_2_name)
-                print >> sys.stderr, ln_cmd
+                print(ln_cmd, file=sys.stderr)
                 os.system(ln_cmd)
             else:
                 cmd = make_cat_cmd(gz_file, read_dir_base, read_dir, fq_1_name, num_reads)
-                print >> sys.stderr, cmd
+                print(cmd, file=sys.stderr)
                 os.system(cmd)
 
                 cmd = make_cat_cmd(gz_file, read_dir_base, read_dir, fq_2_name, num_reads)
-                print >> sys.stderr, cmd
+                print(cmd, file=sys.stderr)
                 os.system(cmd)
 
             os.system("ln -s ../../calculate_read_cost.py .")
--- hisat2.orig/evaluation/simulation/calculate_read_cost.py
+++ hisat2/evaluation/simulation/calculate_read_cost.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os, subprocess
 import multiprocessing
@@ -170,18 +170,18 @@
                     repeat_db[rptRefName][rptName].allele[allele_idx].add_position(chr, pos, strand)
 
     else:
-        print >> sys.stderr, 'Cannot open file', repeat_filename
+        print('Cannot open file', repeat_filename, file=sys.stderr)
 
 
-    print >> sys.stderr, 'Build repeatMap'
+    print('Build repeatMap', file=sys.stderr)
     repeat_map = {}
-    for rptRefName, repeats in repeat_db.items():
+    for rptRefName, repeats in list(repeat_db.items()):
         #print 'Processing', rptRefName
         repeat_pos_list = []
 
-        for repeatName, repeat in repeats.items():
+        for repeatName, repeat in list(repeats.items()):
             #print 'Common Allele:', repeatName, repeat.repeat_name
-            repeat_left = sys.maxint
+            repeat_left = sys.maxsize
             repeat_right = 0
 
             #for allele_id, repeatAllele in repeat.allele.items():
@@ -242,11 +242,11 @@
 
     i = find_leftmost_pos(rmap, left)
     if i >= len(rmap):
-        print >> sys.stderr, 'Value Error'
+        print('Value Error', file=sys.stderr)
         return
 
     if right > rmap[i][0]:
-        print >> sys.stderr, 'Not repeat'
+        print('Not repeat', file=sys.stderr)
         return
 
     repeat = repeats[rmap[i][1]]
@@ -260,7 +260,7 @@
         if (left >= rpos) and (right <= rpos + rlen):
             offset = left - rpos
             for genome_pos in allele.positions:
-                print genome_pos[0], genome_pos[1] + offset + 1, genome_pos[2], genome_pos[1]
+                print(genome_pos[0], genome_pos[1] + offset + 1, genome_pos[2], genome_pos[1])
 
 """
 """
@@ -284,7 +284,7 @@
 
     genome_file.close()
 
-    print >> sys.stderr, "genome is loaded"
+    print("genome is loaded", file=sys.stderr)
     
     return chr_dic
 
@@ -331,7 +331,7 @@
     gtf_file.close()
     
     # Sort exons and merge where separating introns are <=5 bps
-    for tran, [chrom, strand, exons] in trans.items():
+    for tran, [chrom, strand, exons] in list(trans.items()):
             exons.sort()
             tmp_exons = [exons[0]]
             for i in range(1, len(exons)):
@@ -343,7 +343,7 @@
 
     # Calculate and print the unique junctions
     junctions = set()
-    for chrom, strand, exons in trans.values():
+    for chrom, strand, exons in list(trans.values()):
         for i in range(1, len(exons)):
             junctions.add(to_junction_str([chrom, exons[i-1][1], exons[i][0]]))
 
@@ -381,7 +381,7 @@
                     repeat_dic[rep][allele].append([chr, pos, strand])
                     repeat_pos[rep][allele].add(pos)
 
-    for rep, repeats in repeat_info.items():
+    for rep, repeats in list(repeat_info.items()):
         def my_cmp(a, b):
             if a[1] < b[1]:
                 return -1
@@ -687,11 +687,11 @@
 
     i = find_leftmost_pos(rmap, left)
     if i >= len(rmap):
-        print >> sys.stderr, 'Value Error'
+        print('Value Error', file=sys.stderr)
         return alignments
 
     if right > rmap[i][0]:
-        print >> sys.stderr, 'Not repeat'
+        print('Not repeat', file=sys.stderr)
         return alignments
 
     repeat = repeats[rmap[i][1]]
@@ -775,7 +775,7 @@
             prev_read_id = read_id
             continue
 
-        NH, NM, XA = "", sys.maxint, []
+        NH, NM, XA = "", sys.maxsize, []
         for i in range(11, len(cols)):
             col = cols[i]
             # "nM" from STAR
@@ -867,7 +867,7 @@
             chr, pos, cigar_str = alignment
             pos, cigar_str, NM_real = adjust_alignment(chr, pos, cigar_str)
             p_str = "%s\t%s\t%d\t%s\tNM:i:%d" % (read_id, chr, pos, cigar_str, NM_real)
-            print >> outfile, p_str
+            print(p_str, file=outfile)
 
         if aligner == "hisat2":
             if prev_read_id != read_id:
@@ -940,7 +940,7 @@
     num_aligned_reads, num_ualigned_reads = 0, 0
     
     prev_read_id, pair_list = "", set()
-    prev_NM = sys.maxint
+    prev_NM = sys.maxsize
     prev_NH1, prev_NH2 = 0, 0
     NH1_real, NH2_real = 0, 0
 
@@ -969,7 +969,7 @@
         if read_id != prev_read_id:
             num_pairs += 1
             pair_list = set()
-            prev_NM = sys.maxint
+            prev_NM = sys.maxsize
 
         flag = int(flag)
         canonical_pos1, canonical_pos2 = int(pos1), int(pos2)
@@ -982,7 +982,7 @@
             continue
 
         concordant = (flag & 0x2 != 0)        
-        NH, NM1, YT, XA = sys.maxint, sys.maxint, "", []
+        NH, NM1, YT, XA = sys.maxsize, sys.maxsize, "", []
         for i in range(11, len(cols)):
             col = cols[i]
             # "nM" from STAR
@@ -1151,7 +1151,7 @@
 
                         if p_str not in pair_reported:
                             pair_reported.add(p_str)
-                            print >> outfile, p_str
+                            print(p_str, file=outfile)
 
             if not me in read_dic:
                 read_dic[me] = []
@@ -1348,7 +1348,7 @@
     file.close()
 
     temp_junctions, temp_gtf_junctions = set(), set()
-    for read_name, can_junctions in db_junction_dic.items():
+    for read_name, can_junctions in list(db_junction_dic.items()):
         if len(can_junctions) <= 0:
             continue
 
@@ -1492,7 +1492,7 @@
                     break
 
         if found:
-            print >> mapped_file, read_name
+            print(read_name, file=mapped_file)
             mapped += 1
             if snp_included:
                 snp_mapped += 1
@@ -1501,7 +1501,7 @@
                 if snp_included:
                     snp_unique_mapped += 1
             if found_at_first:
-                print >> first_mapped_file, read_name
+                print(read_name, file=first_mapped_file)
                 first_mapped += 1
                 if snp_included:
                     snp_first_mapped += 1
@@ -1527,7 +1527,7 @@
             false_can_junctions += 1
         else:
             false_noncan_junctions += 1
-    print >> sys.stderr, "\t\t\tfalse junctions: %d (canonical), %d (non-canonical)" % (false_can_junctions, false_noncan_junctions)
+    print("\t\t\tfalse junctions: %d (canonical), %d (non-canonical)" % (false_can_junctions, false_noncan_junctions), file=sys.stderr)
     
     return mapped, unique_mapped, first_mapped, unmapped, aligned, multi_aligned, \
         snp_mapped, snp_unique_mapped, snp_first_mapped, snp_unmapped, \
@@ -1587,7 +1587,7 @@
     file.close()
 
     temp_junctions, temp_gtf_junctions = set(), set()
-    for read_name, can_junctions in db_junction_dic.items():
+    for read_name, can_junctions in list(db_junction_dic.items()):
         if len(can_junctions) <= 0:
             continue
 
@@ -1767,17 +1767,17 @@
                     break
 
         if found:
-            print >> mapped_file, read_name
+            print(read_name, file=mapped_file)
             mapped += 1
             if snp_included:
                 snp_mapped += 1
             if len(maps) == 1:
                 unique_mapped += 1
-                print >> uniq_mapped_file, read_name
+                print(read_name, file=uniq_mapped_file)
                 if snp_included:
                     snp_unique_mapped += 1
             if found_at_first:
-                print >> first_mapped_file, read_name
+                print(read_name, file=first_mapped_file)
                 first_mapped += 1
                 if snp_included:
                     snp_first_mapped += 1
@@ -1802,7 +1802,7 @@
             false_can_junctions += 1
         else:
             false_noncan_junctions += 1
-    print >> sys.stderr, "\t\t\tfalse junctions: %d (canonical), %d (non-canonical)" % (false_can_junctions, false_noncan_junctions)
+    print("\t\t\tfalse junctions: %d (canonical), %d (non-canonical)" % (false_can_junctions, false_noncan_junctions), file=sys.stderr)
         
     
     return mapped, unique_mapped, first_mapped, unmapped, aligned, multi_aligned, \
@@ -1832,9 +1832,9 @@
                 write = read_id in mapped_ids
 
             if write:
-                print >> mapped_file, line[:-1]
+                print(line[:-1], file=mapped_file)
             else:
-                print >> unmapped_file, line[:-1]
+                print(line[:-1], file=unmapped_file)
 
         read_file.close()
         mapped_file.close()
@@ -1865,7 +1865,7 @@
 """
 def create_sql_db(sql_db):
     if os.path.exists(sql_db):
-        print >> sys.stderr, sql_db, "already exists!"
+        print(sql_db, "already exists!", file=sys.stderr)
         return
     
     columns = [
@@ -1936,14 +1936,14 @@
             database_fname += "_single"
         database_fname += ".analysis"
         database_file = open(database_fname, "w")
-        print >> database_file, "end_type\ttype\taligner\tnum_reads\ttime\tmem\tmapped_reads\tunique_mapped_reads\tunmapped_reads\tmapping_point\ttrue_gtf_junctions\ttemp_junctions\ttemp_gtf_junctions"
+        print("end_type\ttype\taligner\tnum_reads\ttime\tmem\tmapped_reads\tunique_mapped_reads\tunmapped_reads\tmapping_point\ttrue_gtf_junctions\ttemp_junctions\ttemp_gtf_junctions", file=database_file)
         for aligner in aligners:
             for read_type in read_types:
                 sql_row = "SELECT end_type, type, aligner, num_reads, time, mem, mapped_reads, unique_mapped_reads, unmapped_reads, mapping_point, snp_mapped_reads, snp_unique_mapped_reads, snp_unmapped_reads, true_gtf_junctions, temp_junctions, temp_gtf_junctions FROM ReadCosts"
                 sql_row += " WHERE genome = '%s' and head = '%s' and aligner = '%s' and type = '%s' and end_type = '%s' ORDER BY created DESC LIMIT 1" % (genome_name, database_name, aligner, read_type, end_type)
                 output = sql_execute(sql_db, sql_row)
                 if output:
-                    print >> database_file, output
+                    print(output, file=database_file)
 
         database_file.close()
 
@@ -2069,7 +2069,7 @@
             type_sam_file.close()
             if numreads <= 0:
                 continue
-            print >> sys.stderr, "%s\t%d" % (readtype, numreads)
+            print("%s\t%d" % (readtype, numreads), file=sys.stderr)
 
             junctions, junctions_set = [], set()
             type_junction_file = open(type_junction_fname)
@@ -2208,7 +2208,7 @@
                     if version != "":
                         version = int(version)
                     else:
-                        version = sys.maxint
+                        version = sys.maxsize
 
                     if not RNA:
                         cmd += ["--no-spliced-alignment"]
@@ -2417,7 +2417,7 @@
                 if not RNA and readtype != "all":
                     continue
 
-                print >> sys.stderr, "\t%s\t%s" % (aligner_name, str(datetime.now()))
+                print("\t%s\t%s" % (aligner_name, str(datetime.now())), file=sys.stderr)
                 if options != "":
                     option_name = options.replace(' ', '').replace('-', '').replace(',', '')
                     aligner_name = aligner_name + '_' + option_name
@@ -2457,7 +2457,7 @@
                             dummy_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../one.fa", "../two.fa", "/dev/null")
                             start_time = datetime.now()
                             if verbose:
-                                print >> sys.stderr, start_time, "\t", " ".join(dummy_cmd)
+                                print(start_time, "\t", " ".join(dummy_cmd), file=sys.stderr)
                             if aligner in ["hisat2", "hisat", "bowtie", "bowtie2", "gsnap", "bwa"]:
                                 proc = subprocess.Popen(dummy_cmd, stdout=open("/dev/null", "w"), stderr=subprocess.PIPE)
                             else:
@@ -2467,14 +2467,14 @@
                             duration = finish_time - start_time
                             duration = duration.total_seconds()
                             if verbose:
-                                print >> sys.stderr, finish_time, "duration:", duration
+                                print(finish_time, "duration:", duration, file=sys.stderr)
                             loading_time = duration
 
                     # Align all reads
                     aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname)
                     start_time = datetime.now()
                     if verbose:
-                        print >> sys.stderr, "\t", start_time, " ".join(aligner_cmd)
+                        print("\t", start_time, " ".join(aligner_cmd), file=sys.stderr)
                     if aligner in ["hisat2", "hisat", "bowtie", "bowtie2", "gsnap", "bwa", "vg", "minimap2"]:
                         proc = subprocess.Popen(aligner_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE)
                     else:
@@ -2487,11 +2487,11 @@
                     if duration < 0.1:
                         duration = 0.1
                     if verbose:
-                        print >> sys.stderr, "\t", finish_time, "finished:", duration
+                        print("\t", finish_time, "finished:", duration, file=sys.stderr)
 
                     if debug and aligner == "hisat2":
                         os.system("cat metrics.out")
-                        print >> sys.stderr, "\ttime: %.4f" % (duration)
+                        print("\ttime: %.4f" % (duration), file=sys.stderr)
 
                     if aligner == "star" and type in ["", "gtf"]:
                         os.system("mv Aligned.out.sam %s" % out_fname)
@@ -2499,7 +2499,7 @@
                         aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname, 1)
                         start_time = datetime.now()
                         if verbose:
-                            print >> sys.stderr, "\t", start_time, " ".join(aligner_cmd)
+                            print("\t", start_time, " ".join(aligner_cmd), file=sys.stderr)
                         proc = subprocess.Popen(aligner_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE)
                         proc.communicate()
                         finish_time = datetime.now()
@@ -2508,7 +2508,7 @@
                         if duration < 0.1:
                             duration = 0.1
                         if verbose:
-                            print >> sys.stderr, "\t", finish_time, "finished:", duration
+                            print("\t", finish_time, "finished:", duration, file=sys.stderr)
                     elif aligner == "star" and type == "x2":
                         assert os.path.exists("SJ.out.tab")
                         os.system("awk 'BEGIN {OFS=\"\t\"; strChar[0]=\".\"; strChar[1]=\"+\"; strChar[2]=\"-\";} {if($5>0){print $1,$2,$3,strChar[$4]}}' SJ.out.tab > SJ.out.tab.Pass1.sjdb")
@@ -2518,18 +2518,18 @@
                             os.remove(file)
                         star_index_cmd = "%s/STAR --genomeDir ./ --runMode genomeGenerate --genomeFastaFiles ../../../data/%s.fa --sjdbFileChrStartEnd SJ.out.tab.Pass1.sjdb --sjdbOverhang 99 --runThreadN %d" % (aligner_bin_base, genome, num_threads)
                         if verbose:
-                            print >> sys.stderr, "\t", datetime.now(), star_index_cmd
+                            print("\t", datetime.now(), star_index_cmd, file=sys.stderr)
                         os.system(star_index_cmd)
                         if verbose:
-                            print >> sys.stderr, "\t", datetime.now(), " ".join(dummy_cmd)
+                            print("\t", datetime.now(), " ".join(dummy_cmd), file=sys.stderr)
                         proc = subprocess.Popen(dummy_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                         proc.communicate()
                         if verbose:
-                            print >> sys.stderr, "\t", datetime.now(), "finished"
+                            print("\t", datetime.now(), "finished", file=sys.stderr)
                         aligner_cmd = get_aligner_cmd(RNA, aligner, type, index_type, version, options, "../" + type_read1_fname, "../" + type_read2_fname, out_fname, 1)
                         start_time = datetime.now()
                         if verbose:
-                            print >> sys.stderr, "\t", start_time, " ".join(aligner_cmd)
+                            print("\t", start_time, " ".join(aligner_cmd), file=sys.stderr)
                         proc = subprocess.Popen(aligner_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                         proc.communicate()
                         finish_time = datetime.now()
@@ -2538,7 +2538,7 @@
                         if duration < 0.1:
                             duration = 0.1
                         if verbose:
-                            print >> sys.stderr, "\t", finish_time, "finished:", duration
+                            print("\t", finish_time, "finished:", duration, file=sys.stderr)
                         os.system("mv Aligned.out.sam %s" % out_fname)
                     elif aligner == "tophat2":
                         os.system("samtools sort -n tophat_out/accepted_hits.bam accepted_hits; samtools view -h accepted_hits.bam > %s" % out_fname)
@@ -2551,7 +2551,7 @@
                         os.system("tar cvzf %s.tar.gz %s &> /dev/null" % (out_fname, out_fname))
 
                 if runtime_only:
-                    print >> sys.stderr, "\t\t\tMemory Usage: %dMB" % (int(mem_usage) / 1024)
+                    print("\t\t\tMemory Usage: %dMB" % (int(mem_usage) / 1024), file=sys.stderr)
                     os.chdir("..")
                     continue
 
@@ -2560,7 +2560,7 @@
                     pid_list = []
                     if paired:
                         if mp_mode:
-                            for i in xrange(mp_num):
+                            for i in range(mp_num):
                                 p = Process(target=extract_pair, args=(out_fname, out_fname2, chr_dic, RNA, aligner, version, repeat_db, repeat_map, debug_dic, i))
                                 pid_list.append(p)
                                 p.start()
@@ -2570,7 +2570,7 @@
 
                             # merge 
                             os.system("mv %s %s" % (out_fname2 + ".0", out_fname2))
-                            for i in xrange(1, mp_num):
+                            for i in range(1, mp_num):
                                 os.system("cat %s >> %s" % (out_fname2 + "." + str(i), out_fname2))
                                 os.system("rm %s" % (out_fname2 + "." + str(i)))
 
@@ -2581,7 +2581,7 @@
                     else:
                         if mp_mode:
                             # Prepare queues
-                            for i in xrange(mp_num): 
+                            for i in range(mp_num): 
                                 p = Process(target=extract_single, args=(out_fname, out_fname2, chr_dic, aligner, version, repeat_db, repeat_map, debug_dic, i))
                                 pid_list.append(p)
                                 p.start()
@@ -2592,7 +2592,7 @@
 
                             # merge 
                             os.system("mv %s %s" % (out_fname2 + ".0", out_fname2))
-                            for i in xrange(1, mp_num):
+                            for i in range(1, mp_num):
                                 os.system("cat %s >> %s" % (out_fname2 + "." + str(i), out_fname2))
                                 os.system("rm %s" % (out_fname2 + "." + str(i)))
                             
@@ -2628,25 +2628,25 @@
                     assert mapped + unmapped == numreads
                     
                     if two_step:
-                        print >> sys.stderr, "\t\t%s" % readtype2
-                    print >> sys.stderr, "\t\taligned: %d, multi aligned: %d" % (aligned, multi_aligned)
-                    print >> sys.stderr, "\t\tcorrectly mapped: %d (%.2f%%) mapping_point: %.2f" % (mapped, float(mapped) * 100.0 / numreads, mapping_point * 100.0 / numreads)
-                    print >> sys.stderr, "\t\tcorrectly mapped at first: %d (%.2f%%)" % (first_mapped, float(first_mapped) * 100.0 / numreads)
-                    print >> sys.stderr, "\t\tuniquely and correctly mapped: %d (%.2f%%)" % (unique_mapped, float(unique_mapped) * 100.0 / numreads)
+                        print("\t\t%s" % readtype2, file=sys.stderr)
+                    print("\t\taligned: %d, multi aligned: %d" % (aligned, multi_aligned), file=sys.stderr)
+                    print("\t\tcorrectly mapped: %d (%.2f%%) mapping_point: %.2f" % (mapped, float(mapped) * 100.0 / numreads, mapping_point * 100.0 / numreads), file=sys.stderr)
+                    print("\t\tcorrectly mapped at first: %d (%.2f%%)" % (first_mapped, float(first_mapped) * 100.0 / numreads), file=sys.stderr)
+                    print("\t\tuniquely and correctly mapped: %d (%.2f%%)" % (unique_mapped, float(unique_mapped) * 100.0 / numreads), file=sys.stderr)
                     snp_numreads = snp_mapped + snp_unmapped
                     if snp_numreads > 0:
-                        print >> sys.stderr, "\t\t\t\tSNP: reads: %d" % (snp_numreads)
-                        print >> sys.stderr, "\t\t\t\tSNP: correctly mapped: %d (%.2f%%)" % (snp_mapped, float(snp_mapped) * 100.0 / snp_numreads)
-                        print >> sys.stderr, "\t\t\t\tSNP: correctly mapped at first: %d (%.2f%%)" % (snp_first_mapped, float(snp_first_mapped) * 100.0 / snp_numreads)
-                        print >> sys.stderr, "\t\t\t\tSNP: uniquely and correctly mapped: %d (%.2f%%)" % (snp_unique_mapped, float(snp_unique_mapped) * 100.0 / snp_numreads)
+                        print("\t\t\t\tSNP: reads: %d" % (snp_numreads), file=sys.stderr)
+                        print("\t\t\t\tSNP: correctly mapped: %d (%.2f%%)" % (snp_mapped, float(snp_mapped) * 100.0 / snp_numreads), file=sys.stderr)
+                        print("\t\t\t\tSNP: correctly mapped at first: %d (%.2f%%)" % (snp_first_mapped, float(snp_first_mapped) * 100.0 / snp_numreads), file=sys.stderr)
+                        print("\t\t\t\tSNP: uniquely and correctly mapped: %d (%.2f%%)" % (snp_unique_mapped, float(snp_unique_mapped) * 100.0 / snp_numreads), file=sys.stderr)
                     if readtype == readtype2:
-                        print >> sys.stderr, "\t\t\t%d reads per sec (all)" % (numreads / max(1.0, duration))
+                        print("\t\t\t%d reads per sec (all)" % (numreads / max(1.0, duration)), file=sys.stderr)
                     if RNA:
-                        print >> sys.stderr, "\t\tjunc. sensitivity %d / %d (%.2f%%), junc. accuracy: %d / %d (%.2f%%)" % \
+                        print("\t\tjunc. sensitivity %d / %d (%.2f%%), junc. accuracy: %d / %d (%.2f%%)" % \
                             (temp_gtf_junctions, len(junctions), float(temp_gtf_junctions) * 100.0 / max(1, len(junctions)), \
-                                 temp_gtf_junctions, temp_junctions, float(temp_gtf_junctions) * 100.0 / max(1, temp_junctions))
+                                 temp_gtf_junctions, temp_junctions, float(temp_gtf_junctions) * 100.0 / max(1, temp_junctions)), file=sys.stderr)
 
-                    print >> sys.stderr, "\t\t\tMemory Usage: %dMB" % (int(mem_usage) / 1024)
+                    print("\t\t\tMemory Usage: %dMB" % (int(mem_usage) / 1024), file=sys.stderr)
 
                     if duration > 0.0:
                         if sql_write and os.path.exists("../" + sql_db_name):
@@ -2669,14 +2669,14 @@
 
                 os.chdir("..")
 
-    print >> sys.stdout, "\t".join(["type", "aligner", "all", "all_time", "mem", "mapped", "unique_mapped", "unmapped", "mapping point", "snp_mapped", "snp_unique_mapped", "snp_unmapped", "true_gtf_junctions", "temp_junctions", "temp_gtf_junctions"])
+    print("\t".join(["type", "aligner", "all", "all_time", "mem", "mapped", "unique_mapped", "unmapped", "mapping point", "snp_mapped", "snp_unique_mapped", "snp_unmapped", "true_gtf_junctions", "temp_junctions", "temp_gtf_junctions"]), file=sys.stdout)
     for line in align_stat:
         outstr = ""
         for item in line:
             if outstr != "":
                 outstr += "\t"
             outstr += str(item)
-        print >> sys.stdout, outstr
+        print(outstr, file=sys.stdout)
 
     if os.path.exists(sql_db_name):
         write_analysis_data(sql_db_name, genome, data_base)
--- hisat2.orig/evaluation/simulation/init.py
+++ hisat2/evaluation/simulation/init.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os
 import string, re
@@ -50,7 +50,7 @@
 
                     if p_str not in pair_reported:
                         pair_reported.add(p_str)
-                        print >> out_file, p_str
+                        print(p_str, file=out_file)
 
         if not me in read_dic:
             read_dic[me] = []
@@ -83,8 +83,7 @@
             elif field.startswith("Zs"):
                 Zs = "\t" + field
 
-        print >> (sim1_file if left_read else sim2_file), \
-            "%s\t%s\t%s\t%s%s%s%s" % (read_id, chr, pos, cigar, TI, NM, Zs)
+        print("%s\t%s\t%s\t%s%s%s%s" % (read_id, chr, pos, cigar, TI, NM, Zs), file=(sim1_file if left_read else sim2_file))
 
     sim1_file.close()
     sim2_file.close()
@@ -194,7 +193,7 @@
                             break
                     assert left_anchor > 0
                     right_anchor = 0
-                    for ci in reversed(range(len(cigars))):
+                    for ci in reversed(list(range(len(cigars)))):
                         c = cigars[ci][-1]
                         c_len = int(cigars[ci][:-1])
                         if c in "MI":
@@ -262,7 +261,7 @@
 
                 if readtype == "all" or readtype == readtype2:
                     read_ids.add(read_id)
-                    print >> type_sam_file, line[:-1]
+                    print(line[:-1], file=type_sam_file)
                     junctions += get_junctions(chr, int(pos), cigar)
                     if paired:
                         junctions += get_junctions(chr2, int(pos2), cigar2)
@@ -285,7 +284,7 @@
             # write the junctions into a file
             type_junction_file = open(type_junction_fname, "w")
             for junction in junctions:
-                print >> type_junction_file, "%s\t%d\t%d" % (junction[0], junction[1], junction[2])
+                print("%s\t%d\t%d" % (junction[0], junction[1], junction[2]), file=type_junction_file)
             type_junction_file.close()
 
             def write_reads(read_fname, type_read_fname):
@@ -299,7 +298,7 @@
                         write = read_id in read_ids
 
                     if write:
-                        print >> type_read_file, line[:-1]
+                        print(line[:-1], file=type_read_file)
 
                 read_file.close()
                 type_read_file.close()
@@ -323,7 +322,7 @@
                 not os.path.exists(read_dir_base + read_dir + "/sim_2.fa"):
             continue
 
-        print >> sys.stderr, "Processing", read_dir, "..."
+        print("Processing", read_dir, "...", file=sys.stderr)
 
         os.mkdir(read_dir)
         os.chdir(read_dir)
--- hisat2.orig/evaluation/tests/HLA_novel/hisatgenotype_locus_prev.py
+++ hisat2/evaluation/tests/HLA_novel/hisatgenotype_locus_prev.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 #
 # Copyright 2015, Daehwan Kim <infphilo@gmail.com>
 #
@@ -79,7 +79,7 @@
         aligner_cmd += ["-1", "%s" % read_fname[0],
                         "-2", "%s" % read_fname[1]]
     if verbose >= 1:
-        print >> sys.stderr, ' '.join(aligner_cmd)
+        print(' '.join(aligner_cmd), file=sys.stderr)
     align_proc = subprocess.Popen(aligner_cmd,
                                   stdout=subprocess.PIPE,
                                   stderr=open("/dev/null", 'w'))
@@ -116,7 +116,7 @@
 """ 
 def normalize(prob):
     total = sum(prob.values())
-    for allele, mass in prob.items():
+    for allele, mass in list(prob.items()):
         prob[allele] = mass / total
 
         
@@ -124,7 +124,7 @@
 """
 def prob_diff(prob1, prob2):
     diff = 0.0
-    for allele in prob1.keys():
+    for allele in list(prob1.keys()):
         if allele in prob2:
             diff += abs(prob1[allele] - prob2[allele])
         else:
@@ -153,15 +153,15 @@
                      Gene_length):
     def normalize2(prob, length):
         total = 0
-        for allele, mass in prob.items():
+        for allele, mass in list(prob.items()):
             assert allele in length
             total += (mass / length[allele])
-        for allele, mass in prob.items():
+        for allele, mass in list(prob.items()):
             assert allele in length
             prob[allele] = mass / length[allele] / total
 
     Gene_prob, Gene_prob_next = {}, {}
-    for cmpt, count in Gene_cmpt.items():
+    for cmpt, count in list(Gene_cmpt.items()):
         alleles = cmpt.split('-')
         for allele in alleles:
             if allele not in Gene_prob:
@@ -171,7 +171,7 @@
     normalize(Gene_prob)
     def next_prob(Gene_cmpt, Gene_prob, Gene_length):
         Gene_prob_next = {}
-        for cmpt, count in Gene_cmpt.items():
+        for cmpt, count in list(Gene_cmpt.items()):
             alleles = cmpt.split('-')
             alleles_prob = 0.0
             for allele in alleles:
@@ -201,14 +201,14 @@
             Gene_prob_next2 = next_prob(Gene_cmpt, Gene_prob_next, Gene_length)
             sum_squared_r, sum_squared_v = 0.0, 0.0
             p_r, p_v = {}, {}
-            for a in Gene_prob.keys():
+            for a in list(Gene_prob.keys()):
                 p_r[a] = Gene_prob_next[a] - Gene_prob[a]
                 sum_squared_r += (p_r[a] * p_r[a])
                 p_v[a] = Gene_prob_next2[a] - Gene_prob_next[a] - p_r[a]
                 sum_squared_v += (p_v[a] * p_v[a])
             if sum_squared_v > 0.0:
                 gamma = -math.sqrt(sum_squared_r / sum_squared_v)
-                for a in Gene_prob.keys():
+                for a in list(Gene_prob.keys()):
                     Gene_prob_next2[a] = max(0.0, Gene_prob[a] - 2 * gamma * p_r[a] + gamma * gamma * p_v[a]);
                 Gene_prob_next = next_prob(Gene_cmpt, Gene_prob_next2, Gene_length)
 
@@ -219,17 +219,17 @@
         if iter >= 10:
             Gene_prob2 = {}
             avg_prob = sum(Gene_prob.values()) / len(Gene_prob)
-            for allele, prob in Gene_prob.items():
+            for allele, prob in list(Gene_prob.items()):
                 if prob >= 0.005 or prob > avg_prob:
                     Gene_prob2[allele] = prob
             Gene_prob = Gene_prob2
 
         # DK - debugging purposes
         if iter % 10 == 0 and False:
-            print "iter", iter
-            for allele, prob in Gene_prob.items():
+            print("iter", iter)
+            for allele, prob in list(Gene_prob.items()):
                 if prob >= 0.01:
-                    print >> sys.stderr, "\t", iter, allele, prob, str(datetime.now())
+                    print("\t", iter, allele, prob, str(datetime.now()), file=sys.stderr)
         
         iter += 1
         
@@ -241,7 +241,7 @@
     
     # normalize(Gene_prob)
     normalize2(Gene_prob, Gene_length)
-    Gene_prob = [[allele, prob] for allele, prob in Gene_prob.items()]
+    Gene_prob = [[allele, prob] for allele, prob in list(Gene_prob.items())]
     Gene_prob = sorted(Gene_prob, cmp=Gene_prob_cmp)
     return Gene_prob
 
@@ -288,7 +288,7 @@
 """
 def get_exonic_vars(Vars, exons):
     vars = set()
-    for var_id, var in Vars.items():
+    for var_id, var in list(Vars.items()):
         var_type, var_left, var_data = var
         var_right = var_left
         if var_type == "deletion":
@@ -305,7 +305,7 @@
 """
 def get_rep_alleles(Links, exon_vars):
     allele_vars = {}
-    for var, alleles in Links.items():
+    for var, alleles in list(Links.items()):
         if var not in exon_vars:
             continue
         for allele in alleles:
@@ -314,7 +314,7 @@
             allele_vars[allele].add(var)
 
     allele_groups = {}
-    for allele, vars in allele_vars.items():
+    for allele, vars in list(allele_vars.items()):
         vars = '-'.join(vars)
         if vars not in allele_groups:
             allele_groups[vars] = []
@@ -322,7 +322,7 @@
 
     allele_reps = {} # allele representatives
     allele_rep_groups = {} # allele groups by allele representatives
-    for allele_members in allele_groups.values():
+    for allele_members in list(allele_groups.values()):
         assert len(allele_members) > 0
         allele_rep = allele_members[0]
         allele_rep_groups[allele_rep] = allele_members
@@ -402,9 +402,9 @@
                 if j_type == "single" and j_pos == latest_pos - 1:
                     j_cmp_pos = j_pos - del_len + other_del_len
                     if debug:
-                        print Vars[j_id]
-                        print j_pos, ref_seq[j_pos]
-                        print j_cmp_pos, ref_seq[j_cmp_pos]
+                        print(Vars[j_id])
+                        print(j_pos, ref_seq[j_pos])
+                        print(j_cmp_pos, ref_seq[j_cmp_pos])
                     if j_data == ref_seq[j_cmp_pos]:
                         add_alt(Alts, alt_list, var_id, j_id)
                         latest_pos = j_pos
@@ -465,9 +465,9 @@
                     # DK - debugging purposes
                     if debug:
                         pos2_ = latest_pos + 1 + del_len - other_del_len
-                        print "DK: latest_pos:", latest_pos + 1, pos2_
-                        print "DK: var_pos:", var_pos, "del_len:", del_len, "other_del_len:", other_del_len
-                        print "DK:", ref_seq[latest_pos + 1], ref_seq[pos2_]
+                        print("DK: latest_pos:", latest_pos + 1, pos2_)
+                        print("DK: var_pos:", var_pos, "del_len:", del_len, "other_del_len:", other_del_len)
+                        print("DK:", ref_seq[latest_pos + 1], ref_seq[pos2_])
                     
                     latest_pos += 1
                     add_alt(Alts, alt_list, var_id, str(latest_pos))
@@ -479,9 +479,9 @@
                 if j_type == "single" and j_pos == latest_pos + 1:
                     j_cmp_pos = j_pos + del_len - other_del_len
                     if debug:
-                        print Vars[j_id]
-                        print j_pos, ref_seq[j_pos]
-                        print j_cmp_pos, ref_seq[j_cmp_pos]
+                        print(Vars[j_id])
+                        print(j_pos, ref_seq[j_pos])
+                        print(j_cmp_pos, ref_seq[j_cmp_pos])
 
                     if j_data == ref_seq[j_cmp_pos]:
                         add_alt(Alts, alt_list, var_id, j_id)
@@ -542,7 +542,7 @@
             continue
         debug = (var_id == "hv454a")
         if debug:
-            print Vars[var_id]
+            print(Vars[var_id])
 
         alt_list = []
         var_j = lower_bound(Var_list, var_pos + del_len - 1)
@@ -578,7 +578,7 @@
                                debug)
 
         if debug:
-            print "DK :-)"
+            print("DK :-)")
             sys.exit(1)
 
     def assert_print_alts(Alts, dir):
@@ -640,8 +640,8 @@
                         
             return seq, seq_left, seq_right
         
-        for alt_list1, alt_list2 in Alts.items():
-            if verbose >= 2: print >> sys.stderr, "\t", dir, ":", alt_list1, alt_list2
+        for alt_list1, alt_list2 in list(Alts.items()):
+            if verbose >= 2: print("\t", dir, ":", alt_list1, alt_list2, file=sys.stderr)
             out_str = "\t\t"
             alt_list1 = alt_list1.split('-')            
             for i in range(len(alt_list1)):
@@ -664,7 +664,7 @@
                     if j + 1 < len(alt_list3):
                         out_str += ", "
                 out_str += "]"
-            if verbose >= 2: print >> sys.stderr, out_str
+            if verbose >= 2: print(out_str, file=sys.stderr)
 
             for i in range(len(alt_list2)):
                 alt_list3 = alt_list2[i]
@@ -702,9 +702,9 @@
                         else:
                             seq2 += ref_seq[seq2_right:seq2_right+len_diff]
                 if verbose >= 3:
-                    print >> sys.stderr, "\t\t", alt_list1, alt_list3
-                    print >> sys.stderr, "\t\t\t", seq1, seq1_left, seq1_right
-                    print >> sys.stderr, "\t\t\t", seq2, seq2_left, seq2_right
+                    print("\t\t", alt_list1, alt_list3, file=sys.stderr)
+                    print("\t\t\t", seq1, seq1_left, seq1_right, file=sys.stderr)
+                    print("\t\t\t", seq2, seq2_left, seq2_right, file=sys.stderr)
                 assert seq1 == seq2            
             
     assert_print_alts(Alts_left, "left")
@@ -739,7 +739,7 @@
             # Left direction
             id_str = var_id
             total_del_len = length if type == "deletion" else 0
-            for j in reversed(range(0, i)):
+            for j in reversed(list(range(0, i))):
                 cmp_j = cmp_list[j]
                 j_type, j_pos, j_len = cmp_j[:3]
                 if j_type != "match":
@@ -792,16 +792,16 @@
                             alt_left_pos += alt_total_del_len
                         if left_pos >= alt_left_pos:
                             if verbose >= 2:
-                                print "LEFT:", cmp_list
-                                print "\t", type, "id_str:", id_str, "=>", alts_id_str, "=>", back_alts, "left_pos:", left_pos, "alt_left_pos:", alt_left_pos
+                                print("LEFT:", cmp_list)
+                                print("\t", type, "id_str:", id_str, "=>", alts_id_str, "=>", back_alts, "left_pos:", left_pos, "alt_left_pos:", alt_left_pos)
                             cmp_left = i + 1
                             break
 
             # DK - debugging purposes
             if debug:
-                print "DK: var_id:", var_id
-                print "DK: cmp_list:", cmp_list
-                print "DK: cmp_right:", cmp_right
+                print("DK: var_id:", var_id)
+                print("DK: cmp_list:", cmp_list)
+                print("DK: cmp_right:", cmp_right)
                 # sys.exit(1)
     
             # Right direction
@@ -824,7 +824,7 @@
 
                 # DK - debugging purposes
                 if debug:
-                    print "DK: id_str:", id_str
+                    print("DK: id_str:", id_str)
                 
                 if id_str in Alts_right:
                     orig_alts = id_str.split('-')
@@ -877,8 +877,8 @@
                                     
                         if right_pos <= alt_right_pos:
                             if verbose >= 2:
-                                print "RIGHT:", cmp_list
-                                print "\t", type, "id_str:", id_str, "=>", alts_id_str, "right_pos:", right_pos, "alt_right_pos:", alt_right_pos
+                                print("RIGHT:", cmp_list)
+                                print("\t", type, "id_str:", id_str, "=>", alts_id_str, "right_pos:", right_pos, "alt_right_pos:", alt_right_pos)
                             cmp_right = i - 1
                             break
         i += 1
@@ -971,7 +971,7 @@
         num_nt = sum(nt_dic.values())
         nt_set = []
         if num_nt >= 20:
-            for nt, count in nt_dic.items():
+            for nt, count in list(nt_dic.items()):
                 if nt not in "ACGT":
                     continue
                 if count >= num_nt * 0.2 or count >= 7:
@@ -980,7 +980,7 @@
 
     # Sort variants
     var_list = [[] for i in range(len(mpileup))]
-    for var_id, value in vars.items():
+    for var_id, value in list(vars.items()):
         var_type, var_pos, var_data = value
         assert var_pos < len(var_list)
         var_list[var_pos].append([var_id, var_type, var_data])
@@ -991,7 +991,7 @@
         nt_dic = mpileup[i][1]
         ref_nt = ref_seq[i]
         new_nt_dic = {}
-        for nt, count in nt_dic.items():
+        for nt, count in list(nt_dic.items()):
             var_id = ""
             if nt == 'D':
                 if i <= skip_i:
@@ -1041,8 +1041,8 @@
                   cmp_list,
                   debug = False):
     if debug:
-        print cmp_list
-        print read_seq
+        print(cmp_list)
+        print(read_seq)
 
     i = 0
     while i < len(cmp_list):
@@ -1089,7 +1089,7 @@
             nt_set = mpileup[left][0]
 
             if debug:
-                print left, read_bp, ref_bp, mpileup[left]
+                print(left, read_bp, ref_bp, mpileup[left])
 
             if len(nt_set) > 0 and read_bp not in nt_set:
                 read_bp = 'N' if len(nt_set) > 1 else nt_set[0]
@@ -1113,8 +1113,8 @@
                         var_idx += 1
 
                 if debug:
-                    print left, read_bp, ref_bp, mpileup[left]
-                    print cmp_list[i]
+                    print(left, read_bp, ref_bp, mpileup[left])
+                    print(cmp_list[i])
 
         read_pos += length
         i += 1
@@ -1132,8 +1132,8 @@
         i += 1
 
     if debug:
-        print cmp_list
-        print read_seq
+        print(cmp_list)
+        print(read_seq)
                             
     return cmp_list, read_seq
 
@@ -1267,9 +1267,9 @@
     for aligner, index_type in aligners:
         for f_ in [sys.stderr, report_file]:
             if index_type == "graph":
-                print >> f_, "\n\t\t%s %s" % (aligner, index_type)
+                print("\n\t\t%s %s" % (aligner, index_type), file=f_)
             else:
-                print >> f_, "\n\t\t%s %s" % (aligner, index_type)
+                print("\n\t\t%s %s" % (aligner, index_type), file=f_)
 
         remove_alignment_file = False
         if alignment_fname == "":
@@ -1364,7 +1364,7 @@
 
             # List of nodes that represent alleles
             allele_vars = {}
-            for var_id, allele_list in Links.items():
+            for var_id, allele_list in list(Links.items()):
                 for allele_id in allele_list:
                     if allele_id not in Genes[gene]:
                         continue
@@ -1480,8 +1480,8 @@
                     # Unalined?
                     if flag & 0x4 != 0:
                         if simulation and verbose >= 2:
-                            print "Unaligned"
-                            print "\t", line
+                            print("Unaligned")
+                            print("\t", line)
                         continue
 
                     # Concordantly mapped?
@@ -1686,7 +1686,7 @@
                             # Check if this deletion is artificial alignment
                             assert right_pos < mpileup
                             del_count, nt_count = 0, 0
-                            for nt, value in mpileup[right_pos][1].items():
+                            for nt, value in list(mpileup[right_pos][1].items()):
                                 count = value[0]
                                 if nt == 'D':
                                     del_count += count
@@ -1775,7 +1775,7 @@
                     def add_stat(Gene_cmpt, Gene_counts, Gene_count_per_read, include_alleles = set()):
                         max_count = max(Gene_count_per_read.values())
                         cur_cmpt = set()
-                        for allele, count in Gene_count_per_read.items():
+                        for allele, count in list(Gene_count_per_read.items()):
                             if count < max_count:
                                 continue
 
@@ -1796,7 +1796,7 @@
                         # alleles = ["A*24:36N", "A*24:359N"]
                         allele1_found, allele2_found = False, False
                         if alleles[0] != "":
-                            for allele, count in Gene_count_per_read.items():
+                            for allele, count in list(Gene_count_per_read.items()):
                                 if count < max_count:
                                     continue
                                 if allele == alleles[0]:
@@ -1804,13 +1804,13 @@
                                 elif allele == alleles[1]:
                                     allele2_found = True
                             if allele1_found != allele2_found:
-                                print alleles[0], Gene_count_per_read[alleles[0]]
-                                print alleles[1], Gene_count_per_read[alleles[1]]
+                                print(alleles[0], Gene_count_per_read[alleles[0]])
+                                print(alleles[1], Gene_count_per_read[alleles[1]])
                                 if allele1_found:
-                                    print ("%s\tread_id %s - %d vs. %d]" % (alleles[0], prev_read_id, max_count, Gene_count_per_read[alleles[1]]))
+                                    print("%s\tread_id %s - %d vs. %d]" % (alleles[0], prev_read_id, max_count, Gene_count_per_read[alleles[1]]))
                                 else:
-                                    print ("%s\tread_id %s - %d vs. %d]" % (alleles[1], prev_read_id, max_count, Gene_count_per_read[alleles[0]]))
-                                print read_seq
+                                    print("%s\tread_id %s - %d vs. %d]" % (alleles[1], prev_read_id, max_count, Gene_count_per_read[alleles[0]]))
+                                print(read_seq)
 
                         cur_cmpt = sorted(list(cur_cmpt))
                         cur_cmpt = '-'.join(cur_cmpt)
@@ -1832,7 +1832,7 @@
                             if debug_read_count == debug_max_read_count and \
                                Gene_count_per_read["A*24:02:01:02L"] < debug_max_read_count and \
                                Gene_count_per_read["A*01:01:01:01"] < debug_max_read_count:
-                                print prev_read_id
+                                print(prev_read_id)
                                 None
 
                             if prev_read_id == "HSQ1008:175:C0JVFACXX:7:1208:5604:41201":
@@ -1861,9 +1861,9 @@
                                base_fname == "hla":
                                 cur_cmpt = cur_cmpt.split('-')
                                 if not(set(cur_cmpt) & set(test_Gene_names)):
-                                    print "%s are chosen instead of %s" % ('-'.join(cur_cmpt), '-'.join(test_Gene_names))
+                                    print("%s are chosen instead of %s" % ('-'.join(cur_cmpt), '-'.join(test_Gene_names)))
                                     for prev_line in prev_lines:
-                                        print "\t", prev_line
+                                        print("\t", prev_line)
 
                             prev_lines = []
 
@@ -1880,18 +1880,18 @@
                         alleles = Links[var_id]
                         if verbose >= 2:
                             if add > 0 and not (set(alleles) & debug_allele_names):
-                                print "Add:", add, debug_allele_names, "-", var_id
-                                print "\t", line
-                                print "\t", alleles
+                                print("Add:", add, debug_allele_names, "-", var_id)
+                                print("\t", line)
+                                print("\t", alleles)
                             if add < 0 and set(alleles) & debug_allele_names:
-                                print "Add:", add, debug_allele_names, "-", var_id
-                                print "\t", line
+                                print("Add:", add, debug_allele_names, "-", var_id)
+                                print("\t", line)
 
                         for allele in alleles:
                             count_per_read[allele] += add
 
                     # Decide which allele(s) a read most likely came from
-                    for var_id, data in gene_vars.items():
+                    for var_id, data in list(gene_vars.items()):
                         if var_id == "unknown" or var_id.startswith("nv"):
                             continue
                         var_type, var_pos, var_data = data
@@ -1924,14 +1924,14 @@
                     DK_debug = False
                     if orig_read_id == "a46|L_451_88M12D12M_88|D|hv2":
                         DK_debug = True
-                        print line
-                        print cmp_list
-                        print "positive vars:", positive_vars
-                        print "negative vars:", negative_vars
-                        print "cmp_list[%d, %d]" % (cmp_list_left, cmp_list_right)
+                        print(line)
+                        print(cmp_list)
+                        print("positive vars:", positive_vars)
+                        print("negative vars:", negative_vars)
+                        print("cmp_list[%d, %d]" % (cmp_list_left, cmp_list_right))
 
                     # Deletions at 5' and 3' ends
-                    for var_id, data in gene_vars.items():
+                    for var_id, data in list(gene_vars.items()):
                         var_type, var_pos, var_data = data
                         if var_type != "deletion":
                             continue
@@ -2002,8 +2002,8 @@
                                     positive_vars.add(var_id)
 
                                     if read_id == "HSQ1008:175:C0JVFACXX:6:2207:13481:60924" and False:
-                                        print "add positive var:", var_id
-                                        print "\tcmp_list:", cmp_list_left, cmp_list_right, cmp_list
+                                        print("add positive var:", var_id)
+                                        print("\tcmp_list:", cmp_list_left, cmp_list_right, cmp_list)
 
                             
                             cmp_MD += ("%d%s" % (MD_match_len, ref_seq[ref_pos]))
@@ -2069,15 +2069,15 @@
                     if read_pos != len(read_seq) or \
                             cmp_cigar_str != cigar_str:
                             # cmp_MD != MD: # Disabled due to error correction
-                        print >> sys.stderr, "Error:", cigar_str, MD
-                        print >> sys.stderr, "\tcomputed:", cmp_cigar_str, cmp_MD
-                        print >> sys.stderr, "\tcmp list:", cmp_list
+                        print("Error:", cigar_str, MD, file=sys.stderr)
+                        print("\tcomputed:", cmp_cigar_str, cmp_MD, file=sys.stderr)
+                        print("\tcmp list:", cmp_list, file=sys.stderr)
                         assert False
 
                     # DK - debugging purposes
                     if DK_debug:
-                        print "positive:", positive_vars
-                        print "negative:", negative_vars
+                        print("positive:", positive_vars)
+                        print("negative:", negative_vars)
 
                     # Node
                     if assembly:
@@ -2100,8 +2100,8 @@
                         add_count(Gene_gen_count_per_read, positive_var, 1)
 
                     if read_id == "HSQ1008:175:C0JVFACXX:6:2207:13481:60924" and False:
-                        print "positive_vars:", positive_vars
-                        print "negative_vars:", negative_vars
+                        print("positive_vars:", positive_vars)
+                        print("negative_vars:", negative_vars)
 
 
                     for negative_var in negative_vars:
@@ -2118,7 +2118,7 @@
                     continue
 
                 for f_ in [sys.stderr, report_file]:
-                    print >> f_, "\t\t\tNumber of reads aligned: %d" % num_reads
+                    print("\t\t\tNumber of reads aligned: %d" % num_reads, file=f_)
 
                 if prev_read_id != None:
                     if base_fname == "hla":
@@ -2182,7 +2182,7 @@
 
             if base_fname != "hla":
                 Gene_counts = Gene_gen_counts
-            Gene_counts = [[allele, count] for allele, count in Gene_counts.items()]
+            Gene_counts = [[allele, count] for allele, count in list(Gene_counts.items())]
             def Gene_count_cmp(a, b):
                 if a[1] != b[1]:
                     return b[1] - a[1]
@@ -2199,7 +2199,7 @@
                     for test_Gene_name in test_Gene_names:
                         if count[0] == test_Gene_name:
                             for f_ in [sys.stderr, report_file]:
-                                print >> f_, "\t\t\t*** %d ranked %s (count: %d)" % (count_i + 1, test_Gene_name, count[1])
+                                print("\t\t\t*** %d ranked %s (count: %d)" % (count_i + 1, test_Gene_name, count[1]), file=f_)
                             found = True
                             """
                             if count_i > 0 and Gene_counts[0][1] > count[1]:
@@ -2210,14 +2210,14 @@
                             """
                     if count_i < 5 and not found:
                         for f_ in [sys.stderr, report_file]:
-                            print >> f_, "\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1])
+                            print("\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1]), file=f_)
                 else:
                     for f_ in [sys.stderr, report_file]:
-                        print >> f_, "\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1])
+                        print("\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1]), file=f_)
                     if count_i >= 9:
                         break
             for f_ in [sys.stderr, report_file]:
-                print >> f_
+                print(file=f_)
 
             # Calculate the abundance of representative alleles on exonic sequences
             if base_fname == "hla":
@@ -2238,7 +2238,7 @@
 
                 if len(gen_alleles) > 0:
                     Gene_gen_cmpt2 = {}
-                    for cmpt, value in Gene_gen_cmpt.items():
+                    for cmpt, value in list(Gene_gen_cmpt.items()):
                         cmpt2 = []
                         for allele in cmpt.split('-'):
                             if allele in gen_alleles:
@@ -2262,7 +2262,8 @@
                             Gene_combined_prob[allele] = prob
                     for allele, prob in Gene_gen_prob:
                         Gene_combined_prob[allele] = prob * gen_prob_sum
-                    Gene_prob = [[allele, prob] for allele, prob in Gene_combined_prob.items()]
-                    Gene_prob = sorted(Gene_prob, cmp=Gene_prob_cmp)
+                    Gene_prob = [[allele, prob] for allele, prob in list(Gene_combined_prob.items())]
+                    from functools import cmp_to_key  # Python 3 removed sorted(cmp=...)
+                    Gene_prob = sorted(Gene_prob, key=cmp_to_key(Gene_prob_cmp))
             else:
                 Gene_prob = single_abundance(Gene_gen_cmpt, Gene_lengths[gene])
@@ -2302,8 +2302,8 @@
                                                             ref_allele)
 
                     def get_best_alleles(left, right, vars):
-                        max_alleles, max_common = [], -sys.maxint
-                        for allele_name, allele_node in predicted_allele_nodes.items():
+                        max_alleles, max_common = [], -sys.maxsize
+                        for allele_name, allele_node in list(predicted_allele_nodes.items()):
                             tmp_vars = allele_node.get_var_ids(left, right)
                             tmp_common = len(set(vars) & set(tmp_vars))
                             tmp_common -= len(set(vars) | set(tmp_vars))
@@ -2315,13 +2315,13 @@
                         return max_alleles
 
                     for run, plus, minus in run_alignments:
-                        print run
-                        print "\tplus:"
+                        print(run)
+                        print("\tplus:")
                         for left, right, vars in plus:
-                            print "\t\t", left, right, vars, get_best_alleles(left, right, vars)
-                        print "\tminus:"
+                            print("\t\t", left, right, vars, get_best_alleles(left, right, vars))
+                        print("\tminus:")
                         for left, right, vars in minus:
-                            print "\t\t", left, right, vars, get_best_alleles(left, right, vars)
+                            print("\t\t", left, right, vars, get_best_alleles(left, right, vars))
                             
                     assert False
 
@@ -2342,7 +2342,7 @@
                 # Compare two alleles
                 if simulation and len(test_Gene_names) == 2:
                     allele_name1, allele_name2 = test_Gene_names
-                    print >> sys.stderr, allele_name1, "vs.", allele_name2
+                    print(allele_name1, "vs.", allele_name2, file=sys.stderr)
                     asm_graph.print_node_comparison(asm_graph.true_allele_nodes)
 
                 def compare_alleles(vars1, vars2, print_output = True):
@@ -2374,8 +2374,8 @@
                             skip = False
                             if print_output:
                                 if cmp_var_in_exon:
-                                    print >> sys.stderr, "\033[94mexon%d\033[00m" % (exon_i + 1),
-                                print >> sys.stderr, cmp_var_id, cmp_var, "\t\t\t", mpileup[cmp_var[1]]
+                                    print("\033[94mexon%d\033[00m" % (exon_i + 1), end=' ', file=sys.stderr)
+                                print(cmp_var_id, cmp_var, "\t\t\t", mpileup[cmp_var[1]], file=sys.stderr)
                             var_i += 1; var_j += 1
                             continue
                         if cmp_var[1] <= node_var[1]:
@@ -2384,42 +2384,42 @@
                                     if print_output:
                                         if cmp_var_in_exon:
                                             for f_ in [sys.stderr, report_file]:
-                                                print >> f_, "\033[94mexon%d\033[00m" % (exon_i + 1),
+                                                print("\033[94mexon%d\033[00m" % (exon_i + 1), end=' ', file=f_)
                                         for f_ in [sys.stderr, report_file]:
-                                            print >> f_, "***", cmp_var_id, cmp_var, "==", "\t\t\t", mpileup[cmp_var[1]]
+                                            print("***", cmp_var_id, cmp_var, "==", "\t\t\t", mpileup[cmp_var[1]], file=f_)
                                     mismatches += 1
                             var_i += 1
                         else:
                             if print_output:
                                 if node_var_in_exon:
                                     for f_ in [sys.stderr, report_file]:
-                                        print >> f_, "\033[94mexon%d\033[00m" % (exon_i + 1),
+                                        print("\033[94mexon%d\033[00m" % (exon_i + 1), end=' ', file=f_)
                                 for f_ in [sys.stderr, report_file]:
-                                    print >> f_, "*** ==", node_var_id, node_var, "\t\t\t", mpileup[node_var[1]]
+                                    print("*** ==", node_var_id, node_var, "\t\t\t", mpileup[node_var[1]], file=f_)
                             mismatches += 1
                             var_j += 1
                             
                     return mismatches
                     
                 tmp_nodes = asm_graph.nodes
-                print >> sys.stderr, "Number of tmp nodes:", len(tmp_nodes)
+                print("Number of tmp nodes:", len(tmp_nodes), file=sys.stderr)
                 count = 0
-                for id, node in tmp_nodes.items():
+                for id, node in list(tmp_nodes.items()):
                     count += 1
                     if count > 10:
                         break
                     node_vars = node.get_var_ids()
-                    node.print_info(); print >> sys.stderr
+                    node.print_info(); print(file=sys.stderr)
                     if node.id in asm_graph.to_node:
                         for id2, at in asm_graph.to_node[node.id]:
-                            print >> sys.stderr, "\tat %d ==> %s" % (at, id2)
+                            print("\tat %d ==> %s" % (at, id2), file=sys.stderr)
 
                     if simulation:
                         cmp_Gene_names = test_Gene_names
                     else:
                         cmp_Gene_names = [allele_name for allele_name, _ in allele_node_order]
                         
-                    alleles, cmp_vars, max_common = [], [], -sys.maxint
+                    alleles, cmp_vars, max_common = [], [], -sys.maxsize
                     for cmp_Gene_name in cmp_Gene_names:
                         tmp_vars = allele_nodes[cmp_Gene_name].get_var_ids(node.left, node.right)
                         tmp_common = len(set(node_vars) & set(tmp_vars))
@@ -2432,19 +2432,19 @@
 
                     for allele_name, cmp_vars in alleles:
                         for f_ in [sys.stderr, report_file]:
-                            print >> f_, "vs.", allele_name
+                            print("vs.", allele_name, file=f_)
                         compare_alleles(cmp_vars, node_vars)
 
-                    print >> sys.stderr
-                    print >> sys.stderr
+                    print(file=sys.stderr)
+                    print(file=sys.stderr)
 
 
             # Identify alleles that perfectly or closesly match assembled alleles
-            for node_name, node in asm_graph.nodes.items():
+            for node_name, node in list(asm_graph.nodes.items()):
                 vars = set(node.get_var_ids())
 
-                max_allele_names, max_common = [], -sys.maxint
-                for allele_name, vars2 in allele_vars.items():
+                max_allele_names, max_common = [], -sys.maxsize
+                for allele_name, vars2 in list(allele_vars.items()):
                     vars2 = set(vars2)
                     tmp_common = len(vars & vars2) - len(vars | vars2)
                     if tmp_common > max_common:
@@ -2454,21 +2454,21 @@
                         max_allele_names.append(allele_name)
 
                 for f_ in [sys.stderr, report_file]:
-                    print >> f_, "Genomic:", node_name
+                    print("Genomic:", node_name, file=f_)
                     node_vars = node.get_var_ids()
-                    min_mismatches = sys.maxint
+                    min_mismatches = sys.maxsize
                     for max_allele_name in max_allele_names:
                         cmp_vars = allele_vars[max_allele_name]
-                        cmp_vars = sorted(cmp_vars, cmp=lambda a, b: int(a[2:]) - int(b[2:]))
+                        cmp_vars = sorted(cmp_vars, key=lambda a: int(a[2:]))
                         print_output = False
                         tmp_mismatches = compare_alleles(cmp_vars, node_vars, print_output)
-                        print >> f_, "\t\t%s:" % max_allele_name, max_common, tmp_mismatches
+                        print("\t\t%s:" % max_allele_name, max_common, tmp_mismatches, file=f_)
                         if tmp_mismatches < min_mismatches:
                             min_mismatches = tmp_mismatches
                     if min_mismatches > 0:
-                        print >> f_, "Novel allele"
+                        print("Novel allele", file=f_)
                     else:
-                        print >> f_, "Known allele"
+                        print("Known allele", file=f_)
 
             """
             allele_exon_vars = {}
@@ -2520,7 +2520,7 @@
                                 else:
                                     break
                             for f_ in [sys.stderr, report_file]:
-                                print >> f_, "\t\t\t*** %d ranked %s (abundance: %.2f%%)" % (rank_i + 1, test_Gene_name, prob[1] * 100.0)
+                                print("\t\t\t*** %d ranked %s (abundance: %.2f%%)" % (rank_i + 1, test_Gene_name, prob[1] * 100.0), file=f_)
                             if rank_i < len(success):
                                 success[rank_i] = True
                             found_list[name_i] = True
@@ -2530,15 +2530,15 @@
                         break
                 if not found:
                     for f_ in [sys.stderr, report_file]:
-                        print >> f_, "\t\t\t\t%d ranked %s (abundance: %.2f%%)" % (prob_i + 1, _allele_rep, prob[1] * 100.0)
+                        print("\t\t\t\t%d ranked %s (abundance: %.2f%%)" % (prob_i + 1, _allele_rep, prob[1] * 100.0), file=f_)
                     if best_alleles and prob_i < 2:
                         for f_ in [sys.stderr, report_file]:
-                            print >> f_, "SingleModel %s (abundance: %.2f%%)" % (_allele_rep, prob[1] * 100.0)
+                            print("SingleModel %s (abundance: %.2f%%)" % (_allele_rep, prob[1] * 100.0), file=f_)
                 if not simulation and prob_i >= 9:
                     break
                 if prob_i >= 19:
                     break
-            print >> sys.stderr
+            print(file=sys.stderr)
 
             if simulation and not False in success:
                 aligner_type = "%s %s" % (aligner, index_type)
@@ -2589,7 +2589,7 @@
         Vars[gene][var_id] = [var_type, pos - left, data]
         Var_list[gene].append([pos - left, var_id])
         
-    for gene, in_var_list in Var_list.items():
+    for gene, in_var_list in list(Var_list.items()):
         Var_list[gene] = sorted(in_var_list)
 
     return Vars, Var_list
@@ -2707,7 +2707,7 @@
                    base_fname + ".link"]
     
     if verbose >= 1:
-        print >> sys.stderr, Gene_fnames
+        print(Gene_fnames, file=sys.stderr)
     
     if not typing_common.check_files(Gene_fnames):
         extract_hla_script = os.path.join(ex_path, "hisatgenotype_extract_vars.py")
@@ -2731,12 +2731,12 @@
         # DK - debugging purposes
         # extract_cmd += ["--ext-seq", "300"]
         if verbose >= 1:
-            print >> sys.stderr, "\tRunning:", ' '.join(extract_cmd)
+            print("\tRunning:", ' '.join(extract_cmd), file=sys.stderr)
         proc = subprocess.Popen(extract_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w'))
         proc.communicate()
         
         if not typing_common.check_files(Gene_fnames):
-            print >> sys.stderr, "Error: hisatgenotype_extract_vars failed!"
+            print("Error: hisatgenotype_extract_vars failed!", file=sys.stderr)
             sys.exit(1)
 
     for aligner, index_type in aligners:
@@ -2753,11 +2753,11 @@
                                  "%s_backbone.fa" % base_fname,
                                  "%s.graph" % base_fname]
                     if verbose >= 1:
-                        print >> sys.stderr, "\tRunning:", ' '.join(build_cmd)
+                        print("\tRunning:", ' '.join(build_cmd), file=sys.stderr)
                     proc = subprocess.Popen(build_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w'))
                     proc.communicate()        
                     if not typing_common.check_files(Gene_hisat2_graph_index_fnames):
-                        print >> sys.stderr, "Error: indexing HLA failed!  Perhaps, you may have forgotten to build hisat2 executables?"
+                        print("Error: indexing HLA failed!  Perhaps, you may have forgotten to build hisat2 executables?", file=sys.stderr)
                         sys.exit(1)
             # Build HISAT2 linear indexes based on the above information
             else:
@@ -2771,7 +2771,7 @@
                     proc = subprocess.Popen(build_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w'))
                     proc.communicate()        
                     if not typing_common.check_files(Gene_hisat2_linear_index_fnames):
-                        print >> sys.stderr, "Error: indexing HLA failed!"
+                        print("Error: indexing HLA failed!", file=sys.stderr)
                         sys.exit(1)
         else:
             assert aligner == "bowtie2" and index_type == "linear"
@@ -2785,7 +2785,7 @@
                 proc = subprocess.Popen(build_cmd, stdout=open("/dev/null", 'w'))
                 proc.communicate()        
                 if not typing_common.check_files(Gene_bowtie2_index_fnames):
-                    print >> sys.stderr, "Error: indexing HLA failed!"
+                    print("Error: indexing HLA failed!", file=sys.stderr)
                     sys.exit(1)
 
     # Read partial alleles from hla.data (temporary)
@@ -2815,21 +2815,21 @@
         refGene_loci[Gene_gene] = [Gene_name, chr, left, right, exons]
     Genes = {}
     if len(locus_list) == 0:
-        locus_list = refGene_loci.keys()
+        locus_list = list(refGene_loci.keys())
 
     read_Gene_alleles(base_fname + "_backbone.fa", Genes)
     read_Gene_alleles(base_fname + "_sequences.fa", Genes)
 
     # HLA gene alleles
     Gene_names = {}
-    for Gene_gene, data in Genes.items():
+    for Gene_gene, data in list(Genes.items()):
         Gene_names[Gene_gene] = list(data.keys())
 
     # HLA gene allele lengths
     Gene_lengths = {}
-    for Gene_gene, Gene_alleles in Genes.items():
+    for Gene_gene, Gene_alleles in list(Genes.items()):
         Gene_lengths[Gene_gene] = {}
-        for allele_name, seq in Gene_alleles.items():
+        for allele_name, seq in list(Gene_alleles.items()):
             Gene_lengths[Gene_gene][allele_name] = len(seq)
 
     # Read HLA variants, and link information
@@ -2886,7 +2886,7 @@
                 if str(test_i + 1) not in test_ids:
                     continue
 
-            print >> sys.stderr, "Test %d" % (test_i + 1), str(datetime.now())
+            print("Test %d" % (test_i + 1), str(datetime.now()), file=sys.stderr)
             test_locus_list = test_list[test_i]
             num_frag_list = typing_common.simulate_reads(Genes,
                                                          base_fname,
@@ -2910,7 +2910,7 @@
                     gene = test_Gene_name.split('*')[0]
                     test_Gene_seq = Genes[gene][test_Gene_name]
                     seq_type = "partial" if test_Gene_name in partial_alleles else "full"
-                    print >> sys.stderr, "\t%s - %d bp (%s sequence, %d pairs)" % (test_Gene_name, len(test_Gene_seq), seq_type, num_frag_list_i[j_])
+                    print("\t%s - %d bp (%s sequence, %d pairs)" % (test_Gene_name, len(test_Gene_seq), seq_type, num_frag_list_i[j_]), file=sys.stderr)
 
             if "single-end" in debug_instr:
                 read_fname = ["%s_input_1.fa" % base_fname]
@@ -2948,20 +2948,20 @@
                                      best_alleles,
                                      verbose)
 
-            for aligner_type, passed in tmp_test_passed.items():
+            for aligner_type, passed in list(tmp_test_passed.items()):
                 if aligner_type in test_passed:
                     test_passed[aligner_type] += passed
                 else:
                     test_passed[aligner_type] = passed
 
-                print >> sys.stderr, "\t\tPassed so far: %d/%d (%.2f%%)" % (test_passed[aligner_type], test_i + 1, (test_passed[aligner_type] * 100.0 / (test_i + 1)))
+                print("\t\tPassed so far: %d/%d (%.2f%%)" % (test_passed[aligner_type], test_i + 1, (test_passed[aligner_type] * 100.0 / (test_i + 1))), file=sys.stderr)
 
 
-        for aligner_type, passed in test_passed.items():
-            print >> sys.stderr, "%s:\t%d/%d passed (%.2f%%)" % (aligner_type, passed, len(test_list), passed * 100.0 / len(test_list))
+        for aligner_type, passed in list(test_passed.items()):
+            print("%s:\t%d/%d passed (%.2f%%)" % (aligner_type, passed, len(test_list), passed * 100.0 / len(test_list)), file=sys.stderr)
     
     else: # With real reads or BAMs
-        print >> sys.stderr, "\t", ' '.join(locus_list)
+        print("\t", ' '.join(locus_list), file=sys.stderr)
         fastq = True
         typing(ex_path,
                simulation,
@@ -3125,7 +3125,7 @@
     else:
         locus_list = args.locus_list.split(',')
     if args.aligners == "":
-        print >> sys.stderr, "Error: --aligners must be non-empty."
+        print("Error: --aligners must be non-empty.", file=sys.stderr)
         sys.exit(1)    
     args.aligners = args.aligners.split(',')
     for i in range(len(args.aligners)):
@@ -3136,7 +3136,7 @@
         args.read_fname = []
     if args.alignment_fname != "" and \
             not os.path.exists(args.alignment_fname):
-        print >> sys.stderr, "Error: %s doesn't exist." % args.alignment_fname
+        print("Error: %s doesn't exist." % args.alignment_fname, file=sys.stderr)
         sys.exit(1)
 
     if args.verbose and args.verbose_level == 0:
@@ -3154,10 +3154,10 @@
                 debug[item] = 1
 
     if not args.partial:
-        print >> sys.stderr, "Warning: --no-partial will be no longer supported!"
+        print("Warning: --no-partial will be no longer supported!", file=sys.stderr)
 
     if args.read_len * 2 > args.fragment_len:
-        print >> sys.stderr, "Warning: fragment might be too short (%d)" % (args.fragment_len)
+        print("Warning: fragment might be too short (%d)" % (args.fragment_len), file=sys.stderr)
 
     skip_fragment_regions = []
     if args.skip_fragment_regions != "":
@@ -3178,7 +3178,7 @@
     if args.stranded_seq != "":
         stranded_seq = args.stranded_seq.split(',')
         if len(stranded_seq) != 2:
-            print >> sys.stderr, "Error: --stranded-seq is incorrectly specified"
+            print("Error: --stranded-seq is incorrectly specified", file=sys.stderr)
             sys.exit(1)
     else:
         stranded_seq = []
--- hisat2.orig/evaluation/tests/genotype_genome/hisatgenotype_prev.py
+++ hisat2/evaluation/tests/genotype_genome/hisatgenotype_prev.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 #
 # Copyright 2016, Daehwan Kim <infphilo@gmail.com>
@@ -51,9 +51,9 @@
         aligner_cmd += ["-1", read_fnames[0],
                         "-2", read_fnames[1]]
 
-    print >> sys.stderr, "Aligning %s to %s ..." % (' '.join(read_fnames), base_fname)
+    print("Aligning %s to %s ..." % (' '.join(read_fnames), base_fname), file=sys.stderr)
     if verbose:
-        print >> sys.stderr, "\t%s" % (' '.join(aligner_cmd))
+        print("\t%s" % (' '.join(aligner_cmd)), file=sys.stderr)
 
     align_proc = subprocess.Popen(aligner_cmd,
                                   stdout=subprocess.PIPE,
@@ -69,7 +69,7 @@
                                    stderr=open("/dev/null", 'w'))
     sambam_proc.communicate()
 
-    print >> sys.stderr, "Sorting %s ..." % "TBD"
+    print("Sorting %s ..." % "TBD", file=sys.stderr)
     bamsort_cmd = ["samtools",
                    "sort",
                    "--threads", str(threads),
@@ -79,7 +79,7 @@
                                     stderr=open("/dev/null", 'w'))
     bamsort_proc.communicate()
 
-    print >> sys.stderr, "Indexing %s ..." % "TBD"
+    print("Indexing %s ..." % "TBD", file=sys.stderr)
 
     bamindex_cmd = ["samtools",
                     "index",
@@ -116,9 +116,9 @@
     # hisat2 graph index files
     genotype_fnames += ["%s.%d.ht2" % (base_fname, i+1) for i in range(8)]
     if not typing_common.check_files(genotype_fnames):
-        print >> sys.stderr, "Error: some of the following files are missing!"
+        print("Error: some of the following files are missing!", file=sys.stderr)
         for fname in genotype_fnames:
-            print >> sys.stderr, "\t%s" % fname
+            print("\t%s" % fname, file=sys.stderr)
         sys.exit(1)
 
     # Align reads, and sort the alignments into a BAM file
@@ -165,11 +165,11 @@
 
     # gene alleles
     allele_names = {}
-    for gene_name in genes.keys():
+    for gene_name in list(genes.keys()):
         if gene_name not in allele_names:
             allele_names[gene_name] = []
         gene_name2 = gene_name.split('-')[1]
-        for allele_name in allele_vars.keys():
+        for allele_name in list(allele_vars.keys()):
             allele_name1 = allele_name.split('*')[0]
             if gene_name2 == allele_name1:
                 allele_names[gene_name].append(allele_name)
@@ -196,7 +196,7 @@
         Vars[gene_name][var_id] = [var_type, pos, data]
         Var_list[gene_name].append([pos, var_id])
 
-    for gene_name, in_var_list in Var_list.items():
+    for gene_name, in_var_list in list(Var_list.items()):
         Var_list[gene_name] = sorted(in_var_list)
     def lower_bound(Var_list, pos):
         low, high = 0, len(Var_list)
@@ -233,7 +233,7 @@
     for test_i in range(len(test_list)):
         test_HLA_list = test_list[test_i]
         for test_HLA_names in test_HLA_list:
-            print >> sys.stderr, "\t%s" % (test_HLA_names)
+            print("\t%s" % (test_HLA_names), file=sys.stderr)
             for gene in test_HLA_names:
                 ref_allele = genes[gene]
                 ref_seq = gene_seqs[gene]
@@ -303,8 +303,8 @@
                     debug = False
                     if read_id in ["2339"] and False:
                         debug = True
-                        print "read_id: %s)" % read_id, pos, cigar_str, "NM:", NM, MD, Zs
-                        print "            ", read_seq
+                        print("read_id: %s)" % read_id, pos, cigar_str, "NM:", NM, MD, Zs)
+                        print("            ", read_seq)
 
                     vars = []
                     if Zs:
@@ -394,7 +394,7 @@
                     def add_stat(HLA_cmpt, HLA_counts, HLA_count_per_read, exon = True):
                         max_count = max(HLA_count_per_read.values())
                         cur_cmpt = set()
-                        for allele, count in HLA_count_per_read.items():
+                        for allele, count in list(HLA_count_per_read.items()):
                             if count < max_count:
                                 continue
                             """
@@ -414,7 +414,7 @@
                         alleles = ["", ""]
                         # alleles = ["B*40:304", "B*40:02:01"]
                         allele1_found, allele2_found = False, False
-                        for allele, count in HLA_count_per_read.items():
+                        for allele, count in list(HLA_count_per_read.items()):
                             if count < max_count:
                                 continue
                             if allele == alleles[0]:
@@ -422,13 +422,13 @@
                             elif allele == alleles[1]:
                                 allele2_found = True
                         if allele1_found != allele2_found:
-                            print alleles[0], HLA_count_per_read[alleles[0]]
-                            print alleles[1], HLA_count_per_read[alleles[1]]
+                            print(alleles[0], HLA_count_per_read[alleles[0]])
+                            print(alleles[1], HLA_count_per_read[alleles[1]])
                             if allele1_found:
-                                print ("%s\tread_id %s - %d vs. %d]" % (alleles[0], prev_read_id, max_count, HLA_count_per_read[alleles[1]]))
+                                print("%s\tread_id %s - %d vs. %d]" % (alleles[0], prev_read_id, max_count, HLA_count_per_read[alleles[1]]))
                             else:
-                                print ("%s\tread_id %s - %d vs. %d]" % (alleles[1], prev_read_id, max_count, HLA_count_per_read[alleles[0]]))
-                            print read_seq
+                                print("%s\tread_id %s - %d vs. %d]" % (alleles[1], prev_read_id, max_count, HLA_count_per_read[alleles[0]]))
+                            print(read_seq)
 
                         cur_cmpt = sorted(list(cur_cmpt))
                         cur_cmpt = '-'.join(cur_cmpt)
@@ -462,11 +462,11 @@
                             # daehwan - for debugging purposes
                             if debug:
                                 if allele in ["DQA1*05:05:01:01", "DQA1*05:05:01:02"]:
-                                    print allele, add, var_id
+                                    print(allele, add, var_id)
 
                     # Decide which allele(s) a read most likely came from
                     # also sanity check - read length, cigar string, and MD string
-                    for var_id, data in Vars[gene].items():
+                    for var_id, data in list(Vars[gene].items()):
                         var_type, var_pos, var_data = data
                         if var_type != "deletion":
                             continue
@@ -490,13 +490,13 @@
                                             add_count(var_id, -1)
                                             # daehwan - for debugging purposes
                                             if debug:
-                                                print cmp, var_id, Links[var_id]
+                                                print(cmp, var_id, Links[var_id])
                                     elif var_type == "deletion":
                                         del_len = int(var_data)
                                         if ref_pos < var_pos and ref_pos + length > var_pos + del_len:
                                             # daehwan - for debugging purposes
                                             if debug:
-                                                print cmp, var_id, Links[var_id], -1, Vars[gene][var_id]
+                                                print(cmp, var_id, Links[var_id], -1, Vars[gene][var_id])
                                             # Check if this might be one of the two tandem repeats (the same left coordinate)
                                             cmp_left, cmp_right = cmp[1], cmp[1] + cmp[2]
                                             test1_seq1 = ref_seq[cmp_left-base_locus:cmp_right-base_locus]
@@ -510,7 +510,7 @@
                                                 add_count(var_id, -1)
                                     else:
                                         if debug:
-                                            print cmp, var_id, Links[var_id], -1
+                                            print(cmp, var_id, Links[var_id], -1)
                                         add_count(var_id, -1)
                                 var_idx += 1
 
@@ -531,7 +531,7 @@
                                         if var_data == read_base:
                                             # daehwan - for debugging purposes
                                             if debug:
-                                                print cmp, var_id, 1, var_data, read_base, Links[var_id]
+                                                print(cmp, var_id, 1, var_data, read_base, Links[var_id])
 
                                             # daehwan - for debugging purposes
                                             if False:
@@ -553,8 +553,8 @@
                             var_idx = lower_bound(Var_list[gene], ref_pos)
                             # daehwan - for debugging purposes
                             if debug:
-                                print left_pos, cigar_str, MD, vars
-                                print ref_pos, ins_seq, Var_list[gene][var_idx], Vars[gene][Var_list[gene][var_idx][1]]
+                                print(left_pos, cigar_str, MD, vars)
+                                print(ref_pos, ins_seq, Var_list[gene][var_idx], Vars[gene][Var_list[gene][var_idx][1]])
                                 # sys.exit(1)
                             while var_idx < len(Var_list[gene]):
                                 var_pos, var_id = Var_list[gene][var_idx]
@@ -566,7 +566,7 @@
                                         if var_data == ins_seq:
                                             # daehwan - for debugging purposes
                                             if debug:
-                                                print cmp, var_id, 1, Links[var_id]
+                                                print(cmp, var_id, 1, Links[var_id])
                                             add_count(var_id, 1)
                                 var_idx += 1
 
@@ -602,8 +602,8 @@
                                         var_len = int(var_data)
                                         if var_len == length:
                                             if debug:
-                                                print cmp, var_id, 1, Links[var_id]
-                                                print ref_seq[var_pos - 10-base_locus:var_pos-base_locus], ref_seq[var_pos-base_locus:var_pos+int(var_data)-base_locus], ref_seq[var_pos+int(var_data)-base_locus:var_pos+int(var_data)+10-base_locus]
+                                                print(cmp, var_id, 1, Links[var_id])
+                                                print(ref_seq[var_pos - 10-base_locus:var_pos-base_locus], ref_seq[var_pos-base_locus:var_pos+int(var_data)-base_locus], ref_seq[var_pos+int(var_data)-base_locus:var_pos+int(var_data)+10-base_locus])
                                             add_count(var_id, 1)
                                 var_idx += 1
 
@@ -634,9 +634,9 @@
                     if read_pos != len(read_seq) or \
                             cmp_cigar_str != cigar_str or \
                             cmp_MD != MD:
-                        print >> sys.stderr, "Error:", cigar_str, MD
-                        print >> sys.stderr, "\tcomputed:", cmp_cigar_str, cmp_MD
-                        print >> sys.stderr, "\tcmp list:", cmp_list
+                        print("Error:", cigar_str, MD, file=sys.stderr)
+                        print("\tcomputed:", cmp_cigar_str, cmp_MD, file=sys.stderr)
+                        print("\tcmp list:", cmp_list, file=sys.stderr)
                         assert False            
 
                     prev_read_id = read_id
@@ -648,7 +648,7 @@
                 if prev_read_id != None:
                     add_stat(HLA_cmpt, HLA_counts, HLA_count_per_read)
 
-                HLA_counts = [[allele, count] for allele, count in HLA_counts.items()]
+                HLA_counts = [[allele, count] for allele, count in list(HLA_counts.items())]
                 def HLA_count_cmp(a, b):
                     if a[1] != b[1]:
                         return b[1] - a[1]
@@ -660,28 +660,28 @@
                 HLA_counts = sorted(HLA_counts, cmp=HLA_count_cmp)
                 for count_i in range(len(HLA_counts)):
                     count = HLA_counts[count_i]
-                    print >> sys.stderr, "\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1])
+                    print("\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1]), file=sys.stderr)
                     if count_i >= 9:
                         break
-                print >> sys.stderr
+                print(file=sys.stderr)
 
                 def normalize(prob):
                     total = sum(prob.values())
-                    for allele, mass in prob.items():
+                    for allele, mass in list(prob.items()):
                         prob[allele] = mass / total
 
                 def normalize2(prob, length):
                     total = 0
-                    for allele, mass in prob.items():
+                    for allele, mass in list(prob.items()):
                         assert allele in length
                         total += (mass / length[allele])
-                    for allele, mass in prob.items():
+                    for allele, mass in list(prob.items()):
                         assert allele in length
                         prob[allele] = mass / length[allele] / total
 
                 def prob_diff(prob1, prob2):
                     diff = 0.0
-                    for allele in prob1.keys():
+                    for allele in list(prob1.keys()):
                         if allele in prob2:
                             diff += abs(prob1[allele] - prob2[allele])
                         else:
@@ -701,7 +701,7 @@
                         return 1
 
                 HLA_prob, HLA_prob_next = {}, {}
-                for cmpt, count in HLA_cmpt.items():
+                for cmpt, count in list(HLA_cmpt.items()):
                     alleles = cmpt.split('-')
                     for allele in alleles:
                         if allele not in HLA_prob:
@@ -718,7 +718,7 @@
                 normalize(HLA_prob)
                 def next_prob(HLA_cmpt, HLA_prob, HLA_length):
                     HLA_prob_next = {}
-                    for cmpt, count in HLA_cmpt.items():
+                    for cmpt, count in list(HLA_cmpt.items()):
                         alleles = cmpt.split('-')
                         alleles_prob = 0.0
                         for allele in alleles:
@@ -745,17 +745,17 @@
                     HLA_prob[allele] /= float(allele_len)
                 normalize(HLA_prob)
                 """
-                HLA_prob = [[allele, prob] for allele, prob in HLA_prob.items()]
+                HLA_prob = [[allele, prob] for allele, prob in list(HLA_prob.items())]
 
                 HLA_prob = sorted(HLA_prob, cmp=HLA_prob_cmp)
                 success = [False for i in range(len(test_HLA_names))]
                 found_list = [False for i in range(len(test_HLA_names))]
                 for prob_i in range(len(HLA_prob)):
                     prob = HLA_prob[prob_i]
-                    print >> sys.stderr, "\t\t\t\t%d ranked %s (abundance: %.2f%%)" % (prob_i + 1, prob[0], prob[1] * 100.0)
+                    print("\t\t\t\t%d ranked %s (abundance: %.2f%%)" % (prob_i + 1, prob[0], prob[1] * 100.0), file=sys.stderr)
                     if prob_i >= 9:
                         break
-                print >> sys.stderr
+                print(file=sys.stderr)
 
                 """
                 if len(test_HLA_names) == 2:
@@ -963,15 +963,15 @@
             if cigar_op in "MIS":
                 read_pos += length
 
-    for var_id, counts in var_counts.items():
+    for var_id, counts in list(var_counts.items()):
         if counts[0] < 2: # or counts[0] * 3 < counts[1]:
             continue
         assert var_id in vars
         var_chr, var_left, var_type, var_data = vars[var_id]
         assert var_id in clnsigs
         var_gene, var_clnsig = clnsigs[var_id]
-        print >> sys.stderr, "\t\t\t%s %s: %s:%d %s %s (%s): %d-%d" % \
-                (var_gene, var_id, var_chr, var_left, var_type, var_data, var_clnsig, counts[0], counts[1])
+        print("\t\t\t%s %s: %s:%d %s %s (%s): %d-%d" % \
+                (var_gene, var_id, var_chr, var_left, var_type, var_data, var_clnsig, counts[0], counts[1]), file=sys.stderr)
 
 
                 
@@ -1038,7 +1038,7 @@
         read_fnames = [args.read_fname_U]
     else:
         if args.read_fname_1 == "" or args.read_fname_2 == "":
-            print >> sys.stderr, "Error: please specify read file names correctly: -U or -1 and -2"
+            print("Error: please specify read file names correctly: -U or -1 and -2", file=sys.stderr)
             sys.exit(1)
         read_fnames = [args.read_fname_1, args.read_fname_2] 
 
--- hisat2.orig/evaluation/tests/genotype_genome/paper_sensitivity/sensitivity.py
+++ hisat2/evaluation/tests/genotype_genome/paper_sensitivity/sensitivity.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 #
 # Copyright 2017, Daehwan Kim <infphilo@gmail.com>
 #
@@ -69,7 +69,7 @@
             aligner_cmd += ["-1", fq_fnames[0],
                             "-2", fq_fnames[1]]
             # print >> sys.stderr, "Running:", ' '.join(aligner_cmd)
-            print sample, aligner, type
+            print(sample, aligner, type)
             align_proc = subprocess.Popen(aligner_cmd,
                                           stdout=subprocess.PIPE,
                                           stderr=open("/dev/null", 'w'))
@@ -113,7 +113,7 @@
 
                 if ((aligner == "hisat2" and NH == 1) or (aligner == "bowtie2" and AS > XS and read1_first if flag & 0x40 else read2_first)):
                     if chr in region_loci:
-                        for region, loci in region_loci[chr].items():
+                        for region, loci in list(region_loci[chr].items()):
                             _, _, loci_left, loci_right = loci
                             # there might be a different candidate region for each of left and right reads
                             if pos >= loci_left and pos < loci_right:
@@ -136,5 +136,5 @@
                 gene = "HLA-" + gene
                 if gene not in region_count:
                     continue
-                print "\t%s pair: %d, left+right: %d" % (gene, region_count[gene], region_read1_count[gene] + region_read2_count[gene])
+                print("\t%s pair: %d, left+right: %d" % (gene, region_count[gene], region_read1_count[gene] + region_read2_count[gene]))
             
--- hisat2.orig/evaluation/tests/one_snp_test/evaluate_one_snp_reads.py
+++ hisat2/evaluation/tests/one_snp_test/evaluate_one_snp_reads.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os, subprocess
 import multiprocessing
@@ -154,7 +154,7 @@
             if aligner == "hisat2" and index_type != "":
                 aligner_name += ("_" + index_type)
             two_step = (aligner == "tophat2" or type == "x2" or (aligner in ["hisat2", "hisat"] and type == ""))
-            print >> sys.stderr, "\t%s\t%s" % (aligner_name, str(datetime.now()))
+            print("\t%s\t%s" % (aligner_name, str(datetime.now())), file=sys.stderr)
             if paired:
                 aligner_dir = aligner_name + "_paired"
             else:
@@ -170,7 +170,7 @@
             aligner_cmd = get_aligner_cmd(aligner, type, index_type, version, "../" + type_read1_fname, "../" + type_read2_fname, out_fname)
             start_time = datetime.now()
             if verbose:
-                print >> sys.stderr, "\t", start_time, " ".join(aligner_cmd)
+                print("\t", start_time, " ".join(aligner_cmd), file=sys.stderr)
             if aligner in ["hisat2", "hisat", "bowtie", "bowtie2", "gsnap", "bwa"]:
                 proc = subprocess.Popen(aligner_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE)
             else:
@@ -180,7 +180,7 @@
             duration = finish_time - start_time
             duration = duration.total_seconds()
             if verbose:
-                print >> sys.stderr, "\t", finish_time, "finished:", duration
+                print("\t", finish_time, "finished:", duration, file=sys.stderr)
 
             assert os.path.exists(out_fname)
             correct_reads, correct_multi_reads, num_reads = 0, 0, 0
@@ -205,8 +205,8 @@
 
                 prev_read_id = read_id
 
-            print >> sys.stderr, "\tfirst: %d / %d (%.2f%%)" % (correct_reads, num_reads, float(correct_reads)/num_reads*100)
-            print >> sys.stderr, "\tall: %d / %d (%.2f%%)" % (correct_multi_reads, num_reads, float(correct_multi_reads)/num_reads*100)
+            print("\tfirst: %d / %d (%.2f%%)" % (correct_reads, num_reads, float(correct_reads)/num_reads*100), file=sys.stderr)
+            print("\tall: %d / %d (%.2f%%)" % (correct_multi_reads, num_reads, float(correct_multi_reads)/num_reads*100), file=sys.stderr)
 
             os.chdir("..")
 
--- hisat2.orig/evaluation/tests/one_snp_test/simulate_one_snp_reads.py
+++ hisat2/evaluation/tests/one_snp_test/simulate_one_snp_reads.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 #
 # Copyright 2015, Daehwan Kim <infphilo@gmail.com>
 #
@@ -125,8 +125,8 @@
         Zs = "%d|I|%s" % (left_read_len, snp_id)
 
     if len(read_seq) != read_len:
-        print >> sys.stderr, "read length differs:", len(read_seq), "vs.", read_len
-        print >> sys.stderr, pos, cigar_str, MD, Zs
+        print("read length differs:", len(read_seq), "vs.", read_len, file=sys.stderr)
+        print(pos, cigar_str, MD, Zs, file=sys.stderr)
         assert False
 
     ref_read_seq = chr_seq[pos:pos+read_len]
@@ -257,8 +257,8 @@
         tMD += ("{}".format(match_len))
 
     if tMD != MD:
-        print >> sys.stderr, chr, pos, cigar, MD, Zs
-        print >> sys.stderr, tMD
+        print(chr, pos, cigar, MD, Zs, file=sys.stderr)
+        print(tMD, file=sys.stderr)
         assert False
         
         
@@ -272,14 +272,14 @@
 
     genome_seq = read_genome(genome_file)
     snps = read_snp(snp_file)
-    chr_ids = genome_seq.keys()
+    chr_ids = list(genome_seq.keys())
 
     sam_file = open(base_fname + ".sam", "w")
 
     # Write SAM header
-    print >> sam_file, "@HD\tVN:1.0\tSO:unsorted"
-    for chr in genome_seq.keys():
-        print >> sam_file, "@SQ\tSN:%s\tLN:%d" % (chr, len(genome_seq[chr]))
+    print("@HD\tVN:1.0\tSO:unsorted", file=sam_file)
+    for chr in list(genome_seq.keys()):
+        print("@SQ\tSN:%s\tLN:%d" % (chr, len(genome_seq[chr])), file=sam_file)
     
     read_file = open(base_fname + "_snp_1.fa", "w")
     ref_read_file = open(base_fname + "_ref_1.fa", "w")
@@ -312,12 +312,12 @@
             #    Zs2 = ("\tZs:Z:{}".format(Zs2))
 
             read_id_str = "{}_{}_{}_{}".format(cur_read_id, chr, pos, cigar_str)
-            print >> read_file, ">{}".format(read_id_str)
-            print >> read_file, read_seq
-            print >> sam_file, "{}\t{}\t{}\t{}\t255\t{}\t{}\t{}\t0\t{}\t*\tXM:i:0\tNM:i:0\tMD:Z:{}{}".format(read_id_str, flag, chr, pos + 1, cigar_str, chr, pos + 1, read_seq, MD, Zs)
+            print(">{}".format(read_id_str), file=read_file)
+            print(read_seq, file=read_file)
+            print("{}\t{}\t{}\t{}\t255\t{}\t{}\t{}\t0\t{}\t*\tXM:i:0\tNM:i:0\tMD:Z:{}{}".format(read_id_str, flag, chr, pos + 1, cigar_str, chr, pos + 1, read_seq, MD, Zs), file=sam_file)
 
-            print >> ref_read_file, ">{}_{}_{}_100M".format(cur_read_id, chr, pos)
-            print >> ref_read_file, ref_read_seq
+            print(">{}_{}_{}_100M".format(cur_read_id, chr, pos), file=ref_read_file)
+            print(ref_read_seq, file=ref_read_file)
             """
             if paired_end:
                 print >> read2_file, ">{}".format(cur_read_id)
--- hisat2.orig/evaluation/tests/repeat/generate_repeats.py
+++ hisat2/evaluation/tests/repeat/generate_repeats.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys
 import struct
@@ -25,7 +25,7 @@
     chr_sa.append(num)
 
     if len(chr_sa) % 5000000 == 0:
-        print len(chr_sa)
+        print(len(chr_sa))
 f.close()
 
 assert chr_sa[0] + 1 == len(chr_sa)
@@ -41,7 +41,7 @@
 while i < len(chr_sa) - 1:
     pos = chr_sa[i]
     base_seq = chr_seq[pos:pos+seq_len]
-    for j in xrange(i+1, len(chr_sa)):
+    for j in range(i+1, len(chr_sa)):
         pos2 = chr_sa[j]
         cmp_seq = chr_seq[pos2:pos2+seq_len]
         if base_seq != cmp_seq:
@@ -53,13 +53,13 @@
     i = j
 
     if i % 5000000 == 0:
-        print i
+        print(i)
 
 found = False
-print len(repeats), "repeats"
+print(len(repeats), "repeats")
 deleted = set()
-for i in xrange(len(repeats) - 1):
-    for j in xrange(i + 1, len(repeats)):
+for i in range(len(repeats) - 1):
+    for j in range(i + 1, len(repeats)):
         if j in deleted:
             continue
         
@@ -84,17 +84,17 @@
 
         if num_close == 1 and num_close2 < 5:
             found = True
-            print pos_set
-            print pos_set2
-            print pos_seq
-            print pos_seq2
+            print(pos_set)
+            print(pos_set2)
+            print(pos_seq)
+            print(pos_seq2)
 
             file1 = open("1.fa", "w")
             file2 = open("2.fa", "w")
 
             pos_seq2_rc = list(pos_seq2)
             pos_seq2_rc = pos_seq2_rc[::-1]
-            for k in xrange(seq_len):
+            for k in range(seq_len):
                 nt = pos_seq2_rc[k]
                 if nt == 'A':
                     nt = 'T'
@@ -108,11 +108,11 @@
                 pos_seq2_rc[k] = nt
             pos_seq2_rc = ''.join(pos_seq2_rc)
 
-            for k in xrange(1000000):
-                print >> file1, ">%d" % k
-                print >> file2, ">%d" % k
-                print >> file1, pos_seq
-                print >> file2, pos_seq2_rc
+            for k in range(1000000):
+                print(">%d" % k, file=file1)
+                print(">%d" % k, file=file2)
+                print(pos_seq, file=file1)
+                print(pos_seq2_rc, file=file2)
 
             file1.close()
             file2.close()
@@ -121,7 +121,7 @@
     if found:
         break
 
-    print i
+    print(i)
 
 chr_seq = ""
 for line in open("%s.fa" % chr_name):
@@ -132,7 +132,7 @@
 
 N_ranges = []
 prev_nt = None
-for i in xrange(len(chr_seq)):
+for i in range(len(chr_seq)):
     nt = chr_seq[i]
     if nt == 'N':
         if prev_nt != 'N':
@@ -158,7 +158,7 @@
 to_genome_list = [[y, x] for x, y in to_joined_list]
 
 N_ranges_tmp = []
-for i in xrange(len(to_genome_list)):
+for i in range(len(to_genome_list)):
     to_genome = to_genome_list[i]
     if i == 0:
         if to_genome[1] > 0:
@@ -171,8 +171,8 @@
 
 file = open("%s_rep.info" % chr_name, "w")
 def print_rep_info(rep_name, rep_pos, rep_len, pos_set, pos_seq):
-    print >> file, ">%s*0\trep\t%d\t%d\t%d\t0" % (rep_name, rep_pos, rep_len, len(pos_set))
-    for i in xrange(0, len(pos_set), 10):
+    print(">%s*0\trep\t%d\t%d\t%d\t0" % (rep_name, rep_pos, rep_len, len(pos_set)), file=file)
+    for i in range(0, len(pos_set), 10):
         output = ""
         for j in range(i, i + 10):
             if j >= len(pos_set):
@@ -181,7 +181,7 @@
                 output += " "
 
             def convert(pos):
-                for i in xrange(len(to_genome_list)):
+                for i in range(len(to_genome_list)):
                     if i + 1 == len(to_genome_list) or (pos >= to_genome_list[i][0] and pos < to_genome_list[i+1][0]):
                         return pos - to_genome_list[i][0] + to_genome_list[i][1]
 
@@ -190,7 +190,7 @@
             pos = convert(pos_set[j])
             assert chr_seq[pos:pos+seq_len] == pos_seq
             output += ("%s:%d:+" % (chr_name, pos))
-        print >> file, output
+        print(output, file=file)
 print_rep_info("rep1", 0, seq_len, pos_set, pos_seq)
 print_rep_info("rep2", seq_len, seq_len, pos_set2, pos_seq2)
 file.close()
@@ -198,15 +198,15 @@
 chr_seq = chr_seq.replace(pos_seq, 'N' * seq_len)
 chr_seq = chr_seq.replace(pos_seq2, 'N' * seq_len)
 file = open("%s_mask.fa" % chr_name, "w")
-print >> file, ">%s_mask" % chr_name
-for i in xrange(0, len(chr_seq), 60):
-    print >> file, chr_seq[i:i+60]
+print(">%s_mask" % chr_name, file=file)
+for i in range(0, len(chr_seq), 60):
+    print(chr_seq[i:i+60], file=file)
 file.close()
 
 file = open("%s_rep.fa" % chr_name, "w")
 rep_seq = pos_seq + pos_seq2
-print >> file, ">rep"
-for i in xrange(0, len(rep_seq), 60):
-    print >> file, rep_seq[i:i+60]
+print(">rep", file=file)
+for i in range(0, len(rep_seq), 60):
+    print(rep_seq[i:i+60], file=file)
 file.close()
     
--- hisat2.orig/evaluation/tests/repeat/test_repeat.py
+++ hisat2/evaluation/tests/repeat/test_repeat.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 import sys, os, subprocess, random
 from argparse import ArgumentParser, FileType
 
@@ -52,7 +52,7 @@
 
     genome_file.close()
 
-    print >> sys.stderr, "genome is loaded"
+    print("genome is loaded", file=sys.stderr)
     
     return chr_dic
 
@@ -62,7 +62,7 @@
 def generate_random_seq(seq_len):
     assert seq_len > 0
     random_seq = ""
-    for i in xrange(seq_len):
+    for i in range(seq_len):
         random_seq += "ACGT"[random.randint(0, 3)]
     return random_seq
 
@@ -109,10 +109,10 @@
     ]
     
     for id, seq in seqs:
-        print ">%s" % id
-        print generate_random_seq(20)
-        print seq
-        print generate_random_seq(20)
+        print(">%s" % id)
+        print(generate_random_seq(20))
+        print(seq)
+        print(generate_random_seq(20))
 
 
 """
--- hisat2.orig/hisat2-build
+++ hisat2/hisat2-build
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 """
  Copyright 2015, Daehwan Kim <infphilo@gmail.com>
--- hisat2.orig/hisat2-build-new
+++ hisat2/hisat2-build-new
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 """
  Copyright 2018, Chanhee Park <parkchanhee@gmail.com> and Daehwan Kim <infphilo@gmail.com>
--- hisat2.orig/hisat2-inspect
+++ hisat2/hisat2-inspect
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 """
  Copyright 2015, Daehwan Kim <infphilo@gmail.com>
--- hisat2.orig/hisat2_extract_exons.py
+++ hisat2/hisat2_extract_exons.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 #
 # Copyright 2015, Daehwan Kim <infphilo@gmail.com>
@@ -19,7 +19,7 @@
 # along with HISAT 2.  If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import print_function
+
 
 from sys import stderr, exit
 from collections import defaultdict as dd, Counter
@@ -65,7 +65,7 @@
             trans[transcript_id][2].append([left, right])
 
     # Sort exons and merge where separating introns are <=5 bps
-    for tran, [chrom, strand, exons] in trans.items():
+    for tran, [chrom, strand, exons] in list(trans.items()):
             exons.sort()
             tmp_exons = [exons[0]]
             for i in range(1, len(exons)):
@@ -77,7 +77,7 @@
 
     # Calculate and print the unique junctions
     tmp_exons = set()
-    for chrom, strand, texons in trans.values():
+    for chrom, strand, texons in list(trans.values()):
         for i in range(len(texons)):
             tmp_exons.add((chrom, texons[i][0], texons[i][1], strand))
     tmp_exons = sorted(tmp_exons)
--- hisat2.orig/hisat2_extract_snps_haplotypes_UCSC.py
+++ hisat2/hisat2_extract_snps_haplotypes_UCSC.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 #
 # Copyright 2015, Daehwan Kim <infphilo@gmail.com>
@@ -80,8 +80,8 @@
 
     # daehwan - for debugging purposes
     if a_chr != b_chr:
-        print a
-        print b
+        print(a)
+        print(b)
     
     assert a_chr == b_chr
     if a_pos != b_pos:
@@ -154,7 +154,7 @@
             vars_count[id] = 0
         vars_count[id] += 1
     vars_duplicate = set()
-    for id, count in vars_count.items():
+    for id, count in list(vars_count.items()):
         if count <= 1:
             continue
         vars_duplicate.add(id)
@@ -223,8 +223,8 @@
         else:
             assert type == 'I'
             type = "insertion"
-        print >> snp_file, "%s\t%s\t%s\t%s\t%s" % \
-            (varID, type, chr, pos, data)
+        print("%s\t%s\t%s\t%s\t%s" % \
+            (varID, type, chr, pos, data), file=snp_file)
 
     # genotypes_list looks like
     #    Var0: 0
@@ -301,7 +301,7 @@
             h_end += (int(h2_data) - 1)
         assert h_begin <= h_end
         h_new_begin = h_begin
-        for h_j in reversed(range(0, h_i)):
+        for h_j in reversed(range(h_i)):
             hc = haplotypes[h_j].split('#')
             _, hc_begin, hc_type, hc_data, _ = vars[int(hc[-1])]
             hc_begin = int(hc_begin)
@@ -317,8 +317,8 @@
         for id in h:
             var_dic = vars[int(id)][4]
             h_add.append(var_dic["id2"])
-        print >> haplotype_file, "ht%d\t%s\t%d\t%d\t%s" % \
-            (num_haplotypes, chr, h_new_begin, h_end, ','.join(h_add))
+        print("ht%d\t%s\t%d\t%d\t%s" % \
+            (num_haplotypes, chr, h_new_begin, h_end, ','.join(h_add)), file=haplotype_file)
         num_haplotypes += 1
 
     return num_haplotypes
@@ -447,10 +447,10 @@
                 if testset:
                     ref_seq = chr_seq[start-50:start+50]
                     alt_seq = chr_seq[start-50:start] + allele + chr_seq[start+1:start+50]
-                    print >> ref_testset_file, ">%s_single_%d" % (rs_id, start - 50)
-                    print >> ref_testset_file, ref_seq
-                    print >> alt_testset_file, ">%s_single_%d_%s" % (rs_id, start - 50, ref_seq)
-                    print >> alt_testset_file, alt_seq
+                    print(">%s_single_%d" % (rs_id, start - 50), file=ref_testset_file)
+                    print(ref_seq, file=ref_testset_file)
+                    print(">%s_single_%d_%s" % (rs_id, start - 50, ref_seq), file=alt_testset_file)
+                    print(alt_seq, file=alt_testset_file)
                 
         elif classType == "deletion":
             if start > 0:
@@ -475,10 +475,10 @@
             if testset and delLen > 0 and delLen <= 10:
                 ref_seq = chr_seq[start-50:start+50]
                 alt_seq = chr_seq[start-50:start] + chr_seq[start+delLen:start+50+delLen]
-                print >> ref_testset_file, ">%s_deletion_%d" % (rs_id, start - 50)
-                print >> ref_testset_file, ref_seq
-                print >> alt_testset_file, ">%s_deletion_%d_%s" % (rs_id, start - 50, ref_seq)
-                print >> alt_testset_file, alt_seq
+                print(">%s_deletion_%d" % (rs_id, start - 50), file=ref_testset_file)
+                print(ref_seq, file=ref_testset_file)
+                print(">%s_deletion_%d_%s" % (rs_id, start - 50, ref_seq), file=alt_testset_file)
+                print(alt_seq, file=alt_testset_file)
         else:
             assert classType == "insertion"
             if start > 0:
@@ -497,10 +497,10 @@
                     if testset and insLen > 0 and insLen <= 10:
                         ref_seq = chr_seq[start-50:start+50]
                         alt_seq = chr_seq[start-50:start] + allele + chr_seq[start:start+50-insLen]
-                        print >> ref_testset_file, ">%s_insertion_%d" % (rs_id, start - 50)
-                        print >> ref_testset_file, ref_seq
-                        print >> alt_testset_file, ">%s_insertion_%d_%s" % (rs_id, start - 50, ref_seq)
-                        print >> alt_testset_file, alt_seq
+                        print(">%s_insertion_%d" % (rs_id, start - 50), file=ref_testset_file)
+                        print(ref_seq, file=ref_testset_file)
+                        print(">%s_insertion_%d_%s" % (rs_id, start - 50, ref_seq), file=alt_testset_file)
+                        print(alt_seq, file=alt_testset_file)
 
         if curr_right < end:
             curr_right = end
--- hisat2.orig/hisat2_extract_snps_haplotypes_VCF.py
+++ hisat2/hisat2_extract_snps_haplotypes_VCF.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 #
 # Copyright 2016, Daehwan Kim <infphilo@gmail.com>
 #
@@ -100,11 +100,11 @@
         ref_allele2, pos2 = ref_allele, pos
 
         if chr_seq[pos:pos+len(ref_allele)] != ref_allele:
-            print >> sys.stderr, "Error: the reference genome you provided seems to be incompatible with the VCF file at %d of chromosome %s where %s is in the reference genome while %s is in the VCF file" % (pos, chr, chr_seq[pos:pos+len(ref_allele)], ref_allele)
+            print("Error: the reference genome you provided seems to be incompatible with the VCF file at %d of chromosome %s where %s is in the reference genome while %s is in the VCF file" % (pos, chr, chr_seq[pos:pos+len(ref_allele)], ref_allele), file=sys.stderr)
 
         def warning_msg():
-            print >> sys.stderr, "Warning) ref allele (%s) and alt allele (%s in %s) at chr%s:%d are excluded." % \
-                (ref_allele, alt_allele, ','.join(alt_alleles), chr, pos + 1)
+            print("Warning) ref allele (%s) and alt allele (%s in %s) at chr%s:%d are excluded." % \
+                (ref_allele, alt_allele, ','.join(alt_alleles), chr, pos + 1), file=sys.stderr)
             
         min_len = min(len(ref_allele2), len(alt_allele2))
         if min_len >= 2:
@@ -203,8 +203,8 @@
         else:
             assert type == 'I'
             type = "insertion"
-        print >> snp_file, "%s\t%s\t%s\t%s\t%s" % \
-            (varID, type, chr, pos, data)
+        print("%s\t%s\t%s\t%s\t%s" % \
+            (varID, type, chr, pos, data), file=snp_file)
 
     # variant compatibility
     vars_cmpt = [-1 for i in range(len(vars))]
@@ -408,7 +408,7 @@
             h_end += (int(h2_data) - 1)
         assert h_begin <= h_end
         h_new_begin = h_begin
-        for h_j in reversed(range(0, h_i)):
+        for h_j in reversed(list(range(0, h_i))):
             hc = haplotypes[h_j].split('#')
             _, hc_begin, hc_type, hc_data, _ = vars[int(hc[-1])]
             hc_begin = int(hc_begin)
@@ -424,8 +424,8 @@
         for id in h:
             var_dic = vars[int(id)][4]
             h_add.append(var_dic["id2"])
-        print >> haplotype_file, "ht%d\t%s\t%d\t%d\t%s" % \
-            (num_haplotypes, chr, h_new_begin, h_end, ','.join(h_add))
+        print("ht%d\t%s\t%d\t%d\t%s" % \
+            (num_haplotypes, chr, h_new_begin, h_end, ','.join(h_add)), file=haplotype_file)
         num_haplotypes += 1
 
     return num_haplotypes
@@ -525,26 +525,26 @@
                     
                 var_set.add(var_str)
 
-        print >> sys.stderr, "Number of variants in %s is:" % (genotype_vcf)
-        for chr, vars in genotype_var_list.items():
+        print("Number of variants in %s is:" % (genotype_vcf), file=sys.stderr)
+        for chr, vars in list(genotype_var_list.items()):
             vars = sorted(vars, cmp=compare_vars)
-            print >> sys.stderr, "\tChromosome %s: %d variants" % (chr, len(vars))
+            print("\tChromosome %s: %d variants" % (chr, len(vars)), file=sys.stderr)
 
-        for chr, gene_ranges in genotype_ranges.items():
-            for gene, value in gene_ranges.items():
+        for chr, gene_ranges in list(genotype_ranges.items()):
+            for gene, value in list(gene_ranges.items()):
                 gene_ranges[gene] = [value[0] - 100, value[1] + 100]
                 value = genotype_ranges[chr][gene]
                 if verbose:
-                    print >> sys.stderr, "%s\t%s\t%d-%d" % (chr, gene, value[0], value[1])
+                    print("%s\t%s\t%d-%d" % (chr, gene, value[0], value[1]), file=sys.stderr)
 
         if extra_files or True:
             clnsig_file = open("%s.clnsig" % base_fname, 'w')
-            for chr, vars in genotype_var_list.items():
+            for chr, vars in list(genotype_var_list.items()):
                 for var in vars:
                     varID = var[4]["id2"]
                     CLNSIG = var[4]["CLNSIG"]
                     gene = var[4]["gene"]
-                    print >> clnsig_file, "%s\t%s\t%s" % (varID, gene, CLNSIG)
+                    print("%s\t%s\t%s" % (varID, gene, CLNSIG), file=clnsig_file)
             clnsig_file.close()
 
     SNP_file = open("%s.snp" % base_fname, 'w')
@@ -553,28 +553,28 @@
     # Write reference information and backbone sequences into files
     if extra_files:
         ref_file = open("%s.ref" % base_fname, 'w')
-        for chr, gene_ranges in genotype_ranges.items():
-            for gene, value in gene_ranges.items():
+        for chr, gene_ranges in list(genotype_ranges.items()):
+            for gene, value in list(gene_ranges.items()):
                 left, right = value
                 if reference_type == "gene":
                     left, right = 0, right - left
-                print >> ref_file, "%s\t%s\t%d\t%d" % (gene, chr, left, right)
+                print("%s\t%s\t%d\t%d" % (gene, chr, left, right), file=ref_file)
         ref_file.close()
 
         if reference_type == "gene":
             backbone_file = open("%s_backbone.fa" % base_fname, 'w')
-            for chr, gene_ranges in genotype_ranges.items():
-                for gene, value in gene_ranges.items():
+            for chr, gene_ranges in list(genotype_ranges.items()):
+                for gene, value in list(gene_ranges.items()):
                     left, right = value
                     left, right = 0, right - left
-                    print >> backbone_file, ">%s" % (gene)
+                    print(">%s" % (gene), file=backbone_file)
                     backbone_seq = chr_dic[chr][value[0]:value[1]+1]
                     for s in range(0, len(backbone_seq), 60):
-                        print >> backbone_file, backbone_seq[s:s+60]
+                        print(backbone_seq[s:s+60], file=backbone_file)
             backbone_file.close()
         elif reference_type == "chromosome":
             first = True
-            for chr in genotype_ranges.keys():
+            for chr in list(genotype_ranges.keys()):
                 if first:
                     os.system("samtools faidx genome.fa %s > %s_backbone.fa" % (chr, base_fname))
                     first = False
@@ -665,11 +665,11 @@
                 offset = 0
                 gene = None
                 if num_lines % 10000 == 1:
-                    print >> sys.stderr, "\t%s:%d\r" % (chr, pos),
+                    print("\t%s:%d\r" % (chr, pos), end=' ', file=sys.stderr)
 
                 if chr_genotype_ranges:
                     skip = True
-                    for gene_, range_ in chr_genotype_ranges.items():
+                    for gene_, range_ in list(chr_genotype_ranges.items()):
                         if pos > range_[0] and pos < range_[1]:
                             skip = False
                             break
@@ -773,7 +773,7 @@
                 vars = []
 
         else:            
-            for chr in genotype_var_list.keys():
+            for chr in list(genotype_var_list.keys()):
                 chr_seq = chr_dic[chr]
                 chr_genotype_vars = genotype_var_list[chr]
                 curr_right = -1
@@ -900,7 +900,7 @@
             args.genotype_gene_list = args.genotype_gene_list.split(',')
 
         if len(args.genotype_gene_list) == 0:
-            print >> sys.stderr, "Error: please specify --genotype-gene-list."
+            print("Error: please specify --genotype-gene-list.", file=sys.stderr)
             sys.exit(1)
 
     else:
--- hisat2.orig/hisat2_extract_splice_sites.py
+++ hisat2/hisat2_extract_splice_sites.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 #
 # Copyright 2015, Daehwan Kim <infphilo@gmail.com>
@@ -19,7 +19,7 @@
 # along with HISAT 2.  If not, see <http://www.gnu.org/licenses/>.
 #
 
-from __future__ import print_function
+
 
 from sys import stderr, exit
 from collections import defaultdict as dd, Counter
@@ -66,7 +66,7 @@
             trans[transcript_id][2].append([left, right])
 
     # Sort exons and merge where separating introns are <=5 bps
-    for tran, [chrom, strand, exons] in trans.items():
+    for tran, [chrom, strand, exons] in list(trans.items()):
             exons.sort()
             tmp_exons = [exons[0]]
             for i in range(1, len(exons)):
@@ -78,7 +78,7 @@
 
     # Calculate and print the unique junctions
     junctions = set()
-    for chrom, strand, exons in trans.values():
+    for chrom, strand, exons in list(trans.values()):
         for i in range(1, len(exons)):
             junctions.add((chrom, exons[i-1][1], exons[i][0], strand))
     junctions = sorted(junctions)
@@ -90,7 +90,7 @@
     if verbose:
         exon_lengths, intron_lengths, trans_lengths = \
             Counter(), Counter(), Counter()
-        for chrom, strand, exons in trans.values():
+        for chrom, strand, exons in list(trans.values()):
             tran_len = 0
             for i, exon in enumerate(exons):
                 exon_len = exon[1]-exon[0]+1
@@ -102,7 +102,7 @@
             trans_lengths[tran_len] += 1
 
         print('genes: {}, genes with multiple isoforms: {}'.format(
-                len(genes), sum(len(v) > 1 for v in genes.values())),
+                len(genes), sum(len(v) > 1 for v in list(genes.values()))),
               file=stderr)
         print('transcripts: {}, transcript avg. length: {:.0f}'.format(
                 len(trans), sum(trans_lengths.elements())/len(trans)),
--- hisat2.orig/hisat2_read_statistics.py
+++ hisat2/hisat2_read_statistics.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 #
 # Copyright 2018, Chanhee Park <parkchanhee@gmail.com> and Daehwan Kim <infphilo@gmail.com>
@@ -179,10 +179,10 @@
     fp.close()
 
     cnt, mn, mx, avg =  generate_stats(length_map)
-    length_map = sorted(length_map.iteritems(), key=lambda (k,v):(v,k), reverse=True)
+    length_map = sorted(iter(length_map.items()), key=lambda k_v:(k_v[1],k_v[0]), reverse=True)
     if len(length_map) == 0:
         length_map.append((0,0))
-    print cnt, mn, mx, avg, ",".join([str(k) for (k,v) in length_map])
+    print(cnt, mn, mx, avg, ",".join([str(k) for (k,v) in length_map]))
 
 if __name__ == '__main__':
 
--- hisat2.orig/hisat2_simulate_reads.py
+++ hisat2/hisat2_simulate_reads.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 #
 # Copyright 2015, Daehwan Kim <infphilo@gmail.com>
 #
@@ -91,11 +91,11 @@
         chr_dic[chr_name] = sequence
 
 
-    chr_filter = [str(x) for x in range(1, 23) + ['X', 'Y']]
+    chr_filter = [str(x) for x in list(range(1, 23)) + ['X', 'Y']]
     #chr_filter = None
 
     if chr_filter:
-        for chr_id, chr_seq in chr_dic.items():
+        for chr_id, chr_seq in list(chr_dic.items()):
             if not chr_id in chr_filter: 
                 chr_dic.pop(chr_id, None)
     
@@ -145,7 +145,7 @@
             transcripts[transcript_id][2].append([left, right])
 
     # Sort exons and merge where separating introns are <=5 bps
-    for tran, [chr, strand, exons] in transcripts.items():
+    for tran, [chr, strand, exons] in list(transcripts.items()):
             exons.sort()
             tmp_exons = [exons[0]]
             for i in range(1, len(exons)):
@@ -156,7 +156,7 @@
             transcripts[tran] = [chr, strand, tmp_exons]
 
     tmp_transcripts = {}
-    for tran, [chr, strand, exons] in transcripts.items():
+    for tran, [chr, strand, exons] in list(transcripts.items()):
         exon_lens = [e[1] - e[0] + 1 for e in exons]
         transcript_len = sum(exon_lens)
         if transcript_len >= frag_len:
@@ -192,7 +192,7 @@
 """
 def sanity_check_input(genome_seq, genes, transcripts, snps, frag_len):
     num_canon_ss, num_ss = 0, 0
-    for transcript, [chr, strand, transcript_len, exons] in transcripts.items():
+    for transcript, [chr, strand, transcript_len, exons] in list(transcripts.items()):
         assert transcript_len >= frag_len
         if len(exons) <= 1:
             continue
@@ -214,10 +214,10 @@
             num_ss += 1
 
     if num_ss > 0:
-        print >> sys.stderr, "GT/AG splice sites: {}/{} ({:.2%})".format(num_canon_ss, num_ss, (float(num_canon_ss) / num_ss))
+        print("GT/AG splice sites: {}/{} ({:.2%})".format(num_canon_ss, num_ss, (float(num_canon_ss) / num_ss)), file=sys.stderr)
 
     num_alt_single, num_single = 0, 0
-    for chr, chr_snps in snps.items():
+    for chr, chr_snps in list(snps.items()):
         if chr not in genome_seq:
             continue
         chr_seq = genome_seq[chr]
@@ -235,7 +235,7 @@
             num_single += 1
 
     if num_single > 0:
-        print >> sys.stderr, "Alternative bases: {}/{} ({:.2%})".format(num_alt_single, num_single, (float(num_alt_single) / num_single))
+        print("Alternative bases: {}/{} ({:.2%})".format(num_alt_single, num_single, (float(num_alt_single) / num_single)), file=sys.stderr)
 
 
 """
@@ -267,7 +267,7 @@
 """
 def generate_dna_expr_profile(genome_seq):
     expr_profile = []
-    for chr_id, chr_seq in genome_seq.items():
+    for chr_id, chr_seq in list(genome_seq.items()):
         expr_profile.append(len(chr_seq))
     expr_sum = float(sum(expr_profile))
     expr_profile = [expr_profile[i] / expr_sum for i in range(len(expr_profile))]
@@ -545,8 +545,8 @@
         MD += ("{}".format(MD_match_len))
 
     if len(read_seq) != read_len:
-        print >> sys.stderr, "read length differs:", len(read_seq), "vs.", read_len
-        print >> sys.stderr, pos, "".join(cigars), cigar_descs, MD, XM, NM, Zs
+        print("read length differs:", len(read_seq), "vs.", read_len, file=sys.stderr)
+        print(pos, "".join(cigars), cigar_descs, MD, XM, NM, Zs, file=sys.stderr)
         assert False
 
     return pos, cigars, cigar_descs, MD, XM, NM, Zs, read_seq
@@ -676,8 +676,8 @@
         tMD += ("{}".format(match_len))
 
     if tMD != MD or tXM != XM or tNM != NM or XM > max_mismatch or XM != NM:
-        print >> sys.stderr, chr, pos, cigar, MD, XM, NM, Zs
-        print >> sys.stderr, tMD, tXM, tNM
+        print(chr, pos, cigar, MD, XM, NM, Zs, file=sys.stderr)
+        print(tMD, tXM, tNM, file=sys.stderr)
         assert False
         
         
@@ -730,18 +730,18 @@
                 repeat_loci[chr].append([int(pos), strand])
 
     if rna:
-        transcript_ids = transcripts.keys()
+        transcript_ids = list(transcripts.keys())
         random.shuffle(transcript_ids)
         assert len(transcript_ids) >= len(expr_profile)
     else:
-        chr_ids = genome_seq.keys()
+        chr_ids = list(genome_seq.keys())
 
     sam_file = open(base_fname + ".sam", "w")
 
     # Write SAM header
-    print >> sam_file, "@HD\tVN:1.0\tSO:unsorted"
-    for chr in genome_seq.keys():
-        print >> sam_file, "@SQ\tSN:%s\tLN:%d" % (chr, len(genome_seq[chr]))
+    print("@HD\tVN:1.0\tSO:unsorted", file=sam_file)
+    for chr in list(genome_seq.keys()):
+        print("@SQ\tSN:%s\tLN:%d" % (chr, len(genome_seq[chr])), file=sam_file)
     
     read_file = open(base_fname + "_1.fa", "w")
     if paired_end:
@@ -753,10 +753,10 @@
         if rna:
             transcript_id = transcript_ids[t]
             chr, strand, transcript_len, exons = transcripts[transcript_id]
-            print >> sys.stderr, transcript_id, t_num_frags
+            print(transcript_id, t_num_frags, file=sys.stderr)
         else:
             chr = chr_ids[t]
-            print >> sys.stderr, chr, t_num_frags
+            print(chr, t_num_frags, file=sys.stderr)
 
         assert chr in genome_seq
         chr_seq = genome_seq[chr]
@@ -830,19 +830,19 @@
             else:
                 XS, TI = "", ""                
 
-            print >> read_file, ">{}".format(cur_read_id)
+            print(">{}".format(cur_read_id), file=read_file)
             if swapped:
-                print >> read_file, reverse_complement(read_seq)
+                print(reverse_complement(read_seq), file=read_file)
             else:
-                print >> read_file, read_seq
-            print >> sam_file, "{}\t{}\t{}\t{}\t255\t{}\t{}\t{}\t0\t{}\t*\tXM:i:{}\tNM:i:{}\tMD:Z:{}{}{}{}".format(cur_read_id, flag, chr, pos + 1, cigar_str, chr, pos2 + 1, read_seq, XM, NM, MD, Zs, XS, TI)
+                print(read_seq, file=read_file)
+            print("{}\t{}\t{}\t{}\t255\t{}\t{}\t{}\t0\t{}\t*\tXM:i:{}\tNM:i:{}\tMD:Z:{}{}{}{}".format(cur_read_id, flag, chr, pos + 1, cigar_str, chr, pos2 + 1, read_seq, XM, NM, MD, Zs, XS, TI), file=sam_file)
             if paired_end:
-                print >> read2_file, ">{}".format(cur_read_id)
+                print(">{}".format(cur_read_id), file=read2_file)
                 if swapped:
-                    print >> read2_file, read2_seq
+                    print(read2_seq, file=read2_file)
                 else:
-                    print >> read2_file, reverse_complement(read2_seq)
-                print >> sam_file, "{}\t{}\t{}\t{}\t255\t{}\t{}\t{}\t0\t{}\t*\tXM:i:{}\tNM:i:{}\tMD:Z:{}{}{}{}".format(cur_read_id, flag2, chr, pos2 + 1, cigar2_str, chr, pos + 1, read2_seq, XM2, NM2, MD2, Zs2, XS, TI)
+                    print(reverse_complement(read2_seq), file=read2_file)
+                print("{}\t{}\t{}\t{}\t255\t{}\t{}\t{}\t0\t{}\t*\tXM:i:{}\tNM:i:{}\tMD:Z:{}{}{}{}".format(cur_read_id, flag2, chr, pos2 + 1, cigar2_str, chr, pos + 1, read2_seq, XM2, NM2, MD2, Zs2, XS, TI), file=sam_file)
 
             cur_read_id += 1
             
--- hisat2.orig/hisat2lib/pymodule/ht2example.py
+++ hisat2/hisat2lib/pymodule/ht2example.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 
 #
 # Copyright 2018, Chanhee Park <parkchanhee@gmail.com> and Daehwan Kim <infphilo@gmail.com>
@@ -30,7 +30,7 @@
 # Get default options
 ht2_options = ht2py.get_options()
 
-print ht2_options
+print(ht2_options)
 ht2_options['gVerbose'] = 1
 ht2_options['startVerbose'] = 1
 # or
@@ -38,7 +38,7 @@
 
 handle = ht2py.init(ht2_index, ht2_options)
 
-print ht2py.index_getrefnamebyid(handle, 0)
+print(ht2py.index_getrefnamebyid(handle, 0))
 
 #print ht2py.index_getrefnamebyid(handle, 0, 1, 3, 5, 7, 9)
 # outofindex
@@ -62,7 +62,7 @@
     if direction == 1:
         chr_dir = '-'
 
-    print refnames[chr_id].split()[0] + ":" + str(chr_pos) + ':' + chr_dir
+    print(refnames[chr_id].split()[0] + ":" + str(chr_pos) + ':' + chr_dir)
 
 # close handle
 ht2py.close(handle)
--- hisat2.orig/hisat2lib/pymodule/setup.py
+++ hisat2/hisat2lib/pymodule/setup.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 
 #
 # Copyright 2018, Chanhee Park <parkchanhee@gmail.com> and Daehwan Kim <infphilo@gmail.com>
--- hisat2.orig/hisatgenotype.py
+++ hisat2/hisatgenotype.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 #
 # Copyright 2017, Daehwan Kim <infphilo@gmail.com>
@@ -57,9 +57,9 @@
 
     out_base_fname = read_fnames[0].split('/')[-1].split('.')[0]
 
-    print >> sys.stderr, "%s Aligning %s to %s ..." % (str(datetime.now()), ' '.join(read_fnames), base_fname)
+    print("%s Aligning %s to %s ..." % (str(datetime.now()), ' '.join(read_fnames), base_fname), file=sys.stderr)
     if verbose:
-        print >> sys.stderr, "\t%s" % (' '.join(aligner_cmd))
+        print("\t%s" % (' '.join(aligner_cmd)), file=sys.stderr)
 
     align_proc = subprocess.Popen(aligner_cmd,
                                   stdout=subprocess.PIPE,
@@ -78,7 +78,7 @@
     # Increase the maximum number of files that can be opened
     resource.setrlimit(resource.RLIMIT_NOFILE, (10000, 10240))
     
-    print >> sys.stderr, "%s Sorting %s ..." % (str(datetime.now()), unsorted_bam_fname)
+    print("%s Sorting %s ..." % (str(datetime.now()), unsorted_bam_fname), file=sys.stderr)
     bam_fname = "%s.bam" % out_base_fname
     bamsort_cmd = ["samtools",
                    "sort",
@@ -87,7 +87,7 @@
                    unsorted_bam_fname,
                    "-o", bam_fname]    
     if verbose:
-        print >> sys.stderr, "\t%s" % ' '.join(bamsort_cmd)
+        print("\t%s" % ' '.join(bamsort_cmd), file=sys.stderr)
     bamsort_proc = subprocess.call(bamsort_cmd)
     os.remove(unsorted_bam_fname)
 
@@ -101,12 +101,12 @@
 """
 def index_bam(bam_fname,
               verbose):
-    print >> sys.stderr, "%s Indexing %s ..." % (str(datetime.now()), bam_fname)
+    print("%s Indexing %s ..." % (str(datetime.now()), bam_fname), file=sys.stderr)
     bamindex_cmd = ["samtools",
                     "index",
                     bam_fname]
     if verbose:
-        print >> sys.stderr, "\t%s" % ' '.join(bamindex_cmd)
+        print("\t%s" % ' '.join(bamindex_cmd), file=sys.stderr)
     bamindex_proc = subprocess.call(bamindex_cmd)
 
 
@@ -159,7 +159,7 @@
 
     bamview_cmd = ["samtools", "view", bam_fname, "%s:%d-%d" % (chr, left+1, right+1)]
     if verbose:
-        print >> sys.stderr, "\t%s" % ' '.join(bamview_cmd)
+        print("\t%s" % ' '.join(bamview_cmd), file=sys.stderr)
     bamview_proc = subprocess.Popen(bamview_cmd,
                                     stdout=subprocess.PIPE,
                                     stderr=open("/dev/null", 'w'))
@@ -266,7 +266,7 @@
         genotype_cmd += ["--assembly"]
 
     if verbose:
-        print >> sys.stderr, "\t%s" % ' '.join(genotype_cmd)
+        print("\t%s" % ' '.join(genotype_cmd), file=sys.stderr)
     genotype_proc = subprocess.Popen(genotype_cmd)
     genotype_proc.communicate()
         
@@ -296,9 +296,9 @@
     # hisat2 graph index files
     genotype_fnames += ["%s.%d.ht2" % (base_fname, i+1) for i in range(8)]
     if not typing_common.check_files(genotype_fnames):
-        print >> sys.stderr, "Error: some of the following files are missing!"
+        print("Error: some of the following files are missing!", file=sys.stderr)
         for fname in genotype_fnames:
-            print >> sys.stderr, "\t%s" % fname
+            print("\t%s" % fname, file=sys.stderr)
         sys.exit(1)
 
     # Read region alleles (names and sequences)
@@ -322,7 +322,7 @@
         region_loci[family].append([locus_name, allele_name, chr, left, right])
 
     if len(region_loci) <= 0:
-        print >> sys.stderr, "Warning: no region exists!"
+        print("Warning: no region exists!", file=sys.stderr)
         sys.exit(1)
 
     # Align reads, and sort the alignments into a BAM file
@@ -339,13 +339,13 @@
     assert os.path.exists(alignment_fname + ".bai")
 
     # Extract reads and perform genotyping
-    for family, loci in region_loci.items():
-        print >> sys.stderr, "Analyzing %s ..." % family.upper()
+    for family, loci in list(region_loci.items()):
+        print("Analyzing %s ..." % family.upper(), file=sys.stderr)
         for locus_name, allele_name, chr, left, right in loci:
             out_read_fname = "%s.%s" % (family, locus_name)
             if verbose:
-                print >> sys.stderr, "\tExtracting reads beloning to %s-%s ..." % \
-                    (family, locus_name)
+                print("\tExtracting reads beloning to %s-%s ..." % \
+                    (family, locus_name), file=sys.stderr)
 
             extracted_read_fnames = extract_reads(alignment_fname,
                                                   chr,
@@ -366,7 +366,7 @@
                                local_database,
                                threads,
                                verbose)
-        print >> sys.stderr
+        print(file=sys.stderr)
 
     
                 
@@ -443,7 +443,7 @@
         for region in args.region_list.split(','):
             region = region.split('.')
             if len(region) < 1 or len(region) > 2:
-                print >> sys.stderr, "Error: --region-list is incorrectly formatted."
+                print("Error: --region-list is incorrectly formatted.", file=sys.stderr)
                 sys.exit(1)
                 
             family = region[0].lower()
@@ -457,12 +457,12 @@
     read_fnames = []
     if args.alignment_fname != "":
         if not os.path.exists(args.alignment_fname):
-            print >> sys.stderr, "Error: %s does not exist." % args.alignment_fname
+            print("Error: %s does not exist." % args.alignment_fname, file=sys.stderr)
     elif args.read_fname_U != "":
         read_fnames = [args.read_fname_U]
     else:
         if args.read_fname_1 == "" or args.read_fname_2 == "":
-            print >> sys.stderr, "Error: please specify read file names correctly: -U or -1 and -2"
+            print("Error: please specify read file names correctly: -U or -1 and -2", file=sys.stderr)
             sys.exit(1)
         read_fnames = [args.read_fname_1, args.read_fname_2]
 
--- hisat2.orig/hisatgenotype_build_genome.py
+++ hisat2/hisatgenotype_build_genome.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 #
 # Copyright 2016, Daehwan Kim <infphilo@gmail.com>
@@ -78,11 +78,11 @@
                             "--genotype-vcf", "clinvar.vcf.gz",
                             "genome.fa", "/dev/null", "clinvar"]
             if verbose:
-                print >> sys.stderr, "\tRunning:", ' '.join(extract_cmd)
+                print("\tRunning:", ' '.join(extract_cmd), file=sys.stderr)
             proc = subprocess.Popen(extract_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w'))
             proc.communicate()
             if not typing_common.check_files(CLINVAR_fnames):
-                print >> sys.stderr, "Error: extract variants from clinvar failed!"
+                print("Error: extract variants from clinvar failed!", file=sys.stderr)
                 sys.exit(1)
 
         # Read variants to be genotyped
@@ -109,11 +109,11 @@
                            "--intra-gap", str(intra_gap),
                            "genome.fa", "%s.txt" % commonvar_fbase, commonvar_fbase]
             if verbose:
-                print >> sys.stderr, "\tRunning:", ' '.join(extract_cmd)
+                print("\tRunning:", ' '.join(extract_cmd), file=sys.stderr)
             proc = subprocess.Popen(extract_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w'))
             proc.communicate()
             if not typing_common.check_files(commonvar_fnames):
-                print >> sys.stderr, "Error: extract variants from clinvar failed!"
+                print("Error: extract variants from clinvar failed!", file=sys.stderr)
                 sys.exit(1)
 
         # Read variants to be genotyped
@@ -198,13 +198,13 @@
                     continue
 
                 out_str = "%s\t%s\t%s\t%d\t%s" % (var_id, var_type, chr, var_left + off, var_data)
-                print >> var_out_file, out_str
-                print >> index_var_out_file, out_str
+                print(out_str, file=var_out_file)
+                print(out_str, file=index_var_out_file)
 
                 if var_id in genotype_clnsig:
                     var_gene, clnsig = genotype_clnsig[var_id]
-                    print >> clnsig_out_file, "%s\t%s\t%s" % \
-                        (var_id, var_gene, clnsig)
+                    print("%s\t%s\t%s" % \
+                        (var_id, var_gene, clnsig), file=clnsig_out_file)
                 
                 chr_genotype_vari += 1
 
@@ -217,8 +217,8 @@
                     chr_genotype_hti += 1
                     continue
 
-                print >> haplotype_out_file, "ht%d\t%s\t%d\t%d\t%s" % \
-                    (haplotype_num, chr, ht_left + off, ht_right + off, ','.join(ht_vars))
+                print("ht%d\t%s\t%d\t%d\t%s" % \
+                    (haplotype_num, chr, ht_left + off, ht_right + off, ','.join(ht_vars)), file=haplotype_out_file)
                 chr_genotype_hti += 1
                 haplotype_num += 1
 
@@ -233,8 +233,8 @@
 
             if not graph_index:
                 # Output gene (genotype_genome.gene)
-                print >> locus_out_file, "%s\t%s\t%s\t%d\t%d\t%s\t%s" % \
-                    (family.upper(), name, chr, left, right, exon_str, strand)
+                print("%s\t%s\t%s\t%d\t%d\t%s\t%s" % \
+                    (family.upper(), name, chr, left, right, exon_str, strand), file=locus_out_file)
                 continue            
 
             chr_genotype_vari, chr_genotype_hti, haplotype_num = add_vars(left, right, chr_genotype_vari, chr_genotype_hti, haplotype_num)
@@ -272,7 +272,7 @@
             assert left < chr_len and right < chr_len
             # Skipping overlapping genes
             if left < prev_right:
-                print >> sys.stderr, "Warning: skipping %s ..." % (name)
+                print("Warning: skipping %s ..." % (name), file=sys.stderr)
                 continue
 
             varID2htID = {}
@@ -285,12 +285,12 @@
                 out_chr_seq += chr_seq[prev_right:left]
 
             # Output gene (genotype_genome.gene)
-            print >> locus_out_file, "%s\t%s\t%s\t%d\t%d\t%s\t%s" % \
-                (family.upper(), name, chr, len(out_chr_seq), len(out_chr_seq) + length - 1, exon_str, strand)
+            print("%s\t%s\t%s\t%d\t%d\t%s\t%s" % \
+                (family.upper(), name, chr, len(out_chr_seq), len(out_chr_seq) + length - 1, exon_str, strand), file=locus_out_file)
 
             # Output coord (genotype_genome.coord)
-            print >> coord_out_file, "%s\t%d\t%d\t%d" % \
-                (chr, len(out_chr_seq), left, right - left + 1)
+            print("%s\t%d\t%d\t%d" % \
+                (chr, len(out_chr_seq), left, right - left + 1), file=coord_out_file)
             out_chr_seq += allele_seq
 
             # Output variants (genotype_genome.snp and genotype_genome.index.snp)
@@ -309,9 +309,9 @@
                     assert var_type == "insertion"
 
                 out_str = "%s\t%s\t%s\t%d\t%s" % (new_var_id, var_type, chr, new_var_left, var_data)
-                print >> var_out_file, out_str
+                print(out_str, file=var_out_file)
                 if var_id in index_var_ids:
-                    print >> index_var_out_file, out_str
+                    print(out_str, file=index_var_out_file)
                 var_num += 1
                 
             # Output haplotypes (genotype_genome.haplotype)
@@ -326,8 +326,8 @@
                 for var_id in ht_vars:
                     assert var_id in varID2htID
                     new_ht_vars.append(varID2htID[var_id])
-                print >> haplotype_out_file, "ht%d\t%s\t%d\t%d\t%s" % \
-                    (haplotype_num, chr, new_ht_left, new_ht_right, ','.join(new_ht_vars))
+                print("ht%d\t%s\t%d\t%d\t%s" % \
+                    (haplotype_num, chr, new_ht_left, new_ht_right, ','.join(new_ht_vars)), file=haplotype_out_file)
                 haplotype_num += 1
 
             # Output link information between alleles and variants (genotype_genome.link)
@@ -336,7 +336,7 @@
                 if var_id not in varID2htID:
                     continue
                 new_var_id = varID2htID[var_id]
-                print >> link_out_file, "%s\t%s" % (new_var_id, allele_names)
+                print("%s\t%s" % (new_var_id, allele_names), file=link_out_file)
                 
             off += (length - prev_length)
 
@@ -346,19 +346,19 @@
             continue
 
         # Write the rest of the Vars
-        chr_genotype_vari, chr_genotype_hti, haplotype_num = add_vars(sys.maxint, sys.maxint, chr_genotype_vari, chr_genotype_hti, haplotype_num)            
+        chr_genotype_vari, chr_genotype_hti, haplotype_num = add_vars(sys.maxsize, sys.maxsize, chr_genotype_vari, chr_genotype_hti, haplotype_num)            
             
-        print >> coord_out_file, "%s\t%d\t%d\t%d" % \
-            (chr, len(out_chr_seq), prev_right, len(chr_seq) - prev_right)
+        print("%s\t%d\t%d\t%d" % \
+            (chr, len(out_chr_seq), prev_right, len(chr_seq) - prev_right), file=coord_out_file)
         out_chr_seq += chr_seq[prev_right:]
 
         assert len(out_chr_seq) == len(chr_seq) + off
 
         # Output chromosome sequence
-        print >> genome_out_file, ">%s" % (chr_full_name)
+        print(">%s" % (chr_full_name), file=genome_out_file)
         line_width = 60
         for s in range(0, len(out_chr_seq), line_width):
-            print >> genome_out_file, out_chr_seq[s:s+line_width]
+            print(out_chr_seq[s:s+line_width], file=genome_out_file)
 
     genome_out_file.close()
     locus_out_file.close()
@@ -374,7 +374,7 @@
         for database in database_list:
             for line in open("%s.allele" % database):
                 allele_name = line.strip()
-                print >> allele_out_file, "%s\t%s" % (database.upper(), allele_name)
+                print("%s\t%s" % (database.upper(), allele_name), file=allele_out_file)
     allele_out_file.close()
 
     partial_out_file = open("%s.partial" % base_fname, 'w')
@@ -382,7 +382,7 @@
         for database in database_list:
             for line in open("%s.partial" % database):
                 allele_name = line.strip()
-                print >> partial_out_file, "%s\t%s" % (database.upper(), allele_name)
+                print("%s\t%s" % (database.upper(), allele_name), file=partial_out_file)
     partial_out_file.close()
 
     if not graph_index:
@@ -408,7 +408,7 @@
                      "%s.fa" % base_fname,
                      "%s" % base_fname]
     if verbose:
-        print >> sys.stderr, "\tRunning:", ' '.join(build_cmd)
+        print("\tRunning:", ' '.join(build_cmd), file=sys.stderr)
         
     subprocess.call(build_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w'))
 
@@ -418,7 +418,7 @@
         index_fnames = ["%s.%d.bt2" % (base_fname, i+1) for i in range(4)]
         index_fnames += ["%s.rev.%d.bt2" % (base_fname, i+1) for i in range(2)]
     if not typing_common.check_files(index_fnames):
-        print >> sys.stderr, "Error: indexing failed!  Perhaps, you may have forgotten to build %s executables?" % aligner
+        print("Error: indexing failed!  Perhaps, you may have forgotten to build %s executables?" % aligner, file=sys.stderr)
         sys.exit(1)
 
         
@@ -476,7 +476,7 @@
 
     args = parser.parse_args()
     if args.inter_gap > args.intra_gap:
-        print >> sys.stderr, "Error: --inter-gap (%d) must be smaller than --intra-gap (%d)" % (args.inter_gap, args.intra_gap)
+        print("Error: --inter-gap (%d) must be smaller than --intra-gap (%d)" % (args.inter_gap, args.intra_gap), file=sys.stderr)
         sys.exit(1)
         
     if args.database_list == "":
@@ -485,11 +485,11 @@
         database_list = args.database_list.split(',')
 
     if args.use_clinvar and args.use_commonvar:
-        print >> sys.stderr, "Error: both --clinvar and --commonvar cannot be used together."
+        print("Error: both --clinvar and --commonvar cannot be used together.", file=sys.stderr)
         sys.exit(1)
 
     if args.aligner not in ["hisat2", "bowtie2"]:
-        print >> sys.stderr, "Error: --aligner should be either hisat2 or bowtie2."
+        print("Error: --aligner should be either hisat2 or bowtie2.", file=sys.stderr)
         sys.exit(1)        
         
     build_genotype_genome(args.base_fname,
--- hisat2.orig/hisatgenotype_extract_reads.py
+++ hisat2/hisatgenotype_extract_reads.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 #
 # Copyright 2017, Daehwan Kim <infphilo@gmail.com>
@@ -112,9 +112,9 @@
         genotype_fnames += ["%s.rev.%d.bt2" % (base_fname, i+1) for i in range(2)]
         
     if not typing_common.check_files(genotype_fnames):        
-        print >> sys.stderr, "Error: %s related files do not exist as follows:" % base_fname
+        print("Error: %s related files do not exist as follows:" % base_fname, file=sys.stderr)
         for fname in genotype_fnames:
-            print >> sys.stderr, "\t%s" % fname
+            print("\t%s" % fname, file=sys.stderr)
         sys.exit(1)
 
     filter_region = len(database_list) > 0
@@ -177,7 +177,7 @@
             else:
                 fq_fname2 = "%s/%s.2.%s" % (read_dir, fq_fname_base, suffix)
             if not os.path.exists(fq_fname2):
-                print >> sys.stderr, "%s does not exist." % fq_fname2
+                print("%s does not exist." % fq_fname2, file=sys.stderr)
                 continue
         else:
             fq_fname2 = ""
@@ -192,7 +192,7 @@
                     continue
         count += 1
 
-        print >> sys.stderr, "\t%d: Extracting reads from %s" % (count, fq_fname_base)
+        print("\t%d: Extracting reads from %s" % (count, fq_fname_base), file=sys.stderr)
         def work(fq_fname_base,
                  fq_fname, 
                  fq_fname2, 
@@ -215,7 +215,7 @@
             else:
                 aligner_cmd += ["-U", fq_fname]
             if verbose:
-                print >> sys.stderr, "\t\trunning", ' '.join(aligner_cmd)
+                print("\t\trunning", ' '.join(aligner_cmd), file=sys.stderr)
             align_proc = subprocess.Popen(aligner_cmd,
                                           stdout=subprocess.PIPE,
                                           stderr=open("/dev/null", 'w'))
@@ -330,7 +330,7 @@
                 if flag & 0x4 == 0 and \
                    ((aligner == "hisat2" and NH == 1) or (aligner == "bowtie2" and AS > XS and read1_first if flag & 0x40 or not paired else read2_first)):
                     if chr in region_loci:
-                        for region, loci in region_loci[chr].items():
+                        for region, loci in list(region_loci[chr].items()):
                             region = region.split('-')[0].lower()
                             _, _, loci_left, loci_right = loci
                             # there might be a different candidate region for each of left and right reads
@@ -371,12 +371,12 @@
                 if paired:
                     write_read(whole_gzip_dic[region_chr][region_num][1], prev_read_name, read2[0], read2[1])
 
-            for gzip1_proc, gzip2_proc in gzip_dic.values():
+            for gzip1_proc, gzip2_proc in list(gzip_dic.values()):
                 gzip1_proc.stdin.close()
                 if paired:
                     gzip2_proc.stdin.close()
 
-            for gzip_list in whole_gzip_dic.values():
+            for gzip_list in list(whole_gzip_dic.values()):
                 for gzip1_proc, gzip2_proc in gzip_list:
                     gzip1_proc.stdin.close()
                     if paired:
@@ -470,7 +470,7 @@
     parser.add_argument("--max-sample",
                         dest="max_sample",
                         type=int,
-                        default=sys.maxint,
+                        default=sys.maxsize,
                         help="Number of samples to be extracted (default: sys.maxint)")
     parser.add_argument("--job-range",
                         dest="job_range",
@@ -501,24 +501,24 @@
         args.read_fname = [args.read_fname_U]
     elif args.read_fname_1 != "" or args.read_fname_2 != "":
         if args.read_fname_1 == "" or args.read_fname_2 == "":
-            print >> sys.stderr, "Error: please specify both -1 and -2."
+            print("Error: please specify both -1 and -2.", file=sys.stderr)
             sys.exit(1)
         args.read_fname = [args.read_fname_1, args.read_fname_2]
     else:
         args.read_fname = []
     if len(args.read_fname) == 0:
         if args.read_dir == "" or not os.path.exists(args.read_dir):
-            print >> sys.stderr, "Error: please specify --read-dir with an existing directory."
+            print("Error: please specify --read-dir with an existing directory.", file=sys.stderr)
             sys.exit(1)
         if args.out_dir == "":
-            print >> sys.stderr, "Error: please specify --out-dir with a directory name."
+            print("Error: please specify --out-dir with a directory name.", file=sys.stderr)
             sys.exit(1)
     job_range = []
     for num in args.job_range.split(','):
         job_range.append(int(num))
 
     if args.aligner not in ["hisat2", "bowtie2"]:
-        print >> sys.stderr, "Error: --aligner should be either hisat2 or bowtie2."
+        print("Error: --aligner should be either hisat2 or bowtie2.", file=sys.stderr)
         sys.exit(1)        
     block_size = 20000000 if args.extract_whole else 0
         
--- hisat2.orig/hisatgenotype_extract_vars.py
+++ hisat2/hisatgenotype_extract_vars.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 #
 # Copyright 2015, Daehwan Kim <infphilo@gmail.com>
@@ -163,8 +163,8 @@
             break
 
         if debug:
-            print >> sys.stderr, bp_i, bp_j, backbone_seq[bp_i-10:bp_i], backbone_seq[bp_i:bp_j], backbone_seq[bp_j:bp_j+10]
-            print >> sys.stderr, bp_i, bp_j, ''.join(seq[bp_i-10:bp_i]), ''.join(seq[bp_i:bp_j]), ''.join(seq[bp_j:bp_j+10])
+            print(bp_i, bp_j, backbone_seq[bp_i-10:bp_i], backbone_seq[bp_i:bp_j], backbone_seq[bp_j:bp_j+10], file=sys.stderr)
+            print(bp_i, bp_j, ''.join(seq[bp_i-10:bp_i]), ''.join(seq[bp_i:bp_j]), ''.join(seq[bp_j:bp_j+10]), file=sys.stderr)
         prev_i, prev_j = bp_i, bp_j
 
         while bp_i > 0 and seq[bp_i-1] in "ACGT" and backbone_seq[bp_j-1] in "ACGT":
@@ -182,7 +182,7 @@
 
         # DK - debugging purposes
         if debug:
-            print prev_i, prev_j, ''.join(seq[prev_i-10:prev_i]), ''.join(seq[prev_i:prev_j]), ''.join(seq[prev_j:prev_j+10])
+            print(prev_i, prev_j, ''.join(seq[prev_i-10:prev_i]), ''.join(seq[prev_i:prev_j]), ''.join(seq[prev_j:prev_j+10]))
 
     return ''.join(seq)
 
@@ -253,7 +253,7 @@
                                       stdout=subprocess.PIPE,
                                       stderr=open("/dev/null", 'w'))
         allele_id = ""
-        best_chr, best_left, best_right, best_AS, best_strand = "", -1, -1, -sys.maxint, ''
+        best_chr, best_left, best_right, best_AS, best_strand = "", -1, -1, -sys.maxsize, ''
         for line in align_proc.stdout:
             if line.startswith('@'):
                 continue
@@ -304,8 +304,8 @@
         assert allele_name != "" and strand != ''
         genes[gene] = allele_name
         gene_strand[gene] = strand
-        print >> sys.stderr, "%s-%s's reference allele is %s on '%s' strand of chromosome %s" % \
-            (base_fname.upper(), gene, allele_name, strand, chr)
+        print("%s-%s's reference allele is %s on '%s' strand of chromosome %s" % \
+            (base_fname.upper(), gene, allele_name, strand, chr), file=sys.stderr)
 
         assert chr != "" and left >= 0 and right > left
         if ext_seq_len > 0:
@@ -381,8 +381,8 @@
                 else:
                     gene_exon_counts[gene][num] += 1
                 
-        for gene, exon_counts in gene_exon_counts.items():
-            print >> sys.stderr, "%s exon counts:" % gene, exon_counts
+        for gene, exon_counts in list(gene_exon_counts.items()):
+            print("%s exon counts:" % gene, exon_counts, file=sys.stderr)
 
     tmp_locus_list = []
     for gene in locus_list:
@@ -392,7 +392,7 @@
             continue
         tmp_locus_list.append(gene)
     locus_list = tmp_locus_list
-    for key in genes.keys():
+    for key in list(genes.keys()):
         if key in locus_list:
             continue
         del genes[key]
@@ -418,7 +418,7 @@
     
     num_vars, num_haplotypes = 0, 0
     full_alleles = {}
-    for gene, ref_gene in genes.items():
+    for gene, ref_gene in list(genes.items()):
         strand = gene_strand[gene]
         left_ext_seq, right_ext_seq = "", ""
         if gene in left_ext_seq_dic:
@@ -444,7 +444,7 @@
                         continue
 
                     if name in names:
-                        print >> sys.stderr, "Warning: %s is found more than once in Names" % (name)
+                        print("Warning: %s is found more than once in Names" % (name), file=sys.stderr)
                         continue
 
                     names[name] = len(names)
@@ -492,7 +492,7 @@
             MSA_fname = "hisatgenotype_db/%s/msf/%s_gen.msf" % (base_fname.upper(), gene)
             
         if not os.path.exists(MSA_fname):
-            print >> sys.stderr, "Warning: %s does not exist" % MSA_fname
+            print("Warning: %s does not exist" % MSA_fname, file=sys.stderr)
             continue
 
         names, seqs = read_MSF_file(MSA_fname, left_ext_seq, right_ext_seq)
@@ -512,7 +512,7 @@
                     seq_lens[seq_len] += 1
 
             max_seq_count = 0
-            for tmp_seq_len, tmp_seq_count in seq_lens.items():
+            for tmp_seq_len, tmp_seq_count in list(seq_lens.items()):
                 if tmp_seq_count > max_seq_count:
                     seq_len = tmp_seq_len
                     max_seq_count = tmp_seq_count
@@ -531,7 +531,7 @@
         if partial and base_fname == "hla":
             partial_MSA_fname = "hisatgenotype_db/HLA/msf/%s_nuc.msf" % gene
             if not os.path.exists(partial_MSA_fname):
-                print >> sys.stderr, "Warning: %s does not exist" % partial_MSA_fname
+                print("Warning: %s does not exist" % partial_MSA_fname, file=sys.stderr)
                 continue
             partial_names, partial_seqs = read_MSF_file(partial_MSA_fname)
 
@@ -572,7 +572,7 @@
                 ref_exons.append([ref_seq_map[left], ref_seq_map[right]])
                 next_exon_len = right - left + exon_len
                 if next_exon_len >= len(ref_partial_seq_map):
-                    print >> sys.stderr, "Warning: partial sequences (%s) seem to be incomplete" % gene
+                    print("Warning: partial sequences (%s) seem to be incomplete" % gene, file=sys.stderr)
                     complete = False
                     break
                 ref_partial_exons.append([ref_partial_seq_map[exon_len], ref_partial_seq_map[next_exon_len]])
@@ -588,7 +588,7 @@
                                                                                    partial_seq_len,
                                                                                    min_var_freq,
                                                                                    False) # Remove empty sequences?
-                for name, seq_id in partial_names.items():
+                for name, seq_id in list(partial_names.items()):
                     if name in names:
                         continue
                     seq = partial_seqs[seq_id]
@@ -630,7 +630,7 @@
                     exons.append([left, right])
                 gene_exons[gene] = exons
                 exon_counts = {}
-                for exon_i, count in gene_exon_counts[gene].items():
+                for exon_i, count in list(gene_exon_counts[gene].items()):
                     exon_counts[len(gene_exons[gene]) - exon_i - 1] = count
                 gene_exon_counts[gene] = exon_counts
 
@@ -644,17 +644,17 @@
             backbone_seq, backbone_freq = create_consensus_seq(seqs, seq_len, min_var_freq, True)
             seq_len = find_seq_len(seqs)
 
-        print >> sys.stderr, "%s: number of HLA alleles is %d." % (gene, len(names))
+        print("%s: number of HLA alleles is %d." % (gene, len(names)), file=sys.stderr)
 
         Vars = {}
-        for cmp_name, id in names.items():
+        for cmp_name, id in list(names.items()):
             if cmp_name == backbone_name:
                 continue
             assert id < len(seqs)
             cmp_seq = seqs[id]
             if len(cmp_seq) != seq_len:
-                print >> sys.stderr, "Warning: the length of %s (%d) is different from %d" % \
-                    (cmp_name, len(cmp_seq), seq_len)
+                print("Warning: the length of %s (%d) is different from %d" % \
+                    (cmp_name, len(cmp_seq), seq_len), file=sys.stderr)
                 continue
 
             # DK - debugging purposes
@@ -754,7 +754,7 @@
                 insertVar('D', deletion)
 
 
-        print >> sys.stderr, "Number of variants is %d." % (len(Vars.keys()))
+        print("Number of variants is %d." % (len(list(Vars.keys()))), file=sys.stderr)
 
         # Compare variants
         def cmp_varKey(a, b):
@@ -784,20 +784,20 @@
                 return int(a_data) - int(b_data)            
 
         Vars_ = {}
-        for key, values in Vars.items():
+        for key, values in list(Vars.items()):
             freq, names_ = values
             for name in names_:
                 if not name in Vars_:
                     Vars_[name] = [key]
                 else:
                     Vars_[name].append(key)
-        for name, vars in Vars_.items():
+        for name, vars in list(Vars_.items()):
             Vars_[name] = sorted(vars, cmp=cmp_varKey)
 
         # Sanity check -
         #    (1) Reconstruct the other sequences from the backbone sequence and variants and
         #    (2) Confirm these constructed sequences are the same as those input sequences.
-        for cmp_name, id in names.items():
+        for cmp_name, id in list(names.items()):
             if cmp_name == backbone_name:
                 continue
 
@@ -835,27 +835,27 @@
             assert id < len(seqs)
             cmp_seq = seqs[id].replace('.', '')
             if len(constr_seq) != len(cmp_seq):
-                print >> sys.stderr, "Error: reconstruction fails (%s)! Lengths different: %d vs. %d" % \
-                    (cmp_name, len(constr_seq), len(cmp_seq))
+                print("Error: reconstruction fails (%s)! Lengths different: %d vs. %d" % \
+                    (cmp_name, len(constr_seq), len(cmp_seq)), file=sys.stderr)
                 assert False
 
             # Sanity check
             for s in range(len(constr_seq)):
                 if constr_seq[s] != cmp_seq[s]:
-                    print >> sys.stderr, "Differ at %d: %s vs. %s (reconstruction vs. original)" % \
-                        (s, constr_seq[s], cmp_seq[s])
-                    print "%s:%s vs. %s:%s" % \
-                        (constr_seq[s-10:s], constr_seq[s:s+10], cmp_seq[s-10:s], cmp_seq[s:s+10])
+                    print("Differ at %d: %s vs. %s (reconstruction vs. original)" % \
+                        (s, constr_seq[s], cmp_seq[s]), file=sys.stderr)
+                    print("%s:%s vs. %s:%s" % \
+                        (constr_seq[s-10:s], constr_seq[s:s+10], cmp_seq[s-10:s], cmp_seq[s:s+10]))
 
             if constr_seq != cmp_seq.replace('.', ''):
-                print >> sys.stderr, "Error: reconstruction fails for %s" % (cmp_name)
+                print("Error: reconstruction fails for %s" % (cmp_name), file=sys.stderr)
                 assert False
 
         # Write the backbone sequences into a fasta file
-        print >> backbone_file, ">%s" % (backbone_name)
+        print(">%s" % (backbone_name), file=backbone_file)
         backbone_seq_ = backbone_seq.replace('.', '')
         for s in range(0, len(backbone_seq_), 60):
-            print >> backbone_file, backbone_seq_[s:s+60]
+            print(backbone_seq_[s:s+60], file=backbone_file)
 
         # Remap the backbone allele, which is sometimes slighly different from
         #   fasta version
@@ -871,7 +871,7 @@
         align_proc = subprocess.Popen(aligner_cmd,
                                       stdout=subprocess.PIPE,
                                       stderr=open("/dev/null", 'w'))
-        best_chr, best_left, best_right, best_AS = "", 0, 0, -sys.maxint
+        best_chr, best_left, best_right, best_AS = "", 0, 0, -sys.maxsize
         for line in align_proc.stdout:
             if line.startswith('@'):
                 continue
@@ -900,7 +900,7 @@
         chr, left, right = best_chr, best_left, best_right
         align_proc.communicate()
         if left == right:
-            print >> sys.stderr, "Warning: %s (%s) is not remapped" % (gene, ref_gene)
+            print("Warning: %s (%s) is not remapped" % (gene, ref_gene), file=sys.stderr)
             continue
         assert left < right
 
@@ -989,17 +989,17 @@
                     print cmp_exon_seq_[p:p+60]
                 """
                 if exon_seq_ != cmp_exon_seq_:
-                    print >> sys.stderr, "Waring: exonic sequences do not match (%s)" % gene
+                    print("Waring: exonic sequences do not match (%s)" % gene, file=sys.stderr)
         else:
             exon_str = "%d-%d" % (left, right - 1)
 
-        print >> locus_file, "%s\t%s\t%d\t%d\t%d\t%s\t%s" % \
-            (backbone_name, chr, left, right - 1, len(backbone_seq.replace('.', '')), exon_str, gene_strand[gene])
+        print("%s\t%s\t%d\t%d\t%d\t%s\t%s" % \
+            (backbone_name, chr, left, right - 1, len(backbone_seq.replace('.', '')), exon_str, gene_strand[gene]), file=locus_file)
 
         # Write
         #       (1) variants w.r.t the backbone sequences into a SNP file
         #       (2) pairs of a variant and the corresponding HLA allels into a LINK file    
-        keys = sorted(Vars.keys(), cmp=cmp_varKey)
+        keys = sorted(Vars.keys(), key=__import__("functools").cmp_to_key(cmp_varKey))  # py3: sorted() has no cmp=
         var2ID = {}
         for k in range(len(keys)):
             locus, type, data = keys[k].split('-')
@@ -1016,20 +1016,20 @@
             names_ = sorted(names_)            
             varID = "hv%d" % (num_vars)
             tmp_backbone_name = backbone_name
-            print >> var_file, "%s\t%s\t%s\t%d\t%s" % \
-                (varID, type_str, tmp_backbone_name, base_locus + locus, data)
+            print("%s\t%s\t%s\t%d\t%s" % \
+                (varID, type_str, tmp_backbone_name, base_locus + locus, data), file=var_file)
             if freq >= min_var_freq:
-                print >> var_index_file, "%s\t%s\t%s\t%d\t%s" % \
-                    (varID, type_str, tmp_backbone_name, base_locus + locus, data)
-            print >> var_freq_file, "%s\t%.2f" % (varID, freq)
-            print >> link_file, "%s\t%s" % (varID, ' '.join(names_))
+                print("%s\t%s\t%s\t%d\t%s" % \
+                    (varID, type_str, tmp_backbone_name, base_locus + locus, data), file=var_index_file)
+            print("%s\t%.2f" % (varID, freq), file=var_freq_file)
+            print("%s\t%s" % (varID, ' '.join(names_)), file=link_file)
             var2ID[keys[k]] = num_vars
             num_vars += 1
 
         add_seq_len = 0
         # Write haplotypes
         excluded_vars = set()
-        var_leftmost, var_rightmost = sys.maxint, -1
+        var_leftmost, var_rightmost = sys.maxsize, -1
         for k in range(len(keys)):
             key = keys[k]
             if Vars[key][0] < min_var_freq:
@@ -1164,7 +1164,7 @@
                         h_end += (int(h2_data) - 1)
                     assert h_begin <= h_end
                     h_new_begin = h_begin
-                    for h_j in reversed(range(0, h_i)):
+                    for h_j in reversed(list(range(0, h_i))):
                         hc = haplotypes[h_j].split('#')
                         hc_begin, hc_type, hc_data = hc[-1].split('-')
                         hc_begin = int(hc_begin)
@@ -1178,30 +1178,30 @@
                     assert h_new_begin <= h_begin
                     h_begin = h_new_begin
                 tmp_backbone_name = backbone_name
-                print >> haplotype_file, "ht%d\t%s\t%d\t%d\t%s" % \
-                    (num_haplotypes, tmp_backbone_name, base_locus + h_begin, base_locus + h_end, ','.join(varIDs))
+                print("ht%d\t%s\t%d\t%d\t%s" % \
+                    (num_haplotypes, tmp_backbone_name, base_locus + h_begin, base_locus + h_end, ','.join(varIDs)), file=haplotype_file)
                 num_haplotypes += 1
                 add_seq_len += (h_end - h_begin + 1)
             assert len(sanity_vars) == len(cur_vars)
                     
             i = j
 
-        print >> sys.stderr, "Length of additional sequences for haplotypes:", add_seq_len
+        print("Length of additional sequences for haplotypes:", add_seq_len, file=sys.stderr)
                     
         # Write all the sequences with dots removed into a file
-        for name, ID in names.items():
-            print >> input_file, ">%s" % (name)
+        for name, ID in list(names.items()):
+            print(">%s" % (name), file=input_file)
             assert ID < len(seqs)
             seq = seqs[ID].replace('.', '')
             for s in range(0, len(seq), 60):
-                print >> input_file, seq[s:s+60]
-            print >> allele_file, name
+                print(seq[s:s+60], file=input_file)
+            print(name, file=allele_file)
 
                     
         # Write partial allele names
         for name in names:
             if name not in full_allele_names:
-                print >> partial_file, name
+                print(name, file=partial_file)
 
     backbone_file.close()
     locus_file.close()
@@ -1274,7 +1274,7 @@
     else:
         locus_list = args.locus_list.split(',')
     if args.inter_gap > args.intra_gap:
-        print >> sys.stderr, "Error: --inter-gap (%d) must be smaller than --intra-gap (%d)" % (args.inter_gap, args.intra_gap)
+        print("Error: --inter-gap (%d) must be smaller than --intra-gap (%d)" % (args.inter_gap, args.intra_gap), file=sys.stderr)
         sys.exit(1)
              
     if args.base_fname.find('/') != -1:
--- hisat2.orig/hisatgenotype_hla_cyp.py
+++ hisat2/hisatgenotype_hla_cyp.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 #
 # Copyright 2015, Daehwan Kim <infphilo@gmail.com>
@@ -62,8 +62,8 @@
     def write_reads(reads, idx):
         read_file = open('hla_input_%d.fa' % idx, 'w')
         for read_i in range(len(reads)):
-            print >> read_file, ">%d" % (read_i + 1)
-            print >> read_file, reads[read_i]
+            print(">%d" % (read_i + 1), file=read_file)
+            print(reads[read_i], file=read_file)
         read_file.close()
     write_reads(HLA_reads_1, 1)
     write_reads(HLA_reads_2, 2)
@@ -106,7 +106,7 @@
                         "-2", "%s" % read_fname[1]]
 
     if verbose:
-        print >> sys.stderr, ' '.join(aligner_cmd)
+        print(' '.join(aligner_cmd), file=sys.stderr)
     align_proc = subprocess.Popen(aligner_cmd,
                                   stdout=subprocess.PIPE,
                                   stderr=open("/dev/null", 'w'))
@@ -145,7 +145,7 @@
 """ 
 def normalize(prob):
     total = sum(prob.values())
-    for allele, mass in prob.items():
+    for allele, mass in list(prob.items()):
         prob[allele] = mass / total
 
         
@@ -153,7 +153,7 @@
 """
 def prob_diff(prob1, prob2):
     diff = 0.0
-    for allele in prob1.keys():
+    for allele in list(prob1.keys()):
         if allele in prob2:
             diff += abs(prob1[allele] - prob2[allele])
         else:
@@ -182,15 +182,15 @@
                      HLA_length):
     def normalize2(prob, length):
         total = 0
-        for allele, mass in prob.items():
+        for allele, mass in list(prob.items()):
             assert allele in length
             total += (mass / length[allele])
-        for allele, mass in prob.items():
+        for allele, mass in list(prob.items()):
             assert allele in length
             prob[allele] = mass / length[allele] / total
 
     HLA_prob, HLA_prob_next = {}, {}
-    for cmpt, count in HLA_cmpt.items():
+    for cmpt, count in list(HLA_cmpt.items()):
         alleles = cmpt.split('-')
         for allele in alleles:
             if allele not in HLA_prob:
@@ -201,7 +201,7 @@
     normalize(HLA_prob)
     def next_prob(HLA_cmpt, HLA_prob, HLA_length):
         HLA_prob_next = {}
-        for cmpt, count in HLA_cmpt.items():
+        for cmpt, count in list(HLA_cmpt.items()):
             alleles = cmpt.split('-')
             alleles_prob = 0.0
             for allele in alleles:
@@ -221,11 +221,11 @@
         diff = prob_diff(HLA_prob, HLA_prob_next)
         HLA_prob = HLA_prob_next
         iter += 1
-    for allele, prob in HLA_prob.items():
+    for allele, prob in list(HLA_prob.items()):
         allele_len = HLA_length[allele]
         HLA_prob[allele] /= float(allele_len)
     normalize(HLA_prob)
-    HLA_prob = [[allele, prob] for allele, prob in HLA_prob.items()]
+    HLA_prob = [[allele, prob] for allele, prob in list(HLA_prob.items())]
     HLA_prob = sorted(HLA_prob, cmp=HLA_prob_cmp)
     return HLA_prob
 
@@ -235,11 +235,11 @@
 def joint_abundance(HLA_cmpt,
                     HLA_length):
     allele_names = set()
-    for cmpt in HLA_cmpt.keys():
+    for cmpt in list(HLA_cmpt.keys()):
         allele_names |= set(cmpt.split('-'))
     
     HLA_prob, HLA_prob_next = {}, {}
-    for cmpt, count in HLA_cmpt.items():
+    for cmpt, count in list(HLA_cmpt.items()):
         alleles = cmpt.split('-')
         for allele1 in alleles:
             for allele2 in allele_names:
@@ -256,7 +256,7 @@
 
     # Choose top allele pairs
     def choose_top_alleles(HLA_prob):
-        HLA_prob_list = [[allele_pair, prob] for allele_pair, prob in HLA_prob.items()]
+        HLA_prob_list = [[allele_pair, prob] for allele_pair, prob in list(HLA_prob.items())]
         HLA_prob_list = sorted(HLA_prob_list, cmp=HLA_prob_cmp)
         HLA_prob = {}
         best_prob = HLA_prob_list[0][1]
@@ -271,15 +271,15 @@
 
     def next_prob(HLA_cmpt, HLA_prob):
         HLA_prob_next = {}
-        for cmpt, count in HLA_cmpt.items():
+        for cmpt, count in list(HLA_cmpt.items()):
             alleles = cmpt.split('-')
             prob = 0.0
             for allele in alleles:
-                for allele_pair in HLA_prob.keys():
+                for allele_pair in list(HLA_prob.keys()):
                     if allele in allele_pair:
                         prob += HLA_prob[allele_pair]
             for allele in alleles:
-                for allele_pair in HLA_prob.keys():
+                for allele_pair in list(HLA_prob.keys()):
                     if not allele in allele_pair:
                         continue
                     if allele_pair not in HLA_prob_next:
@@ -296,7 +296,7 @@
         HLA_prob = choose_top_alleles(HLA_prob)
         iter += 1
 
-    HLA_prob = [[allele_pair, prob] for allele_pair, prob in HLA_prob.items()]
+    HLA_prob = [[allele_pair, prob] for allele_pair, prob in list(HLA_prob.items())]
     HLA_prob = sorted(HLA_prob, cmp=HLA_prob_cmp)
     return HLA_prob
 
@@ -350,9 +350,9 @@
         test_passed = {}
     for aligner, index_type in aligners:
         if index_type == "graph":
-            print >> sys.stderr, "\n\t\t%s %s on %s" % (aligner, index_type, reference_type)
+            print("\n\t\t%s %s on %s" % (aligner, index_type, reference_type), file=sys.stderr)
         else:
-            print >> sys.stderr, "\n\t\t%s %s" % (aligner, index_type)
+            print("\n\t\t%s %s" % (aligner, index_type), file=sys.stderr)
 
         if alignment_fname == "":
             # Align reads, and sort the alignments into a BAM file
@@ -448,8 +448,8 @@
                     debug = False
                     if read_id in ["2339"] and False:
                         debug = True
-                        print "read_id: %s)" % read_id, pos, cigar_str, "NM:", NM, MD, Zs
-                        print "            ", read_seq
+                        print("read_id: %s)" % read_id, pos, cigar_str, "NM:", NM, MD, Zs)
+                        print("            ", read_seq)
 
                     vars = []
                     if Zs:
@@ -545,7 +545,7 @@
                     def add_stat(HLA_cmpt, HLA_counts, HLA_count_per_read, exon = True):
                         max_count = max(HLA_count_per_read.values())
                         cur_cmpt = set()
-                        for allele, count in HLA_count_per_read.items():
+                        for allele, count in list(HLA_count_per_read.items()):
                             if count < max_count:
                                 continue
                             if allele in exclude_allele_list:
@@ -563,7 +563,7 @@
                         alleles = ["", ""]
                         # alleles = ["B*40:304", "B*40:02:01"]
                         allele1_found, allele2_found = False, False
-                        for allele, count in HLA_count_per_read.items():
+                        for allele, count in list(HLA_count_per_read.items()):
                             if count < max_count:
                                 continue
                             if allele == alleles[0]:
@@ -571,13 +571,13 @@
                             elif allele == alleles[1]:
                                 allele2_found = True
                         if allele1_found != allele2_found:
-                            print alleles[0], HLA_count_per_read[alleles[0]]
-                            print alleles[1], HLA_count_per_read[alleles[1]]
+                            print(alleles[0], HLA_count_per_read[alleles[0]])
+                            print(alleles[1], HLA_count_per_read[alleles[1]])
                             if allele1_found:
-                                print ("%s\tread_id %s - %d vs. %d]" % (alleles[0], prev_read_id, max_count, HLA_count_per_read[alleles[1]]))
+                                print(("%s\tread_id %s - %d vs. %d]" % (alleles[0], prev_read_id, max_count, HLA_count_per_read[alleles[1]])))
                             else:
-                                print ("%s\tread_id %s - %d vs. %d]" % (alleles[1], prev_read_id, max_count, HLA_count_per_read[alleles[0]]))
-                            print read_seq
+                                print(("%s\tread_id %s - %d vs. %d]" % (alleles[1], prev_read_id, max_count, HLA_count_per_read[alleles[0]])))
+                            print(read_seq)
 
                         cur_cmpt = sorted(list(cur_cmpt))
                         cur_cmpt = '-'.join(cur_cmpt)
@@ -609,11 +609,11 @@
                             # daehwan - for debugging purposes
                             if debug:
                                 if allele in ["DQA1*05:05:01:01", "DQA1*05:05:01:02"]:
-                                    print allele, add, var_id
+                                    print(allele, add, var_id)
 
                     # Decide which allele(s) a read most likely came from
                     # also sanity check - read length, cigar string, and MD string
-                    for var_id, data in Vars[gene].items():
+                    for var_id, data in list(Vars[gene].items()):
                         var_type, var_pos, var_data = data
                         if var_type != "deletion":
                             continue
@@ -637,13 +637,13 @@
                                             add_count(var_id, -1)
                                             # daehwan - for debugging purposes
                                             if debug:
-                                                print cmp, var_id, Links[var_id]
+                                                print(cmp, var_id, Links[var_id])
                                     elif var_type == "deletion":
                                         del_len = int(var_data)
                                         if ref_pos < var_pos and ref_pos + length > var_pos + del_len:
                                             # daehwan - for debugging purposes
                                             if debug:
-                                                print cmp, var_id, Links[var_id], -1, Vars[gene][var_id]
+                                                print(cmp, var_id, Links[var_id], -1, Vars[gene][var_id])
                                             # Check if this might be one of the two tandem repeats (the same left coordinate)
                                             cmp_left, cmp_right = cmp[1], cmp[1] + cmp[2]
                                             test1_seq1 = ref_seq[cmp_left:cmp_right]
@@ -657,7 +657,7 @@
                                                 add_count(var_id, -1)
                                     else:
                                         if debug:
-                                            print cmp, var_id, Links[var_id], -1
+                                            print(cmp, var_id, Links[var_id], -1)
                                         add_count(var_id, -1)
                                 var_idx += 1
 
@@ -678,7 +678,7 @@
                                         if var_data == read_base:
                                             # daehwan - for debugging purposes
                                             if debug:
-                                                print cmp, var_id, 1, var_data, read_base, Links[var_id]
+                                                print(cmp, var_id, 1, var_data, read_base, Links[var_id])
 
                                             # daehwan - for debugging purposes
                                             if False:
@@ -701,8 +701,8 @@
                             var_idx = lower_bound(Var_list[gene], ref_pos)
                             # daehwan - for debugging purposes
                             if debug:
-                                print left_pos, cigar_str, MD, vars
-                                print ref_pos, ins_seq, Var_list[gene][var_idx], Vars[gene][Var_list[gene][var_idx][1]]
+                                print(left_pos, cigar_str, MD, vars)
+                                print(ref_pos, ins_seq, Var_list[gene][var_idx], Vars[gene][Var_list[gene][var_idx][1]])
                                 # sys.exit(1)
                             while var_idx < len(Var_list[gene]):
                                 var_pos, var_id = Var_list[gene][var_idx]
@@ -714,7 +714,7 @@
                                         if var_data == ins_seq:
                                             # daehwan - for debugging purposes
                                             if debug:
-                                                print cmp, var_id, 1, Links[var_id]
+                                                print(cmp, var_id, 1, Links[var_id])
                                             add_count(var_id, 1)
                                 var_idx += 1
 
@@ -750,8 +750,8 @@
                                         var_len = int(var_data)
                                         if var_len == length:
                                             if debug:
-                                                print cmp, var_id, 1, Links[var_id]
-                                                print ref_seq[var_pos - 10:var_pos], ref_seq[var_pos:var_pos+int(var_data)], ref_seq[var_pos+int(var_data):var_pos+int(var_data)+10]
+                                                print(cmp, var_id, 1, Links[var_id])
+                                                print(ref_seq[var_pos - 10:var_pos], ref_seq[var_pos:var_pos+int(var_data)], ref_seq[var_pos+int(var_data):var_pos+int(var_data)+10])
                                             add_count(var_id, 1)
                                 var_idx += 1
 
@@ -782,9 +782,9 @@
                     if read_pos != len(read_seq) or \
                             cmp_cigar_str != cigar_str or \
                             cmp_MD != MD:
-                        print >> sys.stderr, "Error:", cigar_str, MD
-                        print >> sys.stderr, "\tcomputed:", cmp_cigar_str, cmp_MD
-                        print >> sys.stderr, "\tcmp list:", cmp_list
+                        print("Error:", cigar_str, MD, file=sys.stderr)
+                        print("\tcomputed:", cmp_cigar_str, cmp_MD, file=sys.stderr)
+                        print("\tcmp list:", cmp_list, file=sys.stderr)
                         assert False            
 
                     prev_read_id = read_id
@@ -887,7 +887,7 @@
                 if alleles:
                     add_alleles(alleles)
 
-            HLA_counts = [[allele, count] for allele, count in HLA_counts.items()]
+            HLA_counts = [[allele, count] for allele, count in list(HLA_counts.items())]
             def HLA_count_cmp(a, b):
                 if a[1] != b[1]:
                     return b[1] - a[1]
@@ -903,7 +903,7 @@
                     found = False
                     for test_HLA_name in test_HLA_names:
                         if count[0] == test_HLA_name:
-                            print >> sys.stderr, "\t\t\t*** %d ranked %s (count: %d)" % (count_i + 1, test_HLA_name, count[1])
+                            print("\t\t\t*** %d ranked %s (count: %d)" % (count_i + 1, test_HLA_name, count[1]), file=sys.stderr)
                             found = True
                             """
                             if count_i > 0 and HLA_counts[0][1] > count[1]:
@@ -913,12 +913,12 @@
                                 test_passed += 1
                             """
                     if count_i < 5 and not found:
-                        print >> sys.stderr, "\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1])
+                        print("\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1]), file=sys.stderr)
                 else:
-                    print >> sys.stderr, "\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1])
+                    print("\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1]), file=sys.stderr)
                     if count_i >= 9:
                         break
-            print >> sys.stderr
+            print(file=sys.stderr)
 
             HLA_prob = single_abundance(HLA_cmpt, HLA_lengths[gene])
 
@@ -937,7 +937,7 @@
                                     rank_i -= 1
                                 else:
                                     break
-                            print >> sys.stderr, "\t\t\t*** %d ranked %s (abundance: %.2f%%)" % (rank_i + 1, test_HLA_name, prob[1] * 100.0)
+                            print("\t\t\t*** %d ranked %s (abundance: %.2f%%)" % (rank_i + 1, test_HLA_name, prob[1] * 100.0), file=sys.stderr)
                             if rank_i < len(success):
                                 success[rank_i] = True
                             found_list[name_i] = True
@@ -945,12 +945,12 @@
                     if not False in found_list:
                         break
                 if not found:
-                    print >> sys.stderr, "\t\t\t\t%d ranked %s (abundance: %.2f%%)" % (prob_i + 1, prob[0], prob[1] * 100.0)
+                    print("\t\t\t\t%d ranked %s (abundance: %.2f%%)" % (prob_i + 1, prob[0], prob[1] * 100.0), file=sys.stderr)
                     if best_alleles and prob_i < 2:
-                        print >> sys.stdout, "SingleModel %s (abundance: %.2f%%)" % (prob[0], prob[1] * 100.0)
+                        print("SingleModel %s (abundance: %.2f%%)" % (prob[0], prob[1] * 100.0), file=sys.stdout)
                 if not simulation and prob_i >= 9:
                     break
-            print >> sys.stderr
+            print(file=sys.stderr)
 
             if len(test_HLA_names) == 2 or not simulation:
                 HLA_prob = joint_abundance(HLA_cmpt, HLA_lengths[gene])
@@ -961,7 +961,7 @@
                     allele_pair, prob = HLA_prob[prob_i]
                     allele1, allele2 = allele_pair.split('-')
                     if best_alleles and prob_i < 1:
-                        print >> sys.stdout, "PairModel %s (abundance: %.2f%%)" % (allele_pair, prob * 100.0)
+                        print("PairModel %s (abundance: %.2f%%)" % (allele_pair, prob * 100.0), file=sys.stdout)
                     if simulation:
                         if allele1 in test_HLA_names and allele2 in test_HLA_names:
                             rank_i = prob_i
@@ -970,14 +970,14 @@
                                     rank_i -= 1
                                 else:
                                     break
-                            print >> sys.stderr, "\t\t\t*** %d ranked %s (abundance: %.2f%%)" % (rank_i + 1, allele_pair, prob * 100.0)
+                            print("\t\t\t*** %d ranked %s (abundance: %.2f%%)" % (rank_i + 1, allele_pair, prob * 100.0), file=sys.stderr)
                             if rank_i == 0:
                                 success[0] = True
                             break
-                    print >> sys.stderr, "\t\t\t\t%d ranked %s (abundance: %.2f%%)" % (prob_i + 1, allele_pair, prob * 100.0)
+                    print("\t\t\t\t%d ranked %s (abundance: %.2f%%)" % (prob_i + 1, allele_pair, prob * 100.0), file=sys.stderr)
                     if not simulation and prob_i >= 9:
                         break
-                print >> sys.stderr
+                print(file=sys.stderr)
 
                 # Li's method
                 """
@@ -1149,7 +1149,7 @@
             extract_cmd += ["--hla-list", ','.join(hla_list)]
 
         if len(exclude_allele_list) > 0:
-            print exclude_allele_list
+            print(exclude_allele_list)
             extract_cmd += ["--exclude-allele-list", ",".join(exclude_allele_list)]
 
         if len(base_fname) > 3:
@@ -1160,12 +1160,12 @@
         extract_cmd += ["--inter-gap", "30",
                         "--intra-gap", "50"]
         if verbose:
-            print >> sys.stderr, "\tRunning:", ' '.join(extract_cmd)
+            print("\tRunning:", ' '.join(extract_cmd), file=sys.stderr)
         proc = subprocess.Popen(extract_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w'))
         proc.communicate()
         
         if not check_files(HLA_fnames):
-            print >> sys.stderr, "Error: extract_HLA_vars failed!"
+            print("Error: extract_HLA_vars failed!", file=sys.stderr)
             sys.exit(1)
             
     for aligner, index_type in aligners:
@@ -1181,11 +1181,11 @@
                              HLA_fnames[0],
                              "%s.graph" % base_fname]
                 if verbose:
-                    print >> sys.stderr, "\tRunning:", ' '.join(build_cmd)
+                    print("\tRunning:", ' '.join(build_cmd), file=sys.stderr)
                 proc = subprocess.Popen(build_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w'))
                 proc.communicate()        
                 if not check_files(HLA_hisat2_graph_index_fnames):
-                    print >> sys.stderr, "Error: indexing HLA failed!  Perhaps, you may have forgotten to build hisat2 executables?"
+                    print("Error: indexing HLA failed!  Perhaps, you may have forgotten to build hisat2 executables?", file=sys.stderr)
                     sys.exit(1)
 
         # Build HISAT2 linear indexes based on the above information
@@ -1199,7 +1199,7 @@
                 proc = subprocess.Popen(build_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w'))
                 proc.communicate()        
                 if not check_files(HLA_hisat2_linear_index_fnames):
-                    print >> sys.stderr, "Error: indexing HLA failed!"
+                    print("Error: indexing HLA failed!", file=sys.stderr)
                     sys.exit(1)
 
         # Build Bowtie2 indexes based on the above information
@@ -1214,7 +1214,7 @@
                 proc = subprocess.Popen(build_cmd, stdout=open("/dev/null", 'w'))
                 proc.communicate()        
                 if not check_files(HLA_bowtie2_index_fnames):
-                    print >> sys.stderr, "Error: indexing HLA failed!"
+                    print("Error: indexing HLA failed!", file=sys.stderr)
                     sys.exit(1)
         
     # Read partial alleles from hla.data (temporary)
@@ -1249,12 +1249,12 @@
             extract_cmd += ["--inter-gap", "30",
                             "--intra-gap", "50"]
             if verbose:
-                print >> sys.stderr, "\tRunning:", ' '.join(extract_cmd)
+                print("\tRunning:", ' '.join(extract_cmd), file=sys.stderr)
             proc = subprocess.Popen(extract_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w'))
             proc.communicate()
             
             if not os.path.exists("./Default-HLA/hla_backbone.fa"):
-                print >> sys.stderr, "Error: extract_HLA_vars (Default) failed!"
+                print("Error: extract_HLA_vars (Default) failed!", file=sys.stderr)
                 sys.exit(1)
     
     # Read HLA alleles (names and sequences)
@@ -1278,14 +1278,14 @@
     
     # HLA gene alleles
     HLA_names = {}
-    for HLA_gene, data in HLAs.items():
+    for HLA_gene, data in list(HLAs.items()):
         HLA_names[HLA_gene] = list(data.keys())
 
     # HLA gene allele lengths
     HLA_lengths = {}
-    for HLA_gene, HLA_alleles in HLAs.items():
+    for HLA_gene, HLA_alleles in list(HLAs.items()):
         HLA_lengths[HLA_gene] = {}
-        for allele_name, seq in HLA_alleles.items():
+        for allele_name, seq in list(HLA_alleles.items()):
             HLA_lengths[HLA_gene][allele_name] = len(seq)
 
     # Construct excluded alleles (Via default backbone data)
@@ -1296,8 +1296,8 @@
         read_HLA_alleles("./Default-HLA/hla_backbone.fa",HLAs_default)
         read_HLA_alleles("./Default-HLA/hla_sequences.fa",HLAs_default)
         
-        for HLA_gene, HLA_alleles in HLAs_default.items():
-            for allele_name, seq in HLA_alleles.items():
+        for HLA_gene, HLA_alleles in list(HLAs_default.items()):
+            for allele_name, seq in list(HLA_alleles.items()):
                 if allele_name in default_allele_list:
                     HLA_lengths[HLA_gene][allele_name] = len(seq)
 
@@ -1308,7 +1308,7 @@
         pos = int(pos)
         if reference_type != "gene":
             allele, dist = None, 0
-            for tmp_gene, values in refHLA_loci.items():
+            for tmp_gene, values in list(refHLA_loci.items()):
                 allele_name, chr, left, right, exons = values
                 if allele == None:
                     allele = allele_name
@@ -1331,7 +1331,7 @@
         Vars[gene][var_id] = [var_type, pos - left, data]
         Var_list[gene].append([pos - left, var_id])
         
-    for gene, in_var_list in Var_list.items():
+    for gene, in_var_list in list(Var_list.items()):
         Var_list[gene] = sorted(in_var_list)
         
     Links = {}
@@ -1356,7 +1356,7 @@
         if base_fname == "hla":
             genes = list(set(hla_list) & set(HLA_names.keys()))
         else:
-            genes = HLA_names.keys()
+            genes = list(HLA_names.keys())
             
         if basic_test:
             for gene in genes:
@@ -1392,7 +1392,7 @@
                 if str(test_i + 1) not in daehwan_test_ids:
                     continue
 
-            print >> sys.stderr, "Test %d" % (test_i + 1)
+            print("Test %d" % (test_i + 1), file=sys.stderr)
             test_HLA_list = test_list[test_i]
            
             # daehwan - for debugging purposes
@@ -1403,12 +1403,12 @@
                         gene = test_HLA_name.split('*')[0]
                         test_HLA_seq = HLAs_default[gene][test_HLA_name]
                         seq_type = "partial" if test_HLA_name in partial_alleles else "full"
-                        print >> sys.stderr, "\t%s - %d bp (%s sequence)" % (test_HLA_name, len(test_HLA_seq), seq_type)
+                        print("\t%s - %d bp (%s sequence)" % (test_HLA_name, len(test_HLA_seq), seq_type), file=sys.stderr)
                         continue
                     gene = test_HLA_name.split('*')[0]
                     test_HLA_seq = HLAs[gene][test_HLA_name]
                     seq_type = "partial" if test_HLA_name in partial_alleles else "full"
-                    print >> sys.stderr, "\t%s - %d bp (%s sequence)" % (test_HLA_name, len(test_HLA_seq), seq_type)
+                    print("\t%s - %d bp (%s sequence)" % (test_HLA_name, len(test_HLA_seq), seq_type), file=sys.stderr)
             if custom_allele_check:
                 simulate_reads(HLAs_default, test_HLA_list, simulate_interval)
             else:
@@ -1446,24 +1446,24 @@
                                          best_alleles,
                                          verbose)
 
-            for aligner_type, passed in tmp_test_passed.items():
+            for aligner_type, passed in list(tmp_test_passed.items()):
                 if aligner_type in test_passed:
                     test_passed[aligner_type] += passed
                 else:
                     test_passed[aligner_type] = passed
 
-                print >> sys.stderr, "\t\tPassed so far: %d/%d (%.2f%%)" % (test_passed[aligner_type], test_i + 1, (test_passed[aligner_type] * 100.0 / (test_i + 1)))
+                print("\t\tPassed so far: %d/%d (%.2f%%)" % (test_passed[aligner_type], test_i + 1, (test_passed[aligner_type] * 100.0 / (test_i + 1))), file=sys.stderr)
 
 
-        for aligner_type, passed in test_passed.items():
-            print >> sys.stderr, "%s:\t%d/%d passed (%.2f%%)" % (aligner_type, passed, len(test_list), passed * 100.0 / len(test_list))
+        for aligner_type, passed in list(test_passed.items()):
+            print("%s:\t%d/%d passed (%.2f%%)" % (aligner_type, passed, len(test_list), passed * 100.0 / len(test_list)), file=sys.stderr)
     
     else: # With real reads or BAMs
         if base_fname == "hla":
             gene_list = hla_list
         else:
-            gene_list = Vars.keys()
-        print >> sys.stderr, "\t", ' '.join(gene_list)
+            gene_list = list(Vars.keys())
+        print("\t", ' '.join(gene_list), file=sys.stderr)
 
         fastq = True
         HLA_typing(ex_path,
@@ -1581,11 +1581,11 @@
 
     args = parser.parse_args()
     if not args.reference_type in ["gene", "chromosome", "genome"]:
-        print >> sys.stderr, "Error: --reference-type (%s) must be one of gene, chromosome, and genome." % (args.reference_type)
+        print("Error: --reference-type (%s) must be one of gene, chromosome, and genome." % (args.reference_type), file=sys.stderr)
         sys.exit(1)
     args.hla_list = args.hla_list.split(',')
     if args.aligners == "":
-        print >> sys.stderr, "Error: --aligners must be non-empty."
+        print("Error: --aligners must be non-empty.", file=sys.stderr)
         sys.exit(1)    
     args.aligners = args.aligners.split(',')
     for i in range(len(args.aligners)):
@@ -1596,7 +1596,7 @@
         args.read_fname = []
     if args.alignment_fname != "" and \
             not os.path.exists(args.alignment_fname):
-        print >> sys.stderr, "Error: %s doesn't exist." % args.alignment_fname
+        print("Error: %s doesn't exist." % args.alignment_fname, file=sys.stderr)
         sys.exit(1)
     
     if len(args.default_allele_list) > 0:
@@ -1621,11 +1621,11 @@
                 extract_cmd += ["--inter-gap", "30",
                                 "--intra-gap", "50"]
                 if verbose:
-                    print >> sys.stderr, "\tRunning:", ' '.join(extract_cmd)
+                    print("\tRunning:", ' '.join(extract_cmd), file=sys.stderr)
                 proc = subprocess.Popen(extract_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w'))
                 proc.communicate()
                 if not os.path.exists("./Default-HLA/hla_backbone.fa"):
-                    print >> sys.stderr, "Error: extract_HLA_vars (Default) failed!"
+                    print("Error: extract_HLA_vars (Default) failed!", file=sys.stderr)
                     sys.exit(1)
        
             HLAs_default = {}
--- hisat2.orig/hisatgenotype_locus.py
+++ hisat2/hisatgenotype_locus.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 #
 # Copyright 2017, Daehwan Kim <infphilo@gmail.com>
 #
@@ -49,7 +49,7 @@
 """
 def get_exonic_vars(Vars, exons):
     vars = set()
-    for var_id, var in Vars.items():
+    for var_id, var in list(Vars.items()):
         var_type, var_left, var_data = var
         var_right = var_left
         if var_type == "deletion":
@@ -66,7 +66,7 @@
 """
 def get_rep_alleles(Links, exon_vars, in_alleles = None):
     allele_vars = {}
-    for var, alleles in Links.items():
+    for var, alleles in list(Links.items()):
         if var not in exon_vars:
             continue
         for allele in alleles:
@@ -77,7 +77,7 @@
             allele_vars[allele].add(var)
 
     allele_groups = {}
-    for allele, vars in allele_vars.items():
+    for allele, vars in list(allele_vars.items()):
         vars = '-'.join(vars)
         if vars not in allele_groups:
             allele_groups[vars] = []
@@ -85,7 +85,7 @@
 
     allele_reps = {} # allele representatives
     allele_rep_groups = {} # allele groups by allele representatives
-    for allele_members in allele_groups.values():
+    for allele_members in list(allele_groups.values()):
         assert len(allele_members) > 0
         allele_rep = allele_members[0]
         allele_rep_groups[allele_rep] = allele_members
@@ -107,8 +107,8 @@
                   cmp_list,
                   debug = False):
     if debug:
-        print >> sys.stderr, cmp_list
-        print >> sys.stderr, read_seq
+        print(cmp_list, file=sys.stderr)
+        print(read_seq, file=sys.stderr)
 
     num_correction = 0
     i = 0
@@ -163,7 +163,7 @@
             nt_set = mpileup[left][0]
 
             if debug:
-                print >> sys.stderr, left, read_bp, ref_bp, mpileup[left]
+                print(left, read_bp, ref_bp, mpileup[left], file=sys.stderr)
 
             if len(nt_set) > 0 and read_bp not in nt_set:
                 read_bp = 'N' if len(nt_set) > 1 else nt_set[0]
@@ -188,8 +188,8 @@
                         var_idx += 1
 
                 if debug:
-                    print >> sys.stderr, left, read_bp, ref_bp, mpileup[left]
-                    print >> sys.stderr, cmp_list[i]
+                    print(left, read_bp, ref_bp, mpileup[left], file=sys.stderr)
+                    print(cmp_list[i], file=sys.stderr)
 
         read_pos += length
         i += 1
@@ -207,8 +207,8 @@
         i += 1
 
     if debug:
-        print >> sys.stderr, cmp_list
-        print >> sys.stderr, read_seq
+        print(cmp_list, file=sys.stderr)
+        print(read_seq, file=sys.stderr)
                             
     return cmp_list, read_seq, num_correction
 
@@ -255,9 +255,9 @@
     for aligner, index_type in aligners:
         for f_ in [sys.stderr, report_file]:
             if index_type == "graph":
-                print >> f_, "\n\t\t%s %s" % (aligner, index_type)
+                print("\n\t\t%s %s" % (aligner, index_type), file=f_)
             else:
-                print >> f_, "\n\t\t%s %s" % (aligner, index_type)
+                print("\n\t\t%s %s" % (aligner, index_type), file=f_)
 
         remove_alignment_file = False
         if alignment_fname == "":
@@ -466,10 +466,10 @@
             primary_exon_allele_rep_set = set(primary_exon_allele_reps.values())
 
             # Sanity check
-            for exon_allele in primary_exon_allele_reps.keys():
+            for exon_allele in list(primary_exon_allele_reps.keys()):
                 # DK - debugging purposes
                 if exon_allele not in allele_rep_set:
-                    print exon_allele, allele_reps[exon_allele], exon_allele in primary_exon_allele_reps.keys()
+                    print(exon_allele, allele_reps[exon_allele], exon_allele in list(primary_exon_allele_reps.keys()))
                     
                 assert exon_allele in allele_rep_set
                                     
@@ -482,7 +482,7 @@
 
             def haplotype_alts_list(haplotype_alts, left = True):
                 haplotype_list = []
-                for haplotype in haplotype_alts.keys():
+                for haplotype in list(haplotype_alts.keys()):
                     if left:
                         pos = int(haplotype.split('-')[-1])
                     else:
@@ -514,7 +514,7 @@
                 # 
                 def add_count(count_per_read, ht, add):
                     if base_fname == "genome" and len(count_per_read) == 1:
-                        for allele in count_per_read.keys():
+                        for allele in list(count_per_read.keys()):
                             count_per_read[allele] = add
                         return
                     
@@ -574,7 +574,7 @@
                             expected_inter_dist = fragment_len - read_len * 2
                         """
                             
-                        best_diff = sys.maxint
+                        best_diff = sys.maxsize
                         picked = []                                
                         for left_ht_str in left_positive_hts:
                             left_ht = left_ht_str.split('-')
@@ -643,7 +643,7 @@
                         assert ht_left >= e_left
                         if ht_right > e_right:
                             split = False
-                            for i in reversed(range(1, len(new_ht) - 1)):
+                            for i in reversed(list(range(1, len(new_ht) - 1))):
                                 var_id = new_ht[i]
                                 type, right, data = gene_vars[var_id]
                                 if type == "deletion":
@@ -696,8 +696,8 @@
                     # Unalined?
                     if flag & 0x4 != 0:
                         if simulation and verbose >= 2:
-                            print "Unaligned"
-                            print "\t", line
+                            print("Unaligned")
+                            print("\t", line)
                         continue
 
                     # Concordantly mapped?
@@ -905,7 +905,7 @@
                             # Check if this deletion is artificial alignment
                             if right_pos < len(mpileup):
                                 del_count, nt_count = 0, 0
-                                for nt, value in mpileup[right_pos][1].items():
+                                for nt, value in list(mpileup[right_pos][1].items()):
                                     count = value[0]
                                     if nt == 'D':
                                         del_count += count
@@ -1005,7 +1005,7 @@
                             return ""
                         max_count = max(Gene_count_per_read.values())
                         cur_cmpt = set()
-                        for allele, count in Gene_count_per_read.items():
+                        for allele, count in list(Gene_count_per_read.items()):
                             if count < max_count:
                                 continue
                             if len(include_alleles) > 0 and allele not in include_alleles:
@@ -1024,7 +1024,7 @@
                             alleles = ["", ""]
                             allele1_found, allele2_found = False, False
                             if alleles[0] != "":
-                                for allele, count in Gene_count_per_read.items():
+                                for allele, count in list(Gene_count_per_read.items()):
                                     if count < max_count:
                                         continue
                                     if allele == alleles[0]:
@@ -1032,12 +1032,12 @@
                                     elif allele == alleles[1]:
                                         allele2_found = True
                                 if allele1_found != allele2_found:
-                                    print >> sys.stderr, alleles[0], Gene_count_per_read[alleles[0]]
-                                    print >> sys.stderr, alleles[1], Gene_count_per_read[alleles[1]]
+                                    print(alleles[0], Gene_count_per_read[alleles[0]], file=sys.stderr)
+                                    print(alleles[1], Gene_count_per_read[alleles[1]], file=sys.stderr)
                                     if allele1_found:
-                                        print >> sys.stderr, ("%s\tread_id %s - %d vs. %d]" % (alleles[0], prev_read_id, max_count, Gene_count_per_read[alleles[1]]))
+                                        print(("%s\tread_id %s - %d vs. %d]" % (alleles[0], prev_read_id, max_count, Gene_count_per_read[alleles[1]])), file=sys.stderr)
                                     else:
-                                        print >> sys.stderr, ("%s\tread_id %s - %d vs. %d]" % (alleles[1], prev_read_id, max_count, Gene_count_per_read[alleles[0]]))
+                                        print(("%s\tread_id %s - %d vs. %d]" % (alleles[1], prev_read_id, max_count, Gene_count_per_read[alleles[0]])), file=sys.stderr)
 
                         cur_cmpt = sorted(list(cur_cmpt))
                         cur_cmpt = '-'.join(cur_cmpt)
@@ -1069,14 +1069,14 @@
 
                             # DK - debugging purposes
                             if prev_read_id.startswith("NS500497:33:HY32TBGXX:3:13511:0:56517876") and False:
-                                print prev_read_id, left_positive_hts, right_positive_hts
+                                print(prev_read_id, left_positive_hts, right_positive_hts)
                                 max_count = max(Gene_primary_exons_count_per_read.values())
-                                for allele, count in Gene_primary_exons_count_per_read.items():
+                                for allele, count in list(Gene_primary_exons_count_per_read.items()):
                                     if allele not in primary_exon_allele_rep_set:
                                         continue
                                     if count < max_count:
                                         continue
-                                    print allele, count
+                                    print(allele, count)
 
                             # DK - debugging purposes
                             """
@@ -1111,8 +1111,8 @@
                                 # DK - debugging purposes
                                 # for cmpt, count in Gene_primary_exons_count_per_read.items():
                                 if cur_primary_exons_cmpt.find("A*24:145") != -1 and cur_primary_exons_cmpt.find("A*24:02:01") == -1:
-                                    print prev_read_id
-                                    print cur_primary_exons_cmpt
+                                    print(prev_read_id)
+                                    print(cur_primary_exons_cmpt)
         
 
             
@@ -1135,9 +1135,9 @@
                                               (not partial and cur_cmpt_gen != "" and not set(cur_cmpt_gen) & set(test_Gene_names))
                                               
                                 if show_debug:
-                                    print "%s are chosen instead of %s" % (cur_cmpt if partial else cur_cmpt_gen, '-'.join(test_Gene_names))
+                                    print("%s are chosen instead of %s" % (cur_cmpt if partial else cur_cmpt_gen, '-'.join(test_Gene_names)))
                                     for prev_line in prev_lines:
-                                        print "\t", prev_line
+                                        print("\t", prev_line)
 
                             prev_lines = []
 
@@ -1212,10 +1212,10 @@
                     DK_debug = False
                     if orig_read_id.startswith("30|R!"):
                         DK_debug = True
-                        print line
-                        print cmp_list
-                        print "positive hts:", left_positive_hts, right_positive_hts
-                        print "cmp_list [%d, %d]" % (cmp_list_left, cmp_list_right)
+                        print(line)
+                        print(cmp_list)
+                        print("positive hts:", left_positive_hts, right_positive_hts)
+                        print("cmp_list [%d, %d]" % (cmp_list_left, cmp_list_right))
 
                     if assembly:
                         # Construct multiple candidate realignments for CODIS
@@ -1356,7 +1356,7 @@
                     continue
 
                 for f_ in [sys.stderr, report_file]:
-                    print >> f_, "\t\t\t%d reads and %d pairs are aligned" % (num_reads, num_pairs)
+                    print("\t\t\t%d reads and %d pairs are aligned" % (num_reads, num_pairs), file=f_)
                 
             else:
                 assert index_type == "linear"
@@ -1408,7 +1408,7 @@
                 if alleles:
                     add_alleles(alleles)
 
-            Gene_counts = [[allele, count] for allele, count in Gene_counts.items()]
+            Gene_counts = [[allele, count] for allele, count in list(Gene_counts.items())]
             def Gene_count_cmp(a, b):
                 if a[1] != b[1]:
                     return b[1] - a[1]
@@ -1425,18 +1425,18 @@
                     for test_Gene_name in test_Gene_names:
                         if count[0] == test_Gene_name:
                             for f_ in [sys.stderr, report_file]:
-                                print >> f_, "\t\t\t*** %d ranked %s (count: %d)" % (count_i + 1, test_Gene_name, count[1])
+                                print("\t\t\t*** %d ranked %s (count: %d)" % (count_i + 1, test_Gene_name, count[1]), file=f_)
                             found = True
                     if count_i < 5 and not found:
                         for f_ in [sys.stderr, report_file]:
-                            print >> f_, "\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1])
+                            print("\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1]), file=f_)
                 else:
                     for f_ in [sys.stderr, report_file]:
-                        print >> f_, "\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1])
+                        print("\t\t\t\t%d %s (count: %d)" % (count_i + 1, count[0], count[1]), file=f_)
                     if count_i >= 9:
                         break
             for f_ in [sys.stderr, report_file]:
-                print >> f_
+                print(file=f_)
 
             # Calculate the abundance of representative alleles on exonic sequences
             if base_fname == "hla":
@@ -1456,7 +1456,7 @@
                     # Incorporate representative alleles for exons
                     if len(primary_exon_alleles) > 0:
                         Gene_exons_cmpt2 = {}
-                        for cmpt, value in Gene_exons_cmpt.items():
+                        for cmpt, value in list(Gene_exons_cmpt.items()):
                             cmpt2 = []
                             for allele in cmpt.split('-'):
                                 if allele in primary_exon_alleles:
@@ -1476,7 +1476,7 @@
                                 exon_prob2[allele] = prob
                         for allele, prob in exon_prob:
                             exon_prob2[allele] = prob * primary_exon_prob_sum
-                        exon_prob = [[allele, prob] for allele, prob in exon_prob2.items()]
-                        Gene_prob = exon_prob = sorted(exon_prob, cmp=typing_common.Gene_prob_cmp)
+                        exon_prob = [[allele, prob] for allele, prob in list(exon_prob2.items())]
+                        from functools import cmp_to_key; Gene_prob = exon_prob = sorted(exon_prob, key=cmp_to_key(typing_common.Gene_prob_cmp))  # Python 3: sorted() has no cmp=
                 else:
                     # Incorporate representative alleles for exons
@@ -1498,7 +1498,7 @@
                 # Incorporate full-length alleles, non-representative alleles
                 if len(exon_alleles) > 0:
                     Gene_cmpt2 = {}
-                    for cmpt, value in Gene_cmpt.items():
+                    for cmpt, value in list(Gene_cmpt.items()):
                         cmpt2 = []
                         for allele in cmpt.split('-'):
                             if allele in exon_alleles:
@@ -1523,13 +1523,13 @@
                     for allele, prob in Gene_prob:
                         Gene_combined_prob[allele] = prob * exon_prob_sum
                                             
-                    Gene_prob = [[allele, prob] for allele, prob in Gene_combined_prob.items()]
-                    Gene_prob = sorted(Gene_prob, cmp=typing_common.Gene_prob_cmp)
+                    Gene_prob = [[allele, prob] for allele, prob in list(Gene_combined_prob.items())]
+                    from functools import cmp_to_key; Gene_prob = sorted(Gene_prob, key=cmp_to_key(typing_common.Gene_prob_cmp))  # Python 3: sorted() has no cmp=
             else:
-                if len(Gene_cmpt.keys()) <= 1:
+                if len(list(Gene_cmpt.keys())) <= 1:
                     Gene_prob = []
-                    if len(Gene_cmpt.keys()) == 1:
-                        Gene_prob = [[Gene_cmpt.keys()[0], 1.0]]
+                    if len(list(Gene_cmpt.keys())) == 1:
+                        Gene_prob = [[list(Gene_cmpt.keys())[0], 1.0]]
                 else:
                     Gene_prob = typing_common.single_abundance(Gene_cmpt)
 
@@ -1572,7 +1572,7 @@
                 # Compare two alleles
                 if simulation and len(test_Gene_names) == 2:
                     allele_name1, allele_name2 = test_Gene_names
-                    print >> sys.stderr, allele_name1, "vs.", allele_name2
+                    print(allele_name1, "vs.", allele_name2, file=sys.stderr)
                     asm_graph.print_node_comparison(asm_graph.true_allele_nodes)
 
                 def compare_alleles(vars1, vars2, print_output = True):
@@ -1604,8 +1604,8 @@
                             skip = False
                             if print_output:
                                 if cmp_var_in_exon:
-                                    print >> sys.stderr, "\033[94mexon%d\033[00m" % (exon_i + 1),
-                                print >> sys.stderr, cmp_var_id, cmp_var, "\t\t\t", mpileup[cmp_var[1]]
+                                    print("\033[94mexon%d\033[00m" % (exon_i + 1), end=' ', file=sys.stderr)
+                                print(cmp_var_id, cmp_var, "\t\t\t", mpileup[cmp_var[1]], file=sys.stderr)
                             var_i += 1; var_j += 1
 
                             var_type, var_pos, var_data = cmp_var
@@ -1622,18 +1622,18 @@
                                     if print_output:
                                         if cmp_var_in_exon:
                                             for f_ in [sys.stderr, report_file]:
-                                                print >> f_, "\033[94mexon%d\033[00m" % (exon_i + 1),
+                                                print("\033[94mexon%d\033[00m" % (exon_i + 1), end=' ', file=f_)
                                         for f_ in [sys.stderr, report_file]:
-                                            print >> f_, "***", cmp_var_id, cmp_var, "==", "\t\t\t", mpileup[cmp_var[1]]
+                                            print("***", cmp_var_id, cmp_var, "==", "\t\t\t", mpileup[cmp_var[1]], file=f_)
                                     mismatches += 1
                             var_i += 1
                         else:
                             if print_output:
                                 if node_var_in_exon:
                                     for f_ in [sys.stderr, report_file]:
-                                        print >> f_, "\033[94mexon%d\033[00m" % (exon_i + 1),
+                                        print("\033[94mexon%d\033[00m" % (exon_i + 1), end=' ', file=f_)
                                 for f_ in [sys.stderr, report_file]:
-                                    print >> f_, "*** ==", node_var_id, node_var, "\t\t\t", mpileup[node_var[1]]
+                                    print("*** ==", node_var_id, node_var, "\t\t\t", mpileup[node_var[1]], file=f_)
                             mismatches += 1
                             var_j += 1
 
@@ -1655,24 +1655,24 @@
                     return allele_seq, allele_exons, mismatches
                     
                 tmp_nodes = asm_graph.nodes
-                print >> sys.stderr, "Number of tmp nodes:", len(tmp_nodes)
+                print("Number of tmp nodes:", len(tmp_nodes), file=sys.stderr)
                 count = 0
-                for id, node in tmp_nodes.items():
+                for id, node in list(tmp_nodes.items()):
                     count += 1
                     if count > 10:
                         break
                     node_vars = node.get_var_ids()
-                    node.print_info(); print >> sys.stderr
+                    node.print_info(); print(file=sys.stderr)
                     if node.id in asm_graph.to_node:
                         for id2, at in asm_graph.to_node[node.id]:
-                            print >> sys.stderr, "\tat %d ==> %s" % (at, id2)
+                            print("\tat %d ==> %s" % (at, id2), file=sys.stderr)
 
                     if simulation:
                         cmp_Gene_names = test_Gene_names
                     else:
                         cmp_Gene_names = [allele_name for allele_name, _ in allele_node_order]
                         
-                    alleles, cmp_vars, max_common = [], [], -sys.maxint
+                    alleles, cmp_vars, max_common = [], [], -sys.maxsize
                     for cmp_Gene_name in cmp_Gene_names:
                         tmp_vars = allele_nodes[cmp_Gene_name].get_var_ids(node.left, node.right)
                         tmp_common = len(set(node_vars) & set(tmp_vars))
@@ -1685,21 +1685,21 @@
 
                     for allele_name, cmp_vars in alleles:
                         for f_ in [sys.stderr, report_file]:
-                            print >> f_, "vs.", allele_name
+                            print("vs.", allele_name, file=f_)
                             allele_seq, allele_exons, allele_mm = compare_alleles(cmp_vars, node_vars)
-                            print >> f_, "\t\tallele sequence (%d bps):" % len(allele_seq), allele_seq
-                            print >> f_, "\t\texons (zero-based offset):", allele_exons
+                            print("\t\tallele sequence (%d bps):" % len(allele_seq), allele_seq, file=f_)
+                            print("\t\texons (zero-based offset):", allele_exons, file=f_)
 
-                    print >> sys.stderr
-                    print >> sys.stderr
+                    print(file=sys.stderr)
+                    print(file=sys.stderr)
 
 
             # Identify alleles that perfectly or closesly match assembled alleles
-            for node_name, node in asm_graph.nodes.items():
+            for node_name, node in list(asm_graph.nodes.items()):
                 vars = set(node.get_var_ids())
 
-                max_allele_names, max_common = [], -sys.maxint
-                for allele_name, vars2 in allele_vars.items():
+                max_allele_names, max_common = [], -sys.maxsize
+                for allele_name, vars2 in list(allele_vars.items()):
                     vars2 = set(vars2)
                     tmp_common = len(vars & vars2) - len(vars | vars2)
                     if tmp_common > max_common:
@@ -1709,21 +1709,21 @@
                         max_allele_names.append(allele_name)
 
                 for f_ in [sys.stderr, report_file]:
-                    print >> f_, "Genomic:", node_name
+                    print("Genomic:", node_name, file=f_)
                     node_vars = node.get_var_ids()
-                    min_mismatches = sys.maxint
+                    min_mismatches = sys.maxsize
                     for max_allele_name in max_allele_names:
                         cmp_vars = allele_vars[max_allele_name]
                         cmp_vars = sorted(cmp_vars, cmp=lambda a, b: int(a[2:]) - int(b[2:]))
                         print_output = False
                         _, _, tmp_mismatches = compare_alleles(cmp_vars, node_vars, print_output)
-                        print >> f_, "\t\t%s:" % max_allele_name, max_common, tmp_mismatches
+                        print("\t\t%s:" % max_allele_name, max_common, tmp_mismatches, file=f_)
                         if tmp_mismatches < min_mismatches:
                             min_mismatches = tmp_mismatches
                     if min_mismatches > 0:
-                        print >> f_, "Novel allele"
+                        print("Novel allele", file=f_)
                     else:
-                        print >> f_, "Known allele"
+                        print("Known allele", file=f_)
 
             """
             allele_exon_vars = {}
@@ -1777,7 +1777,7 @@
                                 else:
                                     break
                             for f_ in [sys.stderr, report_file]:
-                                print >> f_, "\t\t\t*** %d ranked %s (abundance: %.2f%%)" % (rank_i + 1, test_Gene_name, prob[1] * 100.0)
+                                print("\t\t\t*** %d ranked %s (abundance: %.2f%%)" % (rank_i + 1, test_Gene_name, prob[1] * 100.0), file=f_)
                             if rank_i < len(success):
                                 success[rank_i] = True
                             found_list[name_i] = True
@@ -1787,11 +1787,11 @@
                         break
                 if not found:
                     for f_ in [sys.stderr, report_file]:
-                        print >> f_, "\t\t\t\t%d ranked %s (abundance: %.2f%%)" % (prob_i + 1, _allele_rep, prob[1] * 100.0)
+                        print("\t\t\t\t%d ranked %s (abundance: %.2f%%)" % (prob_i + 1, _allele_rep, prob[1] * 100.0), file=f_)
 
                     if best_alleles and prob_i < 2:
                         for f_ in [sys.stderr, report_file]:
-                            print >> f_, "SingleModel %s (abundance: %.2f%%)" % (_allele_rep, prob[1] * 100.0)
+                            print("SingleModel %s (abundance: %.2f%%)" % (_allele_rep, prob[1] * 100.0), file=f_)
 
                 # DK - debugging purposes
                 """
@@ -1811,7 +1811,7 @@
                     break
                 if prob_i >= 19:
                     break
-            print >> sys.stderr         
+            print(file=sys.stderr)         
 
             if simulation and not False in success:
                 aligner_type = "%s %s" % (aligner, index_type)
@@ -1858,7 +1858,7 @@
     for gene_name in Genes:
         # Assert there is only one allele per gene, which is a backbone allele
         assert len(Genes[gene_name]) == 1
-        backbone_allele_name, backbone_seq = Genes[gene_name].items()[0]
+        backbone_allele_name, backbone_seq = list(Genes[gene_name].items())[0]
         gene_vars, gene_var_list = Vars[gene_name], Var_list[gene_name]
         allele_vars = {}
         for _, var_id in gene_var_list:
@@ -1869,7 +1869,7 @@
                     allele_vars[allele_name] = []
                 allele_vars[allele_name].append(var_id)
 
-        for allele_name, vars in allele_vars.items():
+        for allele_name, vars in list(allele_vars.items()):
             seq = ""
             prev_pos = 0
             for var_id in vars:
@@ -1927,7 +1927,7 @@
         Vars[gene][var_id] = [var_type, pos, data]
         Var_list[gene].append([pos, var_id])
         
-    for gene, in_var_list in Var_list.items():
+    for gene, in_var_list in list(Var_list.items()):
         Var_list[gene] = sorted(in_var_list)
 
     return Vars, Var_list
@@ -1937,7 +1937,7 @@
 """
 def read_Gene_vars_genotype_genome(fname, refGene_loci):
     loci = {}
-    for gene, values in refGene_loci.items():
+    for gene, values in list(refGene_loci.items()):
         allele_name, chr, left, right = values[:4]
         if chr not in loci:
             loci[chr] = []
@@ -1967,7 +1967,7 @@
         Vars[gene][var_id] = [var_type, pos - left, data]
         Var_list[gene].append([pos - left, var_id])
         
-    for gene, in_var_list in Var_list.items():
+    for gene, in_var_list in list(Var_list.items()):
         Var_list[gene] = sorted(in_var_list)
 
     return Vars, Var_list
@@ -2068,7 +2068,7 @@
             genome_fnames.append(genotype_genome + ".%d.ht2" % (i+1))
 
         if not typing_common.check_files(genome_fnames):
-            print >> sys.stderr, "Error: some of the following files are not available:", ' '.join(genome_fnames)
+            print("Error: some of the following files are not available:", ' '.join(genome_fnames), file=sys.stderr)
             sys.exit(1)
     else:
         typing_common.extract_database_if_not_exists(base_fname,
@@ -2140,7 +2140,7 @@
             refGene_loci[Gene_gene] = [Gene_name, chr, left, right, exons, primary_exons]
     Genes = {}
     if len(locus_list) == 0:
-        locus_list = refGene_loci.keys()
+        locus_list = list(refGene_loci.keys())
 
     # Read variants, and link information
     if genotype_genome:
@@ -2151,7 +2151,7 @@
         Links = read_Gene_links("%s.link" % base_fname)
 
     # Some loci may have only one allele such as AMELX and AMELY
-    for gene_name in refGene_loci.keys():
+    for gene_name in list(refGene_loci.keys()):
         if gene_name in Vars:
             continue
         Vars[gene_name], Var_list[gene_name], Links[gene_name] = {}, [], {}        
@@ -2177,23 +2177,23 @@
         Genes2 = {}
         read_Gene_alleles(base_fname + "_backbone.fa", Genes2)
         read_Gene_alleles(base_fname + "_sequences.fa", Genes2)
-        for gene_name, alleles in Genes.items():
+        for gene_name, alleles in list(Genes.items()):
             assert gene_name in Genes2
-            for allele_name, allele_seq in alleles.items():
+            for allele_name, allele_seq in list(alleles.items()):
                 assert allele_name in Genes2[gene_name]
                 allele_seq2 = Genes2[gene_name][allele_name]
                 assert allele_seq == allele_seq2
 
     # alleles names
     Gene_names = {}
-    for Gene_gene, data in Genes.items():
+    for Gene_gene, data in list(Genes.items()):
         Gene_names[Gene_gene] = list(data.keys())
 
     # allele lengths
     Gene_lengths = {}
-    for Gene_gene, Gene_alleles in Genes.items():
+    for Gene_gene, Gene_alleles in list(Genes.items()):
         Gene_lengths[Gene_gene] = {}
-        for allele_name, seq in Gene_alleles.items():
+        for allele_name, seq in list(Gene_alleles.items()):
             Gene_lengths[Gene_gene][allele_name] = len(seq)
 
     # Test typing
@@ -2245,7 +2245,7 @@
                 if str(test_i + 1) not in test_ids:
                     continue
 
-            print >> sys.stderr, "Test %d" % (test_i + 1), str(datetime.now())
+            print("Test %d" % (test_i + 1), str(datetime.now()), file=sys.stderr)
             test_locus_list = test_list[test_i]
             num_frag_list = typing_common.simulate_reads(Genes,
                                                          base_fname,
@@ -2269,7 +2269,7 @@
                     gene = test_Gene_name.split('*')[0]
                     test_Gene_seq = Genes[gene][test_Gene_name]
                     seq_type = "partial" if test_Gene_name in partial_alleles else "full"
-                    print >> sys.stderr, "\t%s - %d bp (%s sequence, %d pairs)" % (test_Gene_name, len(test_Gene_seq), seq_type, num_frag_list_i[j_])
+                    print("\t%s - %d bp (%s sequence, %d pairs)" % (test_Gene_name, len(test_Gene_seq), seq_type, num_frag_list_i[j_]), file=sys.stderr)
 
             if "single-end" in debug_instr:
                 read_fname = ["%s_input_1.fa" % base_fname]
@@ -2312,23 +2312,23 @@
                                      verbose,
                                      assembly_verbose)
 
-            for aligner_type, passed in tmp_test_passed.items():
+            for aligner_type, passed in list(tmp_test_passed.items()):
                 if aligner_type in test_passed:
                     test_passed[aligner_type] += passed
                 else:
                     test_passed[aligner_type] = passed
 
-                print >> sys.stderr, "\t\tPassed so far: %d/%d (%.2f%%)" % (test_passed[aligner_type], test_i + 1, (test_passed[aligner_type] * 100.0 / (test_i + 1)))
+                print("\t\tPassed so far: %d/%d (%.2f%%)" % (test_passed[aligner_type], test_i + 1, (test_passed[aligner_type] * 100.0 / (test_i + 1))), file=sys.stderr)
 
 
-        for aligner_type, passed in test_passed.items():
-            print >> sys.stderr, "%s:\t%d/%d passed (%.2f%%)" % (aligner_type, passed, len(test_list), passed * 100.0 / len(test_list))
+        for aligner_type, passed in list(test_passed.items()):
+            print("%s:\t%d/%d passed (%.2f%%)" % (aligner_type, passed, len(test_list), passed * 100.0 / len(test_list)), file=sys.stderr)
     
     else: # With real reads or BAMs
         if base_fname == "genome":
-            print >> sys.stderr, "\t", locus_list
+            print("\t", locus_list, file=sys.stderr)
         else:
-            print >> sys.stderr, "\t", ' '.join(locus_list)
+            print("\t", ' '.join(locus_list), file=sys.stderr)
         typing(simulation,
                base_fname,
                locus_list,
@@ -2543,7 +2543,7 @@
     else:
         locus_list = only_locus_list = args.only_locus_list.split(',')
     if args.aligners == "":
-        print >> sys.stderr, "Error: --aligners must be non-empty."
+        print("Error: --aligners must be non-empty.", file=sys.stderr)
         sys.exit(1)    
     args.aligners = args.aligners.split(',')
     for i in range(len(args.aligners)):
@@ -2552,14 +2552,14 @@
         args.read_fname = [args.read_fname_U]
     elif args.read_fname_1 != "" or args.read_fname_2 != "":
         if args.read_fname_1 == "" or args.read_fname_2 == "":
-            print >> sys.stderr, "Error: please specify both -1 and -2."
+            print("Error: please specify both -1 and -2.", file=sys.stderr)
             sys.exit(1)
         args.read_fname = [args.read_fname_1, args.read_fname_2]
     else:
         args.read_fname = []
     if args.alignment_fname != "" and \
             not os.path.exists(args.alignment_fname):
-        print >> sys.stderr, "Error: %s doesn't exist." % args.alignment_fname
+        print("Error: %s doesn't exist." % args.alignment_fname, file=sys.stderr)
         sys.exit(1)
 
     if args.verbose and args.verbose_level == 0:
@@ -2577,10 +2577,10 @@
                 debug[item] = 1
 
     if not args.partial:
-        print >> sys.stderr, "Warning: --no-partial should be used for debugging purpose only."
+        print("Warning: --no-partial should be used for debugging purpose only.", file=sys.stderr)
 
     if args.read_len * 2 > args.fragment_len:
-        print >> sys.stderr, "Warning: fragment might be too short (%d)" % (args.fragment_len)
+        print("Warning: fragment might be too short (%d)" % (args.fragment_len), file=sys.stderr)
 
     skip_fragment_regions = []
     if args.skip_fragment_regions != "":
--- hisat2.orig/hisatgenotype_modules/hisatgenotype_assembly_graph.py
+++ hisat2/hisatgenotype_modules/hisatgenotype_assembly_graph.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys
 import math, random
@@ -11,7 +11,7 @@
 def get_major_nt(nt_dic):
     nt = ''
     max_count = 0
-    for tmp_nt, tmp_value in nt_dic.items():
+    for tmp_nt, tmp_value in list(nt_dic.items()):
         tmp_count, tmp_var_id = tmp_value
         if len(tmp_nt) == 1:
             assert tmp_nt in "ACGTDN"
@@ -29,8 +29,8 @@
 
 #
 def match_score(nt_dic1, nt_dic2):
-    sum_1 = sum([count for count, _ in nt_dic1.values()])
-    sum_2 = sum([count for count, _ in nt_dic2.values()])
+    sum_1 = sum([count for count, _ in list(nt_dic1.values())])
+    sum_2 = sum([count for count, _ in list(nt_dic2.values())])
     total1, total2 = sum_1 * 2.0, sum_2 * 2.0
     best = 0.0
     for nt in "ACGT":
@@ -189,12 +189,12 @@
                     break
 
             if debug:
-                print "at %d (%d) with overlap of %d and mismatch of %.2f" % (i, self.left + i, j, tmp_mm)
+                print("at %d (%d) with overlap of %d and mismatch of %.2f" % (i, self.left + i, j, tmp_mm))
 
             if tmp_mm <= max_mm:
                 return i, min(len(seq) - i, len(other_seq)), tmp_mm
                 
-        return -1, -1, sys.maxint
+        return -1, -1, sys.maxsize
 
     
     # Combine two nodes with considering deletions
@@ -208,7 +208,7 @@
         assert self.left <= other.left
 
         # Merge two sequences
-        assert len(other.seq) > 0 and 'D' not in other.seq[0].keys()
+        assert len(other.seq) > 0 and 'D' not in list(other.seq[0].keys())
         j = 0        
         # Merge the overlapped parts
         if self.right >= other.left:
@@ -225,7 +225,7 @@
             new_seq = self.seq[:i]
             while i < len(self.seq) and j < len(other.seq):
                 nt_dic, nt_dic2 = self.seq[i], other.seq[j]
-                for nt, value in nt_dic2.items():
+                for nt, value in list(nt_dic2.items()):
                     count, var_id = value
                     if nt in nt_dic:
                         nt_dic[nt][0] += count
@@ -242,8 +242,8 @@
         # Fill in the gap between the two nodes if exists
         else:
             new_seq = self.seq[:]
-            sum_1 = sum([count for count, _ in self.seq[-1].values()])
-            sum_2 = sum([count for count, _ in other.seq[0].values()])
+            sum_1 = sum([count for count, _ in list(self.seq[-1].values())])
+            sum_2 = sum([count for count, _ in list(other.seq[0].values())])
             flank_cov = (sum_1 + sum_2) / 2.0
             for k in range(other.left - self.right - 1):
                 ref_nt_dic = self.mpileup[k + 1 + self.right][1]
@@ -252,8 +252,8 @@
                 if len(ref_nt_dic) == 0 or True:
                     nt_dic = {'N' : [1, ""]}
                 else:
-                    weight = flank_cov / max(1.0, sum([count for count, _ in ref_nt_dic.values()]))
-                    for nt, value in ref_nt_dic.items():
+                    weight = flank_cov / max(1.0, sum([count for count, _ in list(ref_nt_dic.values())]))
+                    for nt, value in list(ref_nt_dic.items()):
                         count, var_id = value
                         nt_dic[nt] = [count * weight, var_id]
                 new_seq.append(nt_dic)
@@ -293,7 +293,7 @@
 
     
     # Get variant ids
-    def get_var_ids(self, left = 0, right = sys.maxint):
+    def get_var_ids(self, left = 0, right = sys.maxsize):
         vars = []
         left = max(left, self.left)
         right = min(right, self.right)
@@ -308,7 +308,7 @@
                     ins_len += 1
                 else:
                     break            
-            for _, var in nt_dic.values():
+            for _, var in list(nt_dic.values()):
                 if var == "" or \
                    var == "unknown":
                     continue
@@ -326,7 +326,7 @@
     
     # Get variant ids
     #   left and right are gene-level coordinates
-    def get_vars(self, left = 0, right = sys.maxint):
+    def get_vars(self, left = 0, right = sys.maxsize):
         vars = []
         left = max(left, self.left)
         right = min(right, self.right)
@@ -353,7 +353,7 @@
                 vars.append(["gap", pos])
                 continue            
             added = False
-            for _, var in nt_dic.values():
+            for _, var in list(nt_dic.values()):
                 if var == "" or \
                    var == "unknown":
                     continue
@@ -369,7 +369,7 @@
                         skip_pos = pos + int(data) - 1
                     added = True
                     vars.append([var, pos])
-            if not added and "unknown" in [var_id for _, var_id in nt_dic.values()]:
+            if not added and "unknown" in [var_id for _, var_id in list(nt_dic.values())]:
                 vars.append(["unknown", pos])
 
         return vars
@@ -384,7 +384,7 @@
     def calculate_avg_cov(self):
         self.avg = 0.0
         for nt_dic in self.seq:
-            for count, _ in nt_dic.values():
+            for count, _ in list(nt_dic.values()):
                 self.avg += count
         self.avg /= len(self.seq)
         return self.avg
@@ -418,7 +418,7 @@
                 seq += "\033[00m"
 
             var = []
-            for _, var_id in nt_dic.values():
+            for _, var_id in list(nt_dic.values()):
                 if var_id == "":
                     continue
                 var.append(var_id)
@@ -429,13 +429,13 @@
             if nt[0] == 'I':
                 ins_len += 1
         
-        print >> output, "Node ID:", self.id
-        print >> output, "Pos: [%d, %d], Avg. coverage: %.1f" % (self.left, self.right, self.get_avg_cov())
-        print >> output, "\t", seq
-        print >> output, "\t", var_str
-        print >> output, "mates:", len(self.mate_ids) # sorted(self.mate_ids)
-        print >> output, "reads:", len(self.read_ids) # sorted(self.read_ids)
-        print >> output
+        print("Node ID:", self.id, file=output)
+        print("Pos: [%d, %d], Avg. coverage: %.1f" % (self.left, self.right, self.get_avg_cov()), file=output)
+        print("\t", seq, file=output)
+        print("\t", var_str, file=output)
+        print("mates:", len(self.mate_ids), file=output) # sorted(self.mate_ids)
+        print("reads:", len(self.read_ids), file=output) # sorted(self.read_ids)
+        print(file=output)
 
                 
 class Graph:
@@ -484,7 +484,7 @@
             
         if id_i == 0:
             if id in self.nodes:
-                print >> sys.stderr, "Warning) multi-mapped read:", id
+                print("Warning) multi-mapped read:", id, file=sys.stderr)
                 # assert False
                 return
             assert id not in self.nodes
@@ -498,7 +498,7 @@
     # Remove nodes that are inside other nodes or with low coverage
     def remove_nodes(self, nodes):
         delete_ids = set()
-        node_list = [[id, node.left, node.right] for id, node in nodes.items()]
+        node_list = [[id, node.left, node.right] for id, node in list(nodes.items())]
         def node_cmp(a, b):
             if a[2] != b[2]:
                 return a[2] - b[2]
@@ -609,7 +609,7 @@
                     leftshift(seq, self.backbone[node.left:node.left + len(seq)])
                 node_seq["%s.%d" % (id, node_i)] = seq
             
-        for id in self.nodes.keys():
+        for id in list(self.nodes.keys()):
             add_node_seq(node_seq, id)
             
         # AAA.1 => AAA, 1
@@ -621,7 +621,7 @@
         while True:
             delete_ids = set()
             nodes = []
-            for id, node in self.nodes.items():
+            for id, node in list(self.nodes.items()):
                 nodes_ = [node]
                 if id in self.other_nodes:
                     nodes_ += self.other_nodes[id]
@@ -747,8 +747,8 @@
 
                 debug_msg = False
                 if debug_msg:
-                    print >> sys.stderr, "at", pos, vertices
-                    print >> sys.stderr, "count:", vertice_count
+                    print("at", pos, vertices, file=sys.stderr)
+                    print("count:", vertice_count, file=sys.stderr)
 
                 if try_hard:
                     vertice_with_id = [[vertice_count[v], v] for v in range(len(vertice_count))]
@@ -758,7 +758,7 @@
                         num_ids = vertices[v][3]
                         delete_ids |= set(num_ids)
                         if debug_msg:
-                            print >> sys.stderr, v, "is removed with", num_ids
+                            print(v, "is removed with", num_ids, file=sys.stderr)
                 else:
                     if first_pair:
                         v, v2, multi_read_ids = first_pair
@@ -808,7 +808,7 @@
                                         num_ids = vertices[v][3]
                                         delete_ids |= set(num_ids)
                                         if debug_msg:
-                                            print >> sys.stderr, v, "is removed with", num_ids
+                                            print(v, "is removed with", num_ids, file=sys.stderr)
                                     elif vertice_count[v] * 8 < avg_kmers:
                                         num_ids = vertices[v][3]
                                         delete_ids |= set(num_ids)
@@ -820,11 +820,11 @@
                                     num_ids = vertices[v][3]
                                     delete_ids |= set(num_ids)
                                     if debug_msg:
-                                        print >> sys.stderr, v, "is removed with", num_ids
+                                        print(v, "is removed with", num_ids, file=sys.stderr)
 
                 if debug_msg:
-                    print >> sys.stderr
-                    print >> sys.stderr           
+                    print(file=sys.stderr)
+                    print(file=sys.stderr)           
                 
             # delete nodes
             ids_to_be_updated = set()
@@ -837,7 +837,7 @@
                 else:
                     self.other_nodes[id][sub-1] = None
             
-            for id in self.nodes.keys():
+            for id in list(self.nodes.keys()):
                 other_nodes = []
                 if id in self.other_nodes:
                     for other_node in self.other_nodes[id]:
@@ -882,7 +882,7 @@
                     else:
                         consensus_seq[j][nt] += 1
 
-            if print_msg: print >> sys.stderr, i
+            if print_msg: print(i, file=sys.stderr)
             for v in range(len(curr_vertices)):
                 nt, k_m1_mer, predecessors, num_ids = curr_vertices[v]
                 kmer = k_m1_mer + nt
@@ -895,7 +895,7 @@
                     if len(consensus_seq[j]) >= 2:
                         kmer_seq += "\033[00m"
                     
-                if print_msg: print >> sys.stderr, "\t%d:" % v, kmer_seq, len(num_ids), predecessors, num_ids
+                if print_msg: print("\t%d:" % v, kmer_seq, len(num_ids), predecessors, num_ids, file=sys.stderr)
 
         id_to_num = {}
         for num in range(len(num_to_id)):
@@ -993,7 +993,7 @@
         paths = sorted(paths, cmp=path_cmp)
 
         for p in range(len(paths)):
-            if print_msg: print >> sys.stderr, "path:", p, paths[p]
+            if print_msg: print("path:", p, paths[p], file=sys.stderr)
 
         excl_num_ids = set() # exclusive num ids
         equiv_list = []
@@ -1037,9 +1037,9 @@
                 classes = equiv_list[i]
                 for j in range(len(classes)):
                     ids, num_ids, all_ids, alleles = classes[j]
-                    if print_msg: print >> sys.stderr, i, j, ids, len(num_ids), sorted(list(num_ids))[:20], alleles
+                    if print_msg: print(i, j, ids, len(num_ids), sorted(list(num_ids))[:20], alleles, file=sys.stderr)
 
-                if print_msg: print >> sys.stderr
+                if print_msg: print(file=sys.stderr)
 
             if known_alleles:
                 for i in range(len(equiv_list)):
@@ -1049,8 +1049,8 @@
                         node_id = "(%d-%d)%s" % (i, j, num_to_id[num_ids[0]])
                         node = self.nodes2[node_id]
                         node_vars = node.get_var_ids()
-                        max_alleles, max_common = set(), -sys.maxint
-                        for anode in self.predicted_allele_nodes.values():
+                        max_alleles, max_common = set(), -sys.maxsize
+                        for anode in list(self.predicted_allele_nodes.values()):
                             allele_vars = anode.get_var_ids(node.left, node.right)
                             tmp_common = len(set(node_vars) & set(allele_vars)) - len(set(node_vars) | set(allele_vars))
                             if tmp_common > max_common:
@@ -1061,7 +1061,7 @@
                         classes[j][3] = max_alleles
 
             
-            best_common_mat, best_stat, best_i, best_i2 = [], -sys.maxint, -1, -1
+            best_common_mat, best_stat, best_i, best_i2 = [], -sys.maxsize, -1, -1
             for i in range(len(equiv_list) - 1):
                 classes = equiv_list[i]
                 for i2 in range(i + 1, len(equiv_list)):
@@ -1097,9 +1097,9 @@
                         best_common_mat, best_stat, best_i, best_i2 = common_mat, common_stat, i, i2
 
             if print_msg:
-                print >> sys.stderr, "best:", best_i, best_i2, best_stat, best_common_mat
-                print >> sys.stderr
-                print >> sys.stderr
+                print("best:", best_i, best_i2, best_stat, best_common_mat, file=sys.stderr)
+                print(file=sys.stderr)
+                print(file=sys.stderr)
 
             if known_alleles and best_stat < 0:
                 self.remove_nodes(self.nodes2)
@@ -1113,7 +1113,7 @@
                         ids, num_ids, all_ids, alleles = classes[j]
                         num_ids = sorted(list(num_ids))
 
-                        if print_msg: print >> sys.stderr, i, j, num_ids
+                        if print_msg: print(i, j, num_ids, file=sys.stderr)
 
                         assert (num_ids) > 0
                         read_id = num_to_id[num_ids[0]]
@@ -1293,7 +1293,7 @@
 
                 exclude_ids |= set(remove_list)
 
-                for node_id, node in self.nodes2.items():
+                for node_id, node in list(self.nodes2.items()):
                     if node_id in exclude_ids:
                         continue
                     num, id = node_id.split(')')
@@ -1310,14 +1310,14 @@
         
     # Display graph information
     def print_info(self): 
-        print >> sys.stderr, "Backbone len: %d" % len(self.backbone)
-        print >> sys.stderr, "\t%s" % self.backbone   
+        print("Backbone len: %d" % len(self.backbone), file=sys.stderr)
+        print("\t%s" % self.backbone, file=sys.stderr)   
 
 
     # Compare nodes and get information
     def get_node_comparison_info(self, node_dic):
         assert len(node_dic) > 0
-        nodes = [[id, node.left, node.right] for id, node in node_dic.items()]
+        nodes = [[id, node.left, node.right] for id, node in list(node_dic.items())]
         def node_cmp(a, b):
             if a[1] != b[1]:
                 return a[1] - b[1]
@@ -1404,20 +1404,20 @@
             if len(cur_seqs) <= 0:
                 continue
                 
-            print >> sys.stderr, p
+            print(p, file=sys.stderr)
             for seq, id in cur_seqs:
-                print >> sys.stderr, "\t", seq, id
+                print("\t", seq, id, file=sys.stderr)
 
                 
     # Calculate coverage
     def calculate_coverage(self):
         allele_nodes = self.true_allele_nodes if self.simulation else self.predicted_allele_nodes
-        allele_nodes = [[id, node.left, node.right] for id, node in allele_nodes.items()]
+        allele_nodes = [[id, node.left, node.right] for id, node in list(allele_nodes.items())]
         coverage = {}
         for allele_id, _, _ in allele_nodes:
             coverage[allele_id] = [0.0 for _ in range(len(self.backbone))]
 
-        nodes = [[id, node.left, node.right] for id, node in self.nodes.items()]
+        nodes = [[id, node.left, node.right] for id, node in list(self.nodes.items())]
         for id, left, right in nodes:
             node = self.nodes[id]
             nodes2 = [[node, left, right]]
@@ -1428,7 +1428,7 @@
             for node, left, right in nodes2:
                 node_vars = node.get_vars()
                 node_var_ids = node.get_var_ids()
-                max_common = -sys.maxint
+                max_common = -sys.maxsize
                 max_allele_node_ids = []
                 for allele_node_id, allele_left, allele_right in allele_nodes:
                     if right - left <= 500 and (left < allele_left or right > allele_right):
@@ -1454,9 +1454,9 @@
                         coverage[allele_node_id][p] += add_cov
 
         max_cov = 0.0
-        for allele_id, cov in coverage.items():
+        for allele_id, cov in list(coverage.items()):
             max_cov = max(max_cov, max(cov))
-        for allele_id, cov in coverage.items():
+        for allele_id, cov in list(coverage.items()):
             cov2 = [c / max_cov for c in cov]
             coverage[allele_id] = cov2
         self.coverage = coverage
@@ -1465,7 +1465,7 @@
     # Begin drawing graph
     def begin_draw(self, fname_base):
         pdfDraw = self.pdfDraw = open(fname_base + '.pdf', 'w')
-        print >> pdfDraw, r'%PDF-1.7'
+        print(r'%PDF-1.7', file=pdfDraw)
         self.objects, self.stream = [], []
         self.draw_items = []
         
@@ -1557,23 +1557,23 @@
 
         # Write xref and trailer
         to_xref = pdfDraw.tell()
-        print >> pdfDraw, 'xref'
-        print >> pdfDraw, "0 %d" % (len(self.objects) + 1)
-        print >> pdfDraw, r'0000000000 65535 f'
+        print('xref', file=pdfDraw)
+        print("0 %d" % (len(self.objects) + 1), file=pdfDraw)
+        print(r'0000000000 65535 f', file=pdfDraw)
         for object in self.objects:
-            print >> pdfDraw, "%s 00000 n" % "{:010}".format(object)
-        print >> pdfDraw, 'trailer <</Size %d /Root 1 0 R>>' % (len(self.objects) + 1)
-        print >> pdfDraw, 'startxref'
-        print >> pdfDraw, str(to_xref)
-        print >> pdfDraw, r'%%EOF'
+            print("%s 00000 n" % "{:010}".format(object), file=pdfDraw)
+        print('trailer <</Size %d /Root 1 0 R>>' % (len(self.objects) + 1), file=pdfDraw)
+        print('startxref', file=pdfDraw)
+        print(str(to_xref), file=pdfDraw)
+        print(r'%%EOF', file=pdfDraw)
         
         self.pdfDraw.close()
 
         
     def add_pdf_object(self, obj):
         self.objects.append(self.pdfDraw.tell())
-        print >> self.pdfDraw, "%d 0 obj %s" % (len(self.objects), obj)
-        print >> self.pdfDraw, 'endobj'
+        print("%d 0 obj %s" % (len(self.objects), obj), file=self.pdfDraw)
+        print('endobj', file=self.pdfDraw)
 
 
     def add_pdf_stream(self, stream):
@@ -1586,7 +1586,7 @@
              begin_y,
              title = ""):
         assert len(self.nodes) > 0
-        nodes = [[id, node.left, node.right] for id, node in self.nodes.items()]
+        nodes = [[id, node.left, node.right] for id, node in list(self.nodes.items())]
         def node_cmp(a, b):
             return a[1] - b[1]
         nodes = sorted(nodes, cmp=node_cmp)
@@ -1827,7 +1827,7 @@
                     color = "0.85 0.85 0.85"
                 elif len(allele_nodes) > 0:
                     color = "1 1 1"
-                    max_common = -sys.maxint
+                    max_common = -sys.maxsize
                     for a in range(len(allele_nodes)):
                         allele_node_id, allele_left, allele_right = allele_nodes[a]
                         if right - left <= 500 and (left < allele_left or right > allele_right):
--- hisat2.orig/hisatgenotype_modules/hisatgenotype_typing_common.py
+++ hisat2/hisatgenotype_modules/hisatgenotype_typing_common.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 #
 # Copyright 2017, Daehwan Kim <infphilo@gmail.com>
 #
@@ -248,12 +248,12 @@
     # DK - debugging purposes
     # extract_cmd += ["--ext-seq", "300"]
     if verbose:
-        print >> sys.stderr, "\tRunning:", ' '.join(extract_cmd)
+        print("\tRunning:", ' '.join(extract_cmd), file=sys.stderr)
     proc = subprocess.Popen(extract_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w'))
     proc.communicate()
 
     if not check_files(fnames):
-        print >> sys.stderr, "Error: hisatgenotype_extract_vars failed!"
+        print("Error: hisatgenotype_extract_vars failed!", file=sys.stderr)
         sys.exit(1)
 
         
@@ -276,11 +276,11 @@
                              "%s_backbone.fa" % base,
                              "%s.graph" % base]
                 if verbose:
-                    print >> sys.stderr, "\tRunning:", ' '.join(build_cmd)
+                    print("\tRunning:", ' '.join(build_cmd), file=sys.stderr)
                 proc = subprocess.Popen(build_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w'))
                 proc.communicate()        
                 if not check_files(hisat2_graph_index_fnames):
-                    print >> sys.stderr, "Error: indexing HLA failed!  Perhaps, you may have forgotten to build hisat2 executables?"
+                    print("Error: indexing HLA failed!  Perhaps, you may have forgotten to build hisat2 executables?", file=sys.stderr)
                     sys.exit(1)
         # Build HISAT2 linear indexes based on the above information
         else:
@@ -293,7 +293,7 @@
                 proc = subprocess.Popen(build_cmd, stdout=open("/dev/null", 'w'), stderr=open("/dev/null", 'w'))
                 proc.communicate()        
                 if not check_files(hisat2_linear_index_fnames):
-                    print >> sys.stderr, "Error: indexing HLA failed!"
+                    print("Error: indexing HLA failed!", file=sys.stderr)
                     sys.exit(1)                    
     else:
         # Build Bowtie2 indexes based on the above information
@@ -307,7 +307,7 @@
             proc = subprocess.Popen(build_cmd, stdout=open("/dev/null", 'w'))
             proc.communicate()        
             if not check_files(bowtie2_index_fnames):
-                print >> sys.stderr, "Error: indexing HLA failed!"
+                print("Error: indexing HLA failed!", file=sys.stderr)
                 sys.exit(1)
 
                     
@@ -492,7 +492,7 @@
 
             # Extract variants included in each allele
             var_ids = []
-            for var_id, allele_list in Links.items():
+            for var_id, allele_list in list(Links.items()):
                 if allele_name in allele_list:
                     var_ids.append(var_id)
 
@@ -569,8 +569,8 @@
             query_name = "%d|%s_%s" % (read_i + 1, "LR"[idx-1], reads[read_i][1])
             if len(query_name) > 254:
                 query_name = query_name[:254]
-            print >> read_file, ">%s" % query_name
-            print >> read_file, reads[read_i][0]
+            print(">%s" % query_name, file=read_file)
+            print(reads[read_i][0], file=read_file)
         read_file.close()
     write_reads(reads_1, 1)
     write_reads(reads_2, 2)
@@ -626,7 +626,7 @@
                         "-2", "%s" % read_fname[1]]
 
     if verbose >= 1:
-        print >> sys.stderr, ' '.join(aligner_cmd)
+        print(' '.join(aligner_cmd), file=sys.stderr)
     align_proc = subprocess.Popen(aligner_cmd,
                                   stdout=subprocess.PIPE,
                                   stderr=open("/dev/null", 'w'))
@@ -730,7 +730,7 @@
         num_nt = sum(nt_dic.values())
         nt_set = []
         if num_nt >= 20:
-            for nt, count in nt_dic.items():
+            for nt, count in list(nt_dic.items()):
                 if nt not in "ACGT":
                     continue
                 if count >= num_nt * 0.2 or count >= 7:
@@ -739,7 +739,7 @@
 
     # Sort variants
     var_list = [[] for i in range(len(mpileup))]
-    for var_id, value in vars.items():
+    for var_id, value in list(vars.items()):
         var_type, var_pos, var_data = value
         assert var_pos < len(var_list)
         var_list[var_pos].append([var_id, var_type, var_data])
@@ -750,7 +750,7 @@
         nt_dic = mpileup[i][1]
         ref_nt = ref_seq[i]
         new_nt_dic = {}
-        for nt, count in nt_dic.items():
+        for nt, count in list(nt_dic.items()):
             var_id = ""
             if nt == 'D':
                 if i <= skip_i:
@@ -826,7 +826,7 @@
         else:
             concordant = False
 
-        NH, YT = sys.maxint, ""
+        NH, YT = sys.maxsize, ""
         for i in range(11, len(cols)):
              col = cols[i]
              if col.startswith("NH"):
@@ -878,7 +878,7 @@
 """
 def prob_diff(prob1, prob2):
     diff = 0.0
-    for allele in prob1.keys():
+    for allele in list(prob1.keys()):
         if allele in prob2:
             diff += abs(prob1[allele] - prob2[allele])
         else:
@@ -908,20 +908,20 @@
                      Gene_length = {}):
     def normalize(prob):
         total = sum(prob.values())
-        for allele, mass in prob.items():
+        for allele, mass in list(prob.items()):
             prob[allele] = mass / total        
 
     def normalize_len(prob, length):
         total = 0
-        for allele, mass in prob.items():
+        for allele, mass in list(prob.items()):
             assert allele in length
             total += (mass / length[allele])
-        for allele, mass in prob.items():
+        for allele, mass in list(prob.items()):
             assert allele in length
             prob[allele] = mass / length[allele] / total
 
     Gene_prob, Gene_prob_next = {}, {}
-    for cmpt, count in Gene_cmpt.items():
+    for cmpt, count in list(Gene_cmpt.items()):
         alleles = cmpt.split('-')
         for allele in alleles:
             if allele not in Gene_prob:
@@ -934,7 +934,7 @@
 
     def next_prob(Gene_cmpt, Gene_prob, Gene_length):
         Gene_prob_next = {}
-        for cmpt, count in Gene_cmpt.items():
+        for cmpt, count in list(Gene_cmpt.items()):
             alleles = cmpt.split('-')
             alleles_prob = 0.0
             for allele in alleles:
@@ -960,7 +960,7 @@
             return Gene_prob
         Gene_prob2 = {}
         max_prob = max(Gene_prob.values())
-        for allele, prob in Gene_prob.items():
+        for allele, prob in list(Gene_prob.items()):
             if prob >= max_prob / 10.0:
                 Gene_prob2[allele] = prob
         return Gene_prob2
@@ -976,14 +976,14 @@
             Gene_prob_next2 = next_prob(Gene_cmpt, Gene_prob_next, Gene_length)
             sum_squared_r, sum_squared_v = 0.0, 0.0
             p_r, p_v = {}, {}
-            for a in Gene_prob.keys():
+            for a in list(Gene_prob.keys()):
                 p_r[a] = Gene_prob_next[a] - Gene_prob[a]
                 sum_squared_r += (p_r[a] * p_r[a])
                 p_v[a] = Gene_prob_next2[a] - Gene_prob_next[a] - p_r[a]
                 sum_squared_v += (p_v[a] * p_v[a])
             if sum_squared_v > 0.0:
                 gamma = -math.sqrt(sum_squared_r / sum_squared_v)
-                for a in Gene_prob.keys():
+                for a in list(Gene_prob.keys()):
                     Gene_prob_next2[a] = max(0.0, Gene_prob[a] - 2 * gamma * p_r[a] + gamma * gamma * p_v[a]);
                 Gene_prob_next = next_prob(Gene_cmpt, Gene_prob_next2, Gene_length)
 
@@ -996,10 +996,10 @@
 
         # DK - debugging purposes
         if iter % 10 == 0 and False:
-            print >> sys.stderr, "iter", iter
-            for allele, prob in Gene_prob.items():
+            print("iter", iter, file=sys.stderr)
+            for allele, prob in list(Gene_prob.items()):
                 if prob >= 0.001:
-                    print >> sys.stderr, "\t", iter, allele, prob
+                    print("\t", iter, allele, prob, file=sys.stderr)
         
         iter += 1
 
@@ -1009,7 +1009,7 @@
             normalize_len(Gene_prob, Gene_length)
     else:
         normalize(Gene_prob)
-    Gene_prob = [[allele, prob] for allele, prob in Gene_prob.items()]
+    Gene_prob = [[allele, prob] for allele, prob in list(Gene_prob.items())]
     Gene_prob = sorted(Gene_prob, cmp=Gene_prob_cmp)
     return Gene_prob
 
@@ -1034,7 +1034,7 @@
                      verbose):
     haplotype_alts_left, haplotype_alts_right = {}, {}
     second_order_haplotypes = set()
-    for allele_name, vars in allele_vars.items():
+    for allele_name, vars in list(allele_vars.items()):
         for v in range(len(vars) - 1):
             ht = vars[v] + "-" + vars[v+1]
             second_order_haplotypes.add(ht)
@@ -1066,7 +1066,7 @@
                 prev_id = haplotype[1]        
 
             var_i = lower_bound(rev_Var_list, pos + 1)
-            for var_j in reversed(range(0, var_i)):
+            for var_j in reversed(list(range(0, var_i))):
                 _, var_id = rev_Var_list[var_j]
                 var_type, var_pos, var_data = Vars[var_id]
                 if var_type == "deletion":
@@ -1230,16 +1230,16 @@
 
     # Print alternative haplotypes / Sanity check
     def print_haplotype_alts(haplotype_alts):
-        for haplotype, haplotype_set in haplotype_alts.items():
-            if verbose: print "\t%s:" % haplotype, haplotype_set
+        for haplotype, haplotype_set in list(haplotype_alts.items()):
+            if verbose: print("\t%s:" % haplotype, haplotype_set)
             haplotype_seq = get_haplotype_seq(haplotype.split('-'))
             for haplotype_alt in haplotype_set:
                 haplotype_alt_seq = get_haplotype_seq(haplotype_alt.split('-'))
                 assert haplotype_seq == haplotype_alt_seq            
 
-    if verbose: print "number of left haplotypes:", len(haplotype_alts_left)
+    if verbose: print("number of left haplotypes:", len(haplotype_alts_left))
     print_haplotype_alts(haplotype_alts_left)
-    if verbose: print "number of right haplotypes:", len(haplotype_alts_right)
+    if verbose: print("number of right haplotypes:", len(haplotype_alts_right))
     print_haplotype_alts(haplotype_alts_right)
 
     return haplotype_alts_left, haplotype_alts_right
@@ -1287,7 +1287,7 @@
 
     # Left direction
     found = False
-    for i in reversed(range(len(cmp_list))):
+    for i in reversed(list(range(len(cmp_list)))):
         i_found = False
         cmp_i = cmp_list[i]
         type, cur_left, length = cmp_i[:3]
@@ -1309,7 +1309,7 @@
         else:
             cur_ht_str = "%d-%s" % (left, '-'.join(cur_ht))
         ht_i = lower_bound(Alts_left_list, cur_right + 1)
-        for ht_j in reversed(range(0, min(ht_i + 1, len(Alts_left_list)))):
+        for ht_j in reversed(list(range(0, min(ht_i + 1, len(Alts_left_list))))):
             ht_pos, ht = Alts_left_list[ht_j]
             if ht_pos < cur_left:
                 break            
@@ -1335,15 +1335,15 @@
 
             i_found = True
             if debug:
-                print cmp_list[:i+1]
-                print "\t", cur_ht, "vs", Alts_left_list[ht_j]
+                print(cmp_list[:i+1])
+                print("\t", cur_ht, "vs", Alts_left_list[ht_j])
 
             _, rep_ht = Alts_left_list[ht_j]
 
             if debug:
-                print "DK1:", cmp_i, cmp_list
-                print "DK2:", rep_ht, Alts_left[rep_ht]
-                print "DK3:", left, right
+                print("DK1:", cmp_i, cmp_list)
+                print("DK2:", rep_ht, Alts_left[rep_ht])
+                print("DK3:", left, right)
 
             for alt_ht_str in Alts_left[rep_ht]:
                 alt_ht = alt_ht_str.split('-')
@@ -1393,7 +1393,7 @@
                     left_alt_set.add(part_alt_ht_str)
                         
                 if debug:
-                    print "\t\t", cur_left, alt_ht_str
+                    print("\t\t", cur_left, alt_ht_str)
 
         if i_found:
             if not found:
@@ -1455,9 +1455,9 @@
             _, rep_ht = Alts_right_list[ht_j]
 
             if debug:
-                print "DK1:", cmp_i, cmp_list
-                print "DK2:", rep_ht, Alts_right[rep_ht]
-                print "DK3:", left, right, ht_pos
+                print("DK1:", cmp_i, cmp_list)
+                print("DK2:", rep_ht, Alts_right[rep_ht])
+                print("DK3:", left, right, ht_pos)
 
             for alt_ht_str in Alts_right[rep_ht]:
                 alt_ht = alt_ht_str.split('-')
@@ -1525,12 +1525,12 @@
         if ht == "":
             continue
         if ht in ht_set_:
-            print >> sys.stderr, "Error) %s should not be in" % ht, ht_set_
+            print("Error) %s should not be in" % ht, ht_set_, file=sys.stderr)
 
             # DK - debugging purposes
-            print "DK: cmp_list_range: [%d, %d]" % (cmp_left, cmp_right)
-            print "DK: cmp_list:", cmp_list
-            print "DK: left_alt_set:", left_alt_set, "right_alt_set:", right_alt_set
+            print("DK: cmp_list_range: [%d, %d]" % (cmp_left, cmp_right))
+            print("DK: cmp_list:", cmp_list)
+            print("DK: left_alt_set:", left_alt_set, "right_alt_set:", right_alt_set)
             
             assert False
         ht_set_.add(ht)
@@ -1539,14 +1539,14 @@
         if ht == "":
             continue
         if ht in ht_set_:
-            print >> sys.stderr, "Error) %s should not be in" % ht, ht_set_
+            print("Error) %s should not be in" % ht, ht_set_, file=sys.stderr)
             assert False
         ht_set_.add(ht)
 
     if debug:
-        print "cmp_list_range: [%d, %d]" % (cmp_left, cmp_right)
-        print "left  alt set:", left_alt_set
-        print "right alt set:", right_alt_set
+        print("cmp_list_range: [%d, %d]" % (cmp_left, cmp_right))
+        print("left  alt set:", left_alt_set)
+        print("right alt set:", right_alt_set)
     
     return cmp_left, cmp_right, list(left_alt_set), list(right_alt_set)
 
--- hisat2.orig/hisatgenotype_scripts/compare_HLA.py
+++ hisat2/hisatgenotype_scripts/compare_HLA.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os
 from argparse import ArgumentParser, FileType
@@ -9,7 +9,7 @@
             utsw_fname):
     hla_list = ["A", "B", "C", "DQA1", "DQB1", "DRB1"]
     for level in [1,2]:
-        print >> sys.stderr, "Level: %d" % level
+        print("Level: %d" % level, file=sys.stderr)
         def read_hla_types(fname):
             hla, hla_orig = {}, {}
             for line in open(fname):
@@ -58,8 +58,8 @@
                 if not found:
                     hla[sample][gene].append([allele, abundance])
 
-            for sample_hla in hla.values():
-                for gene, allele_list in sample_hla.items():
+            for sample_hla in list(hla.values()):
+                for gene, allele_list in list(sample_hla.items()):
                     sample_hla[gene] = sorted(allele_list, key=lambda a: a[1], reverse=True)
                 
             return hla, hla_orig
@@ -69,8 +69,8 @@
 
         for gene in hla_list:
             count, count_10 = [0, 0, 0], [0, 0, 0]
-            print >> sys.stderr, "\t%s" % gene
-            for sample in hla2.keys():
+            print("\t%s" % gene, file=sys.stderr)
+            for sample in list(hla2.keys()):
                 if sample not in hla1:
                     continue
                 hla1_sample = hla1[sample]
@@ -107,15 +107,15 @@
                 # """
                 # if gene in ["A", "B", "C", "DQA1", "DQB1", "DRB1"] and num_match < 2:
                 if level == 3 and gene in ["B"] and num_match < 2:
-                    print sample
-                    print "\t", hla1_gene, "orig:", hla1_orig[sample][gene]
-                    print "\t", hla2_gene, "orig:", hla2_orig[sample][gene]
+                    print(sample)
+                    print("\t", hla1_gene, "orig:", hla1_orig[sample][gene])
+                    print("\t", hla2_gene, "orig:", hla2_orig[sample][gene])
                     # sys.exit(1)
                 # """
 
                 # DK - debugging purposes
                 if num_match >= len(count) or num_match_10 >= len(count_10):
-                    print sample, num_match, num_match_10
+                    print(sample, num_match, num_match_10)
 
                 assert num_match < len(count) and num_match_10 < len(count_10)
                 count[num_match] += 1
@@ -124,8 +124,8 @@
             if sum(count) <= 0:
                 continue
 
-            print >> sys.stderr, "\t\tTop two\t0: %d, 1: %d, 2: %d (%.2f%%)" % (count[0], count[1], count[2], (count[1] + count[2] * 2) / float(sum(count) * 2) * 100.0)
-            print >> sys.stderr, "\t\tTop ten\t0: %d, 1: %d, 2: %d (%.2f%%)" % (count_10[0], count_10[1], count_10[2], (count_10[1] + count_10[2] * 2) / float(sum(count_10) * 2) * 100.0)
+            print("\t\tTop two\t0: %d, 1: %d, 2: %d (%.2f%%)" % (count[0], count[1], count[2], (count[1] + count[2] * 2) / float(sum(count) * 2) * 100.0), file=sys.stderr)
+            print("\t\tTop ten\t0: %d, 1: %d, 2: %d (%.2f%%)" % (count_10[0], count_10[1], count_10[2], (count_10[1] + count_10[2] * 2) / float(sum(count_10) * 2) * 100.0), file=sys.stderr)
 
 
 if __name__ == "__main__":
--- hisat2.orig/hisatgenotype_scripts/compare_HLA_Omixon.py
+++ hisat2/hisatgenotype_scripts/compare_HLA_Omixon.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 import sys, os
 from argparse import ArgumentParser, FileType
@@ -52,8 +52,8 @@
 
     for gene in hla_list:
         count, count_10 = [0, 0, 0], [0, 0, 0]
-        print >> sys.stderr, gene
-        for sample in omixon_hla.keys():
+        print(gene, file=sys.stderr)
+        for sample in list(omixon_hla.keys()):
             if sample not in hisat_hla:
                 continue
             hisat_sample = hisat_hla[sample]
@@ -106,8 +106,8 @@
         if sum(count) <= 0:
             continue
         
-        print >> sys.stderr, "\tTop two\t0: %d, 1: %d, 2: %d (%.2f%%)" % (count[0], count[1], count[2], (count[1] + count[2] * 2) / float(sum(count) * 2) * 100.0)
-        print >> sys.stderr, "\tTop ten\t0: %d, 1: %d, 2: %d (%.2f%%)" % (count_10[0], count_10[1], count_10[2], (count_10[1] + count_10[2] * 2) / float(sum(count_10) * 2) * 100.0)
+        print("\tTop two\t0: %d, 1: %d, 2: %d (%.2f%%)" % (count[0], count[1], count[2], (count[1] + count[2] * 2) / float(sum(count) * 2) * 100.0), file=sys.stderr)
+        print("\tTop ten\t0: %d, 1: %d, 2: %d (%.2f%%)" % (count_10[0], count_10[1], count_10[2], (count_10[1] + count_10[2] * 2) / float(sum(count_10) * 2) * 100.0), file=sys.stderr)
         
 
 if __name__ == "__main__":
--- hisat2.orig/hisatgenotype_scripts/extract_Omixon_HLA.py
+++ hisat2/hisatgenotype_scripts/extract_Omixon_HLA.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 #
 # Copyright 2016, Daehwan Kim <infphilo@gmail.com>
@@ -44,9 +44,9 @@
                     nuc_alleles[gene] = set()
                 nuc_alleles[gene].add(allele)
 
-    print >> sys.stderr, "IMGTHLA"
-    for gene, alleles in nuc_alleles.items():
-        print >> sys.stderr, "\t%s: %d alleles" % (gene, len(alleles))
+    print("IMGTHLA", file=sys.stderr)
+    for gene, alleles in list(nuc_alleles.items()):
+        print("\t%s: %d alleles" % (gene, len(alleles)), file=sys.stderr)
 
     # Read HLA alleles from Omixon data
     omixon_alleles = {}
@@ -95,11 +95,11 @@
             omixon_alleles[gene].add(allele2)
             prev_allele1, prev_allele2 = allele1, allele2
 
-            print "%s\t%s\t%s" % (genome, allele1, allele2)
+            print("%s\t%s\t%s" % (genome, allele1, allele2))
 
-    print >> sys.stderr, "Omixon"
-    for gene, alleles in omixon_alleles.items():
-        print >> sys.stderr, "\t%s: %d alleles" % (gene, len(alleles))
+    print("Omixon", file=sys.stderr)
+    for gene, alleles in list(omixon_alleles.items()):
+        print("\t%s: %d alleles" % (gene, len(alleles)), file=sys.stderr)
         for allele in alleles:
             if allele in nuc_alleles[gene]:
                 continue
@@ -110,6 +110,6 @@
                     break                    
 
             if not found:
-                print >> sys.stderr, "\t\t%s is missing" % allele
+                print("\t\t%s is missing" % allele, file=sys.stderr)
 
             
--- hisat2.orig/hisatgenotype_scripts/hisatgenotype_HLA_genotyping_PGs.py
+++ hisat2/hisatgenotype_scripts/hisatgenotype_HLA_genotyping_PGs.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 #
 # Copyright 2015, Daehwan Kim <infphilo@gmail.com>
@@ -84,7 +84,7 @@
     ex_path = os.path.dirname(curr_script)
 
     if not os.path.exists("illumina/HLA"):
-        print >> sys.stderr, "Error: illumina/HLA data is needed (please send an email to infphilo@gmail.com for getting the data)"
+        print("Error: illumina/HLA data is needed (please send an email to infphilo@gmail.com for getting the data)", file=sys.stderr)
         sys.exit(1)
 
     num_test, num_success = 0, 0
@@ -95,14 +95,14 @@
         read_fname_1, read_fname_2 = "illumina/HLA/%s.fished_1.fq" % genome, "illumina/HLA/%s.fished_2.fq" % genome
         if not os.path.exists(read_fname_1) or not os.path.exists(read_fname_2):
             continue
-        print >> sys.stderr, genome        
+        print(genome, file=sys.stderr)        
         cmd_aligners = ['.'.join(aligners[i]) for i in range(len(aligners))]
         test_hla_script = os.path.join(ex_path, "hisat2_test_HLA_genotyping.py")
         for gene in sorted(genes.keys()):
             if not gene in hla_list:
                 continue
             alleles = genes[gene]
-            print >> sys.stderr, "\t%s - %s" % (gene, ' / '.join(alleles))            
+            print("\t%s - %s" % (gene, ' / '.join(alleles)), file=sys.stderr)            
             test_hla_cmd = [test_hla_script,
                             "--reference-type", reference_type,
                             "--hla-list", gene,
@@ -113,13 +113,13 @@
                             "--num-mismatch", str(num_mismatch)]
 
             if verbose:
-                print >> sys.stderr, ' '.join(test_hla_cmd)
+                print(' '.join(test_hla_cmd), file=sys.stderr)
             
             proc = subprocess.Popen(test_hla_cmd, stdout=subprocess.PIPE, stderr=open("/dev/null", 'w'))
             num_test += 2
             test_alleles = set()
             for line in proc.stdout:
-                print "\t\t", line,
+                print("\t\t", line, end=' ')
                 model, allele = line.split()[:2]
                 if model != "SingleModel":
                     continue
@@ -131,7 +131,7 @@
                 if allele in test_alleles:
                     num_success += 1
 
-    print >> sys.stderr, "%d/%d (%.2f%%)" % (num_success, num_test, num_success * 100.0 / num_test)
+    print("%d/%d (%.2f%%)" % (num_success, num_test, num_success * 100.0 / num_test), file=sys.stderr)
 
 
 """
@@ -154,7 +154,7 @@
                         type=str,
                         default="hisat2.graph",
                         help="A comma-separated list of aligners (default: hisat2.graph)")
-    genomes_default = ','.join(gold_allele_info.keys())
+    genomes_default = ','.join(list(gold_allele_info.keys()))
     parser.add_argument("--genome-list",
                         dest="genome_list",
                         type=str,
@@ -178,11 +178,11 @@
     args = parser.parse_args()
 
     if not args.reference_type in ["gene", "chromosome", "genome"]:
-        print >> sys.stderr, "Error: --reference-type (%s) must be one of gene, chromosome, and genome." % (args.reference_type)
+        print("Error: --reference-type (%s) must be one of gene, chromosome, and genome." % (args.reference_type), file=sys.stderr)
         sys.exit(1)
     args.hla_list = args.hla_list.split(',')
     if args.aligners == "":
-        print >> sys.stderr, "Error: --aligners must be non-empty."
+        print("Error: --aligners must be non-empty.", file=sys.stderr)
         sys.exit(1)    
     args.aligners = args.aligners.split(',')
     for i in range(len(args.aligners)):
--- hisat2.orig/hisatgenotype_scripts/hisatgenotype_convert_codis.py
+++ hisat2/hisatgenotype_scripts/hisatgenotype_convert_codis.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 #
 # Copyright 2017, Daehwan Kim <infphilo@gmail.com>
@@ -28,7 +28,7 @@
 try:
     import openpyxl
 except ImportError:
-    print >> sys.stderr, "Error: please install openpyxl by running 'pip install openpyxl'."
+    print("Error: please install openpyxl by running 'pip install openpyxl'.", file=sys.stderr)
     sys.exit(1)
 
 
@@ -225,7 +225,7 @@
 def get_equal_score(repeat_i, repeat_nums_i, repeat_j, repeat_nums_j):
     if repeat_i == repeat_j:
         # DK - experimental SW alignment
-        min_diff = sys.maxint
+        min_diff = sys.maxsize
         for repeat_num_i in repeat_nums_i:
             for repeat_num_j in repeat_nums_j:
                 min_diff = min(abs(repeat_num_i - repeat_num_j), min_diff)
@@ -372,7 +372,7 @@
     allele_freq = {}
     if min_freq > 0.0:
         excel = openpyxl.load_workbook("hisatgenotype_db/CODIS/NIST-US1036-AlleleFrequencies.xlsx")
-        sheet = excel.get_sheet_by_name(u'All data, n=1036')
+        sheet = excel['All data, n=1036']
         for col in range(2, 100):
             locus_name = sheet.cell(row = 3, column = col).value
             if not locus_name:
@@ -394,22 +394,22 @@
     CODIS_seq = orig_CODIS_seq
     if len(locus_list) > 0:
         new_CODIS_seq = {}
-        for locus_name, fields in CODIS_seq.items():
+        for locus_name, fields in list(CODIS_seq.items()):
             if locus_name in locus_list:
                 new_CODIS_seq[locus_name] = fields
         CODIS_seq = new_CODIS_seq        
 
     # Add some additional sequences to allele sequences to make them reasonably long for typing and assembly
-    for locus_name, fields in CODIS_seq.items():
+    for locus_name, fields in list(CODIS_seq.items()):
         _, left_seq, repeat_seq, right_seq = fields
         allele_seq = left_seq + repeat_seq + right_seq
         left_flank_seq, right_flank_seq = get_flanking_seqs(allele_seq)
         CODIS_seq[locus_name][1] = left_flank_seq + left_seq
         CODIS_seq[locus_name][3] = right_seq + right_flank_seq
 
-        print >> sys.stderr, "%s is found on the reference genome (GRCh38)" % locus_name
+        print("%s is found on the reference genome (GRCh38)" % locus_name, file=sys.stderr)
     
-    for locus_name in CODIS_seq.keys():
+    for locus_name in list(CODIS_seq.keys()):
         alleles = []
         for line in open("hisatgenotype_db/CODIS/codis.dat"):
             locus_name2, allele_id, repeat_st = line.strip().split('\t')
@@ -499,8 +499,8 @@
             for allele_id, repeat_st in alleles:
                 allele_seq = to_sequence(repeat_st)
                 if allele_seq in seq_to_ids:
-                    print >> sys.stderr, "Warning) %s: %s has the same sequence as %s" % \
-                        (locus_name, allele_id, seq_to_ids[allele_seq])
+                    print("Warning) %s: %s has the same sequence as %s" % \
+                        (locus_name, allele_id, seq_to_ids[allele_seq]), file=sys.stderr)
                     continue
                 if allele_seq not in seq_to_ids:
                     seq_to_ids[allele_seq] = [allele_id]
@@ -528,28 +528,28 @@
             allele_seqs = [[allele_id, ref_allele]] + allele_seqs
             alleles = [[allele_id, ref_allele_st]] + alleles
 
-        print >> sys.stderr, "%s: %d alleles with reference allele as %s" % (locus_name, len(alleles), CODIS_ref_name[locus_name])
+        print("%s: %d alleles with reference allele as %s" % (locus_name, len(alleles), CODIS_ref_name[locus_name]), file=sys.stderr)
         if verbose:
-            print >> sys.stderr, "\t", ref_allele_left, ref_allele, ref_allele_right
+            print("\t", ref_allele_left, ref_allele, ref_allele_right, file=sys.stderr)
             for allele_id, allele in alleles:
-                print >> sys.stderr, allele_id, "\t", allele
+                print(allele_id, "\t", allele, file=sys.stderr)
 
         # Create a backbone sequence
         assert len(alleles) > 0
         backbone_allele = deepcopy(alleles[-1][1])
         for allele_id, allele_st in reversed(alleles[:-1]):
             if verbose:
-                print >> sys.stderr
-                print >> sys.stderr, allele_id
-                print >> sys.stderr, "backbone         :", backbone_allele
-                print >> sys.stderr, "allele           :", allele_st
+                print(file=sys.stderr)
+                print(allele_id, file=sys.stderr)
+                print("backbone         :", backbone_allele, file=sys.stderr)
+                print("allele           :", allele_st, file=sys.stderr)
             backbone_allele = combine_alleles(backbone_allele, allele_st)
             msf_allele_seq, msf_backbone_seq = msf_alignment(backbone_allele, allele_st)
             if verbose:                
-                print >> sys.stderr, "combined backbone:", backbone_allele
-                print >> sys.stderr, "msf_allele_seq  :", msf_allele_seq
-                print >> sys.stderr, "msf_backbone_seq:", msf_backbone_seq
-                print >> sys.stderr
+                print("combined backbone:", backbone_allele, file=sys.stderr)
+                print("msf_allele_seq  :", msf_allele_seq, file=sys.stderr)
+                print("msf_backbone_seq:", msf_backbone_seq, file=sys.stderr)
+                print(file=sys.stderr)
 
         allele_dic = {}
         for allele_id, allele_seq in allele_seqs:
@@ -563,7 +563,7 @@
         # Sanity check
         assert len(allele_dic) == len(allele_repeat_msf)
         repeat_len = None
-        for allele_id, repeat_msf in allele_repeat_msf.items():
+        for allele_id, repeat_msf in list(allele_repeat_msf.items()):
             if not repeat_len:
                 repeat_len = len(repeat_msf)
             else:
@@ -572,11 +572,11 @@
         # Creat full multiple sequence alignment
         ref_allele_id = CODIS_ref_name[locus_name]
         allele_msf = {}
-        for allele_id, repeat_msf in allele_repeat_msf.items():
+        for allele_id, repeat_msf in list(allele_repeat_msf.items()):
             allele_msf[allele_id] = ref_allele_left + repeat_msf + ref_allele_right
 
         # Make sure the length of allele ID is short, less than 20 characters
-        max_allele_id_len = max([len(allele_id) for allele_id in allele_dic.keys()])
+        max_allele_id_len = max([len(allele_id) for allele_id in list(allele_dic.keys())])
         assert max_allele_id_len < 20
 
         # Write MSF (multiple sequence alignment file)
@@ -584,17 +584,17 @@
         msf_fname = "%s_gen.msf" % locus_name
         msf_file = open(msf_fname, 'w')
         for s in range(0, msf_len, 50):
-            for allele_id, msf in allele_msf.items():
+            for allele_id, msf in list(allele_msf.items()):
                 assert len(msf) == msf_len
                 allele_name = "%s*%s" % (locus_name, allele_id)
-                print >> msf_file, "%20s" % allele_name,
+                print("%20s" % allele_name, end=' ', file=msf_file)
                 for s2 in range(s, min(msf_len, s + 50), 10):
-                    print >> msf_file, " %s" % msf[s2:s2+10],
-                print >> msf_file
+                    print(" %s" % msf[s2:s2+10], end=' ', file=msf_file)
+                print(file=msf_file)
 
             if s + 50 >= msf_len:
                 break
-            print >> msf_file
+            print(file=msf_file)
         msf_file.close()
 
         # Write FASTA file
@@ -602,9 +602,9 @@
         fasta_file = open(fasta_fname, 'w')
         for allele_id, allele_seq in allele_seqs:
             gen_seq = ref_allele_left + allele_seq + ref_allele_right
-            print >> fasta_file, ">%s*%s %d bp" % (locus_name, allele_id, len(gen_seq))
+            print(">%s*%s %d bp" % (locus_name, allele_id, len(gen_seq)), file=fasta_file)
             for s in range(0, len(gen_seq), 60):
-                print >> fasta_file, gen_seq[s:s+60]
+                print(gen_seq[s:s+60], file=fasta_file)
         fasta_file.close()
 
 
--- hisat2.orig/hisatgenotype_scripts/hisatgenotype_extract_codis_data.py
+++ hisat2/hisatgenotype_scripts/hisatgenotype_extract_codis_data.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 #
 # Copyright 2017, Daehwan Kim <infphilo@gmail.com>
@@ -73,7 +73,7 @@
             continue
         url = "%s/str_%s.htm" % (base_url, locus_name)
         content = get_html(url).split("\r\n")
-        content = map(lambda x: x.strip(), content)
+        content = [x.strip() for x in content]
         content2 = []
         for line in content:
             if line.startswith("<t") or \
@@ -121,7 +121,7 @@
                 l += 1
 
         for allele_id, repeat_st in alleles:
-            print >> codis_data_file, "%s\t%s\t%s" % (locus_name, allele_id, repeat_st)
+            print("%s\t%s\t%s" % (locus_name, allele_id, repeat_st), file=codis_data_file)
 
     codis_data_file.close()
 
--- hisat2.orig/hisatgenotype_scripts/hisatgenotype_extract_cyp_data.py
+++ hisat2/hisatgenotype_scripts/hisatgenotype_extract_cyp_data.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 #
 # Copyright 2016, Raymon Cao <rcao5@jhu.edu> and Daehwan Kim <infphilo@gmail.com>
@@ -88,8 +88,8 @@
         # Open file to write on
         cyp_file = open("cyp_var_files/%s.var" % (cyp_gene_name), 'w')
         
-        print >> sys.stderr, cyp_url, cyp_gene_name
-        print >> cyp_file, cyp_url, cyp_gene_name
+        print(cyp_url, cyp_gene_name, file=sys.stderr)
+        print(cyp_url, cyp_gene_name, file=cyp_file)
 
         cyp_output = get_html(cyp_url)
         if cyp_output == "":
@@ -155,11 +155,11 @@
                 try:
                     assert len(varInfo) == 1
                 except:
-                    varInfo = filter(lambda a: a != 'None', varInfo)
+                    varInfo = [a for a in varInfo if a != 'None']
                 
         
-            if isinstance(alleleName, basestring):
-                print >> cyp_file, (str(alleleName) + "\t" + ','.join(varInfo))
+            if isinstance(alleleName, str):
+                print((str(alleleName) + "\t" + ','.join(varInfo)), file=cyp_file)
             
         cyp_file.close()
 
@@ -169,7 +169,7 @@
 """
 
 def checkNTloc(fasta_fileName,var_fileName,gene_name):
-    print "\nGene: %s" % gene_name
+    print("\nGene: %s" % gene_name)
     seq = ""
     for line in open(fasta_fileName,'r'):
         if line[0] == '>':
@@ -180,11 +180,11 @@
     cyp_var_dict = makeVarDict(cyp_var_file)
     cyp_var_file.close()
 
-    print "len:", len(seq)
+    print("len:", len(seq))
     varsPos = set()
     varsNeg = set()
 
-    for varList in cyp_var_dict.values():
+    for varList in list(cyp_var_dict.values()):
         for var in varList:
             if ">" in var: # is SNP
                 posNt = int(var[:-3])
@@ -219,7 +219,7 @@
                     try:
                         assert posNt[1] - posNt[0] + 1 == len(ntDel)
                     except AssertionError:
-                        print "Incorrect deletion format: %s , skipping variation" % (var)
+                        print("Incorrect deletion format: %s , skipping variation" % (var))
                         '''sys.exit(1)'''
                         continue
                     ntDelList = list(ntDel)
@@ -251,12 +251,12 @@
                 align_score += 1
 
         scorePos[i] = align_score
-    oSetPos = max(scorePos.iteritems(), key=operator.itemgetter(1))[0]
-    print "Positive postitions offset: %d" % oSetPos
-    print "Score: %d out of %d\n" % (scorePos[oSetPos], len(varsPos))
+    oSetPos = max(scorePos.items(), key=operator.itemgetter(1))[0]
+    print("Positive postitions offset: %d" % oSetPos)
+    print("Score: %d out of %d\n" % (scorePos[oSetPos], len(varsPos)))
     
 
-    print "Checking negative position offset: %d" % (oSetPos + 1)
+    print("Checking negative position offset: %d" % (oSetPos + 1))
     align_score = 0
     oSetNeg = oSetPos + 1
     for var in varsNeg:
@@ -270,7 +270,7 @@
         
         if seq[pos + oSetNeg] == base:
             align_score += 1
-    print "Score: %d out of %d\n\n" % (align_score, len(varsNeg))
+    print("Score: %d out of %d\n\n" % (align_score, len(varsNeg)))
 
     if len(varsNeg) == 0 and len(varsPos) != 0:
         return oSetPos, oSetNeg, float(scorePos[oSetPos])/float(len(varsPos)), 1.0, float(scorePos[oSetPos] + align_score)/float(len(varsPos) + len(varsNeg))
@@ -344,7 +344,7 @@
             assert not alleleName in alleleVarDict
             alleleVarDict[alleleName] = set(varList)
         except:
-            print >> sys.stdout, ("Warning, %s allele is already represented" % alleleName)
+            print(("Warning, %s allele is already represented" % alleleName), file=sys.stdout)
             alleleVarDict[alleleName] = alleleVarDict[alleleName] | set(varList)
 
     return alleleVarDict
@@ -386,7 +386,7 @@
         if len(blast_allele_var) > 0:
             cyp_var_dict[gene_name.upper() + '*REFGRCH38P7'] = set(blast_allele_var)
     except IOError:
-        print('\t%s blast file was skipped.' % gene_name)
+        print(('\t%s blast file was skipped.' % gene_name))
 
     cyp_faFile = open("cyp_fasta/%s.fasta" % gene_name,'r')
     cyp_seq = extractSeq(cyp_faFile)
@@ -398,7 +398,7 @@
 
     # Building backbone structure (augment length with insertions)
     longestIns = {} # { key = position : value = length }
-    for allele,varList in cyp_var_dict.items():
+    for allele,varList in list(cyp_var_dict.items()):
         for var in varList:
             if not "ins" in var:
                 continue
@@ -411,7 +411,7 @@
             try:
                 assert correctFormat
             except:
-                print >> sys.stdout, "\tIncorrect format for insertion: variation %s on allele %s" % (var, allele)
+                print("\tIncorrect format for insertion: variation %s on allele %s" % (var, allele), file=sys.stdout)
                 continue
 
             # convert to position in string
@@ -450,7 +450,7 @@
     map_cyp = create_map(preBackbone_seq) # { Index of bp in original seq : Actual index in string }
     
 
-    for allele,varList in cyp_var_dict.items():
+    for allele,varList in list(cyp_var_dict.items()):
         for var in varList:
             isSnp = False
             isDel = False
@@ -481,15 +481,15 @@
                         pos = pos + oSetNeg
 
                 if pos < 0 or pos > len(cyp_seq) - 1:
-                    print >> sys.stdout, "\tWarning: position %d out of bounds" % (dbPos)
-                    print >> sys.stdout, "\t\tError occured on variation %s on allele %s. Skipping variation." % (var, allele)
+                    print("\tWarning: position %d out of bounds" % (dbPos), file=sys.stdout)
+                    print("\t\tError occured on variation %s on allele %s. Skipping variation." % (var, allele), file=sys.stdout)
                     continue
                     
                 try:
                     assert(preBackbone_seq[map_cyp[pos]] == ntChange[0]) # nt at pos in seq must match database
                 except:
-                    print >> sys.stdout, "\tWarning: position %d in sequence contains %s, but expected %s from database" % (dbPos, preBackbone_seq[map_cyp[pos]], ntChange[0])
-                    print >> sys.stdout, "\t\tError occured on variation %s on allele %s. Skipping variation." % (var, allele)
+                    print("\tWarning: position %d in sequence contains %s, but expected %s from database" % (dbPos, preBackbone_seq[map_cyp[pos]], ntChange[0]), file=sys.stdout)
+                    print("\t\tError occured on variation %s on allele %s. Skipping variation." % (var, allele), file=sys.stdout)
                     continue
                 
                 # Adding to msf table
@@ -519,8 +519,8 @@
                 skipDel = False
                 for i in range(len(pos)):
                     if pos[i] < 0 or pos[i] > len(cyp_seq) - 1:
-                        print >> sys.stdout, "\tWarning: position %d out of bounds" % (dbPos[i])
-                        print >> sys.stdout, "\t\tError occured on variation %s on allele %s. Skipping variation." % (var, allele)
+                        print("\tWarning: position %d out of bounds" % (dbPos[i]), file=sys.stdout)
+                        print("\t\tError occured on variation %s on allele %s. Skipping variation." % (var, allele), file=sys.stdout)
                         skipDel = True
 
                 if skipDel:
@@ -530,15 +530,15 @@
                 try:
                     assert pos[1] - pos[0] + 1 == len(ntDel)
                 except:
-                    print >> sys.stdout, "\tIncorrect deletion data with %s on allele %s. Skipping variation." % (var, allele)
+                    print("\tIncorrect deletion data with %s on allele %s. Skipping variation." % (var, allele), file=sys.stdout)
                     continue
                             
                 try:
                     assert preBackbone_seq[ map_cyp[pos[0]] : map_cyp[pos[1]] + 1 ] == ntDel
                 except:
-                    print >> sys.stdout, "\tWarning, positions %d to %d in sequence contains %s, but expected %s from database" % \
-                          (dbPos[0], dbPos[1], preBackbone_seq[ map_cyp[pos[0]] : map_cyp[pos[1]] + 1 ], ntDel)
-                    print >> sys.stdout, "\t\tError occured on variation %s on allele %s. Skipping variation." % (var, allele)
+                    print("\tWarning, positions %d to %d in sequence contains %s, but expected %s from database" % \
+                          (dbPos[0], dbPos[1], preBackbone_seq[ map_cyp[pos[0]] : map_cyp[pos[1]] + 1 ], ntDel), file=sys.stdout)
+                    print("\t\tError occured on variation %s on allele %s. Skipping variation." % (var, allele), file=sys.stdout)
                     continue
 
 
@@ -559,7 +559,7 @@
                 try:
                     assert pos[1] - pos[0] == 1
                 except AssertionError:
-                    print >> sys.stdout, "\tIncorrect insertion data with %s on allele %s. Skipping variation." % (var, allele)
+                    print("\tIncorrect insertion data with %s on allele %s. Skipping variation." % (var, allele), file=sys.stdout)
                     continue 
                 ntIns = var.split('ins')[1]
                 for nt in ntIns:
@@ -575,8 +575,8 @@
                 skipIns = False
                 for i in range(len(pos)):
                     if pos[i] < 0 or pos[i] > len(cyp_seq) - 1:
-                        print >> sys.stdout, "Warning: position %d out of bounds" % (dbPos[i])
-                        print >> sys.stdout, "\tError occured on variation %s on allele %s. Skipping variation." % (var, allele)
+                        print("Warning: position %d out of bounds" % (dbPos[i]), file=sys.stdout)
+                        print("\tError occured on variation %s on allele %s. Skipping variation." % (var, allele), file=sys.stdout)
                         skipIns = True
 
                 if skipIns:
@@ -597,7 +597,7 @@
 
     # Sanity checking
     seq_len = 0
-    for allele, msf_seq in msfTable.items():
+    for allele, msf_seq in list(msfTable.items()):
         if seq_len == 0:
             seq_len = len(msf_seq)
         else:
@@ -607,7 +607,7 @@
     # Follow MSF style of IMGT/HLA database
     msfFile = open('cyp_msf/%s_gen.msf' % gene_name[3:].upper(),'w')
     for i in range(0, seq_len, 50):
-        for allele, msf_seq in msfTable.items():
+        for allele, msf_seq in list(msfTable.items()):
             output = "%12s" % allele[3:].upper()
             for j in range(i, i+50, 10):
                 if j >= seq_len:
@@ -617,8 +617,8 @@
                 else:
                     output += " "
                 output += msf_seq[j:j+10]
-            print >> msfFile, output
-        print >> msfFile
+            print(output, file=msfFile)
+        print(file=msfFile)
 
     msfFile.close()
 
@@ -636,7 +636,7 @@
     for gene_name in gene_names:
         oSetPos, oSetNeg, oSetScorePos, oSetScoreNeg, tot_score = checkNTloc("cyp_fasta/%s.fasta" % gene_name,"cyp_var_files/%s.var" % gene_name,gene_name)
         if not (tot_score >= 0.95):
-            print "\tLess than 95% match, skipping gene."
+            print("\tLess than 95% match, skipping gene.")
             continue
         
         makeMSF(gene_name, oSetPos, oSetNeg)
@@ -750,7 +750,7 @@
         msf_dict = readMSF(msf_file) # { Allele name : MSF sequence }
         msf_file.close()
     except IOError:
-        print("\t%s msf file was skipped.\n" % (gene_name))
+        print(("\t%s msf file was skipped.\n" % (gene_name)))
         return
 
     var_file = open(var_fname,'r')
@@ -762,7 +762,7 @@
         if len(blast_allele_var) > 0:
             var_dict[gene_name.upper() + '*REFGRCH38P7'] = set(blast_allele_var)
     except IOError:
-        print('\t%s blast file was skipped.' % gene_name)
+        print(('\t%s blast file was skipped.' % gene_name))
     
     fa_file = open(fasta_filename,'r')
     oriSeq = extractSeq(fa_file)
@@ -771,7 +771,7 @@
 
     # Find reference allele
     ref_allele = ''
-    for allele_name in var_dict.keys():
+    for allele_name in list(var_dict.keys()):
         if len(var_dict[allele_name]) == 1 and list(var_dict[allele_name])[0] == "None":
             assert ref_allele == ''
             ref_allele = allele_name
@@ -783,25 +783,25 @@
 
     try:
         assert msf_dict[ref_allele].replace('.','') == oriSeq
-        print("Sequences match for reference allele %s" % ref_allele)
+        print(("Sequences match for reference allele %s" % ref_allele))
     except AssertionError:
-        print("Warning: sequences do not match for reference allele %s" % ref_allele)
+        print(("Warning: sequences do not match for reference allele %s" % ref_allele))
         sys.exit(1)
 
 
     # Check all alleles are included
     try:
-        assert set([k.upper() for k in msf_dict.keys()]).issubset(set([k.upper() for k in var_dict.keys()]))
+        assert set([k.upper() for k in list(msf_dict.keys())]).issubset(set([k.upper() for k in list(var_dict.keys())]))
     except AssertionError:
         print("Extra alleles in MSF!\n")
-        print(sorted(msf_dict.keys()))
+        print((sorted(msf_dict.keys())))
         print("\n\n")
-        print(sorted(var_dict.keys()))
+        print((sorted(var_dict.keys())))
         sys.exit(1)
 
 
     # Convert from database positions to sequence positions (using offset)
-    for allele, var_list in var_dict.items():
+    for allele, var_list in list(var_dict.items()):
         oSet_var_list = []
         for var in var_list:
             if '>' in var: # snp
@@ -818,7 +818,7 @@
                 if pos < 0 or pos > len(oriSeq) - 1: # out of bounds
                     continue
                 if oriSeq[pos] != ntSnp[0]: # mismatch
-                    print('\tMismatch on variation %s' % var)
+                    print(('\tMismatch on variation %s' % var))
                     continue
 
                 oSet_var = str(pos) + ntSnp[0] + '>' + ntSnp[1]
@@ -844,7 +844,7 @@
                         if pos[i] < 0 or pos[i] > len(oriSeq) - 1: # out of bounds
                             skipDel = True
                 if (oriSeq[ pos[0] : pos[1] + 1 ] != ntDel): # mismatch
-                    print('\tMismatch on variation %s' % var)
+                    print(('\tMismatch on variation %s' % var))
                     continue
 
                 if skipDel:
@@ -869,7 +869,7 @@
                 try:
                     assert pos[1] - pos[0] == 1
                 except AssertionError:
-                    print('\tIncorrect insertion format on variation %s' % var)
+                    print(('\tIncorrect insertion format on variation %s' % var))
                     continue
                 ntIns = var.split('ins')[1]
                 for nt in ntIns:
@@ -901,7 +901,7 @@
 
     # Check variants created from MSF file against variants list
     num_correct_alleles = 0
-    for allele, msf_seq in msf_dict.items():
+    for allele, msf_seq in list(msf_dict.items()):
         if allele == ref_allele:
             num_correct_alleles += 1
             continue
@@ -914,12 +914,12 @@
         except AssertionError:
             incorrect_msf_entries.append(allele)
             print('\n')
-            print('\t\tVar File:\t' + str(var_dict[allele]))
-            print('\t\tMSF File:\t' + str(set(msf_var_list)))
-            print('\t\tDifference:\t' + str(var_dict[allele] - set(msf_var_list)) + '\n')
+            print(('\t\tVar File:\t' + str(var_dict[allele])))
+            print(('\t\tMSF File:\t' + str(set(msf_var_list))))
+            print(('\t\tDifference:\t' + str(var_dict[allele] - set(msf_var_list)) + '\n'))
             '''sys.exit(1)'''
 
-    print("\t%d out of %d alleles have correct msf sequences\n" % (num_correct_alleles, len(msf_dict)))
+    print(("\t%d out of %d alleles have correct msf sequences\n" % (num_correct_alleles, len(msf_dict))))
 
 def check_msf_files():
     print("\nChecking MSF files:")
@@ -927,7 +927,7 @@
     for gene_name in gene_names:
         checkMSFfile(gene_name, 'cyp_msf/%s_gen.msf' % gene_name[3:].upper(), 'cyp_var_files/%s.var' % gene_name, 'cyp_fasta/%s.fasta' % gene_name)
 
-    print('\n\n%d incorrect msf entries on alleles %s\n' % (len(incorrect_msf_entries), str(incorrect_msf_entries)))
+    print(('\n\n%d incorrect msf entries on alleles %s\n' % (len(incorrect_msf_entries), str(incorrect_msf_entries))))
 
 
 """
@@ -940,19 +940,19 @@
         msf_seq_dict = readMSF(msf_file)
         msf_file.close()
     except IOError:
-        print("\t%s msf file was skipped." % (gene_name))
+        print(("\t%s msf file was skipped." % (gene_name)))
         return
 
     gen_fasta_file = open('gen_fasta/%s_gen.fasta' % gene_name[3:].upper(), 'w')
     
-    for allele, seq in msf_seq_dict.items():
+    for allele, seq in list(msf_seq_dict.items()):
         seq = seq.replace('.','')
-        print >> gen_fasta_file, ('>' + allele[3:].upper() + ' ' + str(len(seq)) + ' bp')
+        print(('>' + allele[3:].upper() + ' ' + str(len(seq)) + ' bp'), file=gen_fasta_file)
         seq_lines = [seq[i:i+line_length] for i in range(0, len(seq), line_length)]
-        print >> gen_fasta_file, ('\n'.join(seq_lines))
+        print(('\n'.join(seq_lines)), file=gen_fasta_file)
 
     gen_fasta_file.close()
-    print('%s_gen.fasta completed' % gene_name)
+    print(('%s_gen.fasta completed' % gene_name))
 
 def build_gen_fasta_files():
     os.system('mkdir gen_fasta')
--- hisat2.orig/hisatgenotype_scripts/hisatgenotype_locus_samples.py
+++ hisat2/hisatgenotype_scripts/hisatgenotype_locus_samples.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 #
 # Copyright 2017, Daehwan Kim <infphilo@gmail.com>
@@ -124,10 +124,10 @@
     if not os.path.exists(read_fname_1) or not os.path.exists(read_fname_2):
         return
     lock.acquire()
-    print >> sys.stderr, genome
+    print(genome, file=sys.stderr)
     lock.release()
 
-    for family, loci in region_list.items():
+    for family, loci in list(region_list.items()):
         test_hla_cmd = ["hisatgenotype_locus.py",
                         "--base", family]
         if len(loci) > 0:
@@ -144,7 +144,7 @@
 
         if verbose:
             lock.acquire()
-            print >> sys.stderr, ' '.join(test_hla_cmd)
+            print(' '.join(test_hla_cmd), file=sys.stderr)
             lock.release()
 
         proc = subprocess.Popen(test_hla_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
@@ -160,7 +160,7 @@
 
     lock.acquire()
     for allele, abundance in output_list:
-        print >> sys.stdout, "%s\t%s\t%.2f" % (genome, allele, abundance)
+        print("%s\t%s\t%.2f" % (genome, allele, abundance), file=sys.stdout)
         genotype_results.append([genome, allele, abundance])
     sys.stdout.flush()
     lock.release()
@@ -190,7 +190,7 @@
                                                 verbose)
     
     if not os.path.exists(read_dir):
-        print >> sys.stderr, "Error: %s does not exist." % read_dir
+        print("Error: %s does not exist." % read_dir, file=sys.stderr)
         sys.exit(1)
 
     if out_dir != "" and not os.path.exists(out_dir):
@@ -236,10 +236,10 @@
             #    continue
             genotype_dic[region][genome].append([allele, abundance])
 
-        for region, region_genotype in genotype_dic.items():
-            print >> sys.stderr, region
+        for region, region_genotype in list(genotype_dic.items()):
+            print(region, file=sys.stderr)
             included, total = 0, 0
-            for genome, genome_alleles in region_genotype.items():
+            for genome, genome_alleles in list(region_genotype.items()):
                 genome_alleles = set([allele for allele, _ in genome_alleles])
                 if "father" in CEPH_pedigree[genome]:
                     assert "mother" in CEPH_pedigree[genome]
@@ -254,12 +254,12 @@
                     for parent_allele, _ in region_genotype[parents[0]]:
                         for parent_allele2, _ in region_genotype[parents[1]]:
                             parent_allele_sets.append(set([parent_allele, parent_allele2]))
-                print >> sys.stderr, "\t", genome, genome_alleles, parent_allele_sets
+                print("\t", genome, genome_alleles, parent_allele_sets, file=sys.stderr)
                 if len(parent_allele_sets) > 0:
                     total += 1
                     if genome_alleles in parent_allele_sets:
                         included += 1
-            print >> sys.stderr, "\t%d / %d" % (included, total)
+            print("\t%d / %d" % (included, total), file=sys.stderr)
 
 
 """
@@ -299,7 +299,7 @@
     parser.add_argument("--max-sample",
                         dest="max_sample",
                         type=int,
-                        default=sys.maxint,
+                        default=sys.maxsize,
-                        help="Number of samples to be analyzed (default: sys.maxint)")
+                        help="Number of samples to be analyzed (default: sys.maxsize)")
     parser.add_argument("--out-dir",
                         dest="out_dir",
@@ -318,11 +318,11 @@
     args = parser.parse_args()
 
     if args.read_dir == "":
-        print >> sys.stderr, "Error: please specify --read-dir."
+        print("Error: please specify --read-dir.", file=sys.stderr)
         sys.exit(1)
 
     if not args.reference_type in ["gene", "chromosome", "genome"]:
-        print >> sys.stderr, "Error: --reference-type (%s) must be one of gene, chromosome, and genome." % (args.reference_type)
+        print("Error: --reference-type (%s) must be one of gene, chromosome, and genome." % (args.reference_type), file=sys.stderr)
         sys.exit(1)
 
     region_list = {}
@@ -330,7 +330,7 @@
         for region in args.region_list.split(','):
             region = region.split('.')
             if len(region) < 1 or len(region) > 2:
-                print >> sys.stderr, "Error: --region-list is incorrectly formatted."
+                print("Error: --region-list is incorrectly formatted.", file=sys.stderr)
                 sys.exit(1)
                 
             family = region[0].lower()
--- hisat2.orig/scripts/sa.py
+++ hisat2/scripts/sa.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 
 """
 sa.py
@@ -15,7 +15,7 @@
 def loadBowtieSa(fh):
 	""" Load a .sa file from handle into an array of ints """
 	nsa = struct.unpack('I', fh.read(4))[0]
-	return [ struct.unpack('I', fh.read(4))[0] for i in xrange(0, nsa) ]
+	return [ struct.unpack('I', fh.read(4))[0] for i in range(0, nsa) ]
 
 def loadBowtieSaFilename(fn):
 	""" Load a .sa file from filename into an array of ints """
@@ -58,7 +58,7 @@
 		# Suffix array is in sas; note that $ is considered greater than all
 		# other characters
 		if ref is not None:
-			for i in xrange(1, len(sas)):
+			for i in range(1, len(sas)):
 				sa1, sa2 = sas[i-1], sas[i]
 				assert sa1 != sa2
 				# Sanity check that suffixes are really in order
@@ -76,4 +76,4 @@
 			assert sas[-1] == len(ref)
 	
 	go()
-	
\ No newline at end of file
+	
--- hisat2.orig/scripts/validate_repeat.py
+++ hisat2/scripts/validate_repeat.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 import sys, subprocess
 import re
 from argparse import ArgumentParser, FileType
@@ -203,7 +203,7 @@
                     seq = reverse_complement(seq)
 
                 if seq != repeat_sequence:
-                    print 'Mismatch', seq, repeat_sequence, snp_cnt, coord, snp_id_list, repeat_length
+                    print('Mismatch', seq, repeat_sequence, snp_cnt, coord, snp_id_list, repeat_length)
                     
     fp.close()
 
