Оптимизация программ на Python

Как ускорить программу на Python (pdf)

Бенчмарки

Ниже собраны программы, измеряющие время работы разных вариантов кода на Python. В начале каждой программы приведён пример вывода. Запустите их у себя, чтобы получить время работы на своём компьютере.

input() vs. stdin.readline()

# Example output:
# Read 1000000 numbers using "int(input())" in 1.8937892050016671 seconds
# Read 1000000 numbers using "int(sys.stdin.readline())" in 0.6622662610025145 seconds

import os
import random
import sys
import time

# Number of integers written to the temporary file and read back by each test.
DATA_SIZE = 10 ** 6
# Temporary file that stands in for standard input during a test.
FILE_NAME = 'tmp_feocuysg.txt'
# The integers 0 .. DATA_SIZE - 1; prepare_file() shuffles this list in place.
DATA = list(range(DATA_SIZE))


# Holds the real sys.stdin while a test has it redirected; None otherwise.
original_stdin = None


def prepare_file():
    """Write DATA to FILE_NAME in a fresh random order and redirect stdin to it.

    The previous ``sys.stdin`` is stashed in the module-level
    ``original_stdin`` so that ``cleanup_file()`` can put it back.
    """
    global original_stdin

    random.shuffle(DATA)
    with open(FILE_NAME, 'w') as out:
        out.writelines(str(number) + '\n' for number in DATA)

    # Open the replacement first so a failed open() leaves stdin untouched.
    replacement = open(FILE_NAME)
    original_stdin = sys.stdin
    sys.stdin = replacement


def cleanup_file():
    """Undo ``prepare_file()``: close the redirected stdin, restore the real
    one, and delete the temporary file.
    """
    global original_stdin

    redirected = sys.stdin
    redirected.close()

    # Put the saved stream back and clear the slot that was holding it.
    sys.stdin = original_stdin
    original_stdin = None

    os.remove(FILE_NAME)


def test_input():
    """Time reading DATA_SIZE integers from redirected stdin with ``input()``.

    Returns:
        Seconds (``time.perf_counter`` difference) spent in the read loop.
        Preparing and cleaning up the temporary file are not timed.
    """
    prepare_file()
    try:
        start = time.perf_counter()

        # x only gives the parsed values somewhere to go; it is never read.
        x = 0
        for i in range(DATA_SIZE):
            x += int(input())

        elapsed = time.perf_counter() - start
    finally:
        # Restore the real stdin and delete the temporary file even if the
        # read loop raises; otherwise stdin would stay redirected.
        cleanup_file()
    return elapsed


def test_readline():
    """Time reading DATA_SIZE integers with ``sys.stdin.readline()``.

    Returns:
        Seconds (``time.perf_counter`` difference) spent in the read loop.
        Preparing and cleaning up the temporary file are not timed.
    """
    prepare_file()
    try:
        start = time.perf_counter()

        # x only gives the parsed values somewhere to go; it is never read.
        x = 0
        for i in range(DATA_SIZE):
            x += int(sys.stdin.readline())

        elapsed = time.perf_counter() - start
    finally:
        # Restore the real stdin and delete the temporary file even if the
        # read loop raises; otherwise stdin would stay redirected.
        cleanup_file()
    return elapsed


# Run both stdin-reading benchmarks in turn and report each timing.
elapsed = test_input()
print('Read {} numbers using "int(input())" in {} seconds'.format(
    DATA_SIZE, elapsed))

elapsed = test_readline()
print('Read {} numbers using "int(sys.stdin.readline())" in {} seconds'.format(
    DATA_SIZE, elapsed))
            

print(..., file=f) vs. f.write(...)

# Example output:
# Wrote 1000000 numbers using "print(x, file=f)" in 1.6223861339967698 seconds
# Wrote 1000000 numbers using "f.write(str(x) + '\n')" in 0.7785177710029529 seconds

import os
import random
import time

# Number of integers each test writes to the temporary file.
DATA_SIZE = 10 ** 6
# Temporary output file; each test creates it and removes it again.
FILE_NAME = 'tmp_caiorsg.txt'
# The integers 0 .. DATA_SIZE - 1; prepare_data() shuffles this list in place.
DATA = list(range(DATA_SIZE))

def prepare_data():
    """Shuffle the module-level DATA list in place and return that same list."""
    shuffled = DATA
    random.shuffle(shuffled)
    return shuffled


def test_print():
    """Time writing every number in DATA to a file with ``print(x, file=f)``.

    Returns:
        Seconds (``time.perf_counter`` difference) spent in the write loop.
        Shuffling, opening, closing and deleting the file are not timed.
    """
    data = prepare_data()
    # Opened outside the try so a failed open() does not trigger os.remove().
    f = open(FILE_NAME, 'w')
    try:
        with f:
            start = time.perf_counter()
            for x in data:
                print(x, file=f)
            elapsed = time.perf_counter() - start
    finally:
        # Runs after the ``with`` has closed the file, so the temporary file
        # is deleted even when a write fails part-way through.
        os.remove(FILE_NAME)
    return elapsed


def test_write():
    """Time writing every number in DATA with ``f.write(str(x) + '\\n')``.

    Returns:
        Seconds (``time.perf_counter`` difference) spent in the write loop.
        Shuffling, opening, closing and deleting the file are not timed.
    """
    data = prepare_data()
    # Opened outside the try so a failed open() does not trigger os.remove().
    f = open(FILE_NAME, 'w')
    try:
        with f:
            start = time.perf_counter()
            for x in data:
                f.write(str(x) + '\n')
            elapsed = time.perf_counter() - start
    finally:
        # Runs after the ``with`` has closed the file, so the temporary file
        # is deleted even when a write fails part-way through.
        os.remove(FILE_NAME)
    return elapsed


# Run both file-writing benchmarks in turn and report each timing.
elapsed = test_print()
print('Wrote {} numbers using "print(x, file=f)" in {} seconds'.format(
    DATA_SIZE, elapsed))

elapsed = test_write()
print('Wrote {} numbers using "f.write(str(x) + \'\\n\')" in {} seconds'.format(
    DATA_SIZE, elapsed))
            

Код на верхнем уровне vs. Код в функции

# Example output:
# Sum of 10000000 numbers on module level in 5.0005733920042985 seconds
# Sum of 10000000 numbers on function level in 3.303951841997332 seconds

import os
import random
import time

# Number of list elements visited by each summation loop.
DATA_SIZE = 10 ** 7
# The integers 0 .. DATA_SIZE - 1; prepare_data() shuffles this list in place.
DATA = list(range(DATA_SIZE))


def prepare_data():
    """Randomise the order of DATA in place.

    Returns:
        The module-level DATA list itself (not a copy).
    """
    numbers = DATA
    random.shuffle(numbers)
    return numbers


def main():
    """Run the alternating-sum loop inside a function and time it.

    The loop is the same one that the module-level section of this script
    runs; here ``a``, ``i`` and ``data`` are function locals rather than
    module globals, which is the difference this benchmark measures.

    Returns:
        Seconds (``time.perf_counter`` difference) spent in the loop;
        shuffling the data is not timed.
    """
    data = prepare_data()
    start = time.perf_counter()

    a = 0
    for i in range(DATA_SIZE):
        # Add elements at odd indices, subtract those at even indices.
        a += data[i] if i&1 else -data[i]

    return time.perf_counter() - start


# Top-level
# The same loop as in main(), executed directly at module scope, where
# ``a``, ``i`` and ``data`` are module globals.
data = prepare_data()
start = time.perf_counter()

a = 0
for i in range(DATA_SIZE):
    # Add elements at odd indices, subtract those at even indices.
    a += data[i] if i&1 else -data[i]

elapsed = time.perf_counter() - start
print('Sum of {} numbers on module level in {} seconds'
      .format(DATA_SIZE, elapsed))


# Function
# The identical computation, this time timed inside main().
elapsed = main()
print('Sum of {} numbers on function level in {} seconds'
      .format(DATA_SIZE, elapsed))
            

a.append(...) vs. a[i] = ... vs. a = [... for i in range(...)]

# Example output:
# Read 1000000 numbers into array using "a.append(int(f.readline()))" in 0.5692962329994771 seconds
# Read 1000000 numbers into array using "a[i] = int(f.readline())" in 0.5054713299978175 seconds
# Read 1000000 numbers into array using "a = [int(f.readline()) for i in range(n)]" in 0.5023784420045558 seconds

import os
import random
import time

# Number of integers written to the temporary file and read into a list.
DATA_SIZE = 10 ** 6
# Temporary input file; created by prepare_file(), removed by cleanup_file().
FILE_NAME = 'tmp_fovbeas.txt'
# The integers 0 .. DATA_SIZE - 1; prepare_file() shuffles this list in place.
DATA = list(range(DATA_SIZE))


def prepare_file():
    """Write DATA to FILE_NAME in a fresh random order, one number per line.

    Returns:
        The same file, reopened for reading. The caller is responsible for
        closing it.
    """
    random.shuffle(DATA)
    with open(FILE_NAME, 'w') as out:
        out.writelines(str(number) + '\n' for number in DATA)

    reader = open(FILE_NAME)
    return reader


def cleanup_file():
    """Delete the temporary file named by FILE_NAME."""
    # os.unlink is the same operation as os.remove under its Unix name.
    os.unlink(FILE_NAME)


def test_append():
    """Time building a list of DATA_SIZE integers with ``a.append(...)``.

    Returns:
        Seconds (``time.perf_counter`` difference) spent creating the empty
        list and filling it; preparing and removing the file are not timed.
    """
    f = prepare_file()
    try:
        with f:
            start = time.perf_counter()

            a = []
            for i in range(DATA_SIZE):
                a.append(int(f.readline()))

            elapsed = time.perf_counter() - start
    finally:
        # Runs after the ``with`` has closed the file, so the temporary file
        # is removed even if parsing fails.
        cleanup_file()
    return elapsed


def test_assign():
    """Time filling a pre-sized list of DATA_SIZE integers with ``a[i] = ...``.

    Returns:
        Seconds (``time.perf_counter`` difference) spent allocating the
        zero-filled list and assigning every slot; preparing and removing the
        file are not timed.
    """
    f = prepare_file()
    try:
        with f:
            start = time.perf_counter()

            a = [0] * DATA_SIZE
            for i in range(DATA_SIZE):
                a[i] = int(f.readline())

            elapsed = time.perf_counter() - start
    finally:
        # Runs after the ``with`` has closed the file, so the temporary file
        # is removed even if parsing fails.
        cleanup_file()
    return elapsed


def test_generator():
    """Time building a list of DATA_SIZE integers with a list comprehension.

    Despite the function's name, the timed construct is a list comprehension,
    not a generator expression.

    Returns:
        Seconds (``time.perf_counter`` difference) spent evaluating the
        comprehension; preparing and removing the file are not timed.
    """
    f = prepare_file()
    try:
        with f:
            start = time.perf_counter()

            a = [int(f.readline()) for i in range(DATA_SIZE)]

            elapsed = time.perf_counter() - start
    finally:
        # Runs after the ``with`` has closed the file, so the temporary file
        # is removed even if parsing fails.
        cleanup_file()
    return elapsed


# Run the three list-building benchmarks in turn and report each timing.
elapsed = test_append()
print('Read {} numbers into array using "a.append(int(f.readline()))" in {} seconds'.format(
    DATA_SIZE, elapsed))

elapsed = test_assign()
print('Read {} numbers into array using "a[i] = int(f.readline())" in {} seconds'.format(
    DATA_SIZE, elapsed))

elapsed = test_generator()
print('Read {} numbers into array using "a = [int(f.readline()) for i in range(n)]" in {} seconds'.format(
    DATA_SIZE, elapsed))