-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspeedtest_hash.py
More file actions
executable file
·154 lines (134 loc) · 5.22 KB
/
speedtest_hash.py
File metadata and controls
executable file
·154 lines (134 loc) · 5.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from os import devnull, system, sync
import timeit
from crc32c import crc32c # crc32c for intel
from zlib import crc32 # standard crc32
import xxhash
import re # regex
import itertools
from time import sleep, mktime # For timeouts and time output
from datetime import datetime
from pathlib import Path
# ##################################################################################################
# ##############################################################################
# Setting functions:
# ##############################################################################
# ##################################################################################################
def search_files(where):
    """Recursively collect every regular file below the given directories.

    Args:
        where: Iterable of directory paths; each is searched with the
            recursive glob pattern '**/*'.

    Returns:
        A list of per-file records, sorted by full source path. Each record
        is itself a list with this layout:
            [0] full source path (resolved, as str)
            [1] file name
            [2] basename (file name without its last suffix)
            [3] extension
            [4] size (st_size)
            [5] mod-date (st_mtime)
            [6] hash (placeholder "XYZ" until a hash function fills it in)
            [7] full target path(s) (list, initially empty)
    """
    pattern = '**/*'
    records = []
    for root in where:
        for candidate in Path(root).glob(pattern):
            # Directories and other non-regular entries are skipped.
            if not candidate.is_file():
                continue
            resolved = Path(candidate).resolve()
            info = resolved.stat()
            records.append([
                str(resolved),
                resolved.name,
                resolved.stem,
                resolved.suffix,
                info.st_size,
                info.st_mtime,
                "XYZ",
                [],
            ])
    # Keep the records ordered by full path (original note: "For %c in
    # param['naming_file']").
    records.sort(key=lambda record: record[0])
    return records
def get_source_hashes_CRC32C(what):
    """Compute the CRC32C checksum of every file record in `what`.

    Each file named by record[0] is opened in binary mode and read in
    chunks of 128*256 bytes; the running checksum is fed back into
    crc32c() for every chunk. The final value is written to record[6] as
    lowercase hex without zero padding (an empty file yields '0').

    The records are modified in place; nothing is returned.
    """
    chunk_size = 128 * 256
    for record in what:
        with Path(record[0]).open("rb") as handle:
            checksum = 0
            # iter() with a sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: handle.read(chunk_size), b""):
                checksum = crc32c(chunk, checksum) & 0xffffffff
        record[6] = f'{checksum:x}'
def get_source_hashes_CRC32(what):
    """Compute the standard (zlib) CRC32 checksum of every file record.

    Each file named by record[0] is opened in binary mode and read in
    chunks of 128*256 bytes; the running checksum is fed back into
    crc32() for every chunk. The final value is written to record[6] as
    lowercase hex without zero padding (an empty file yields '0').

    The records are modified in place; nothing is returned.
    """
    chunk_size = 128 * 256
    for record in what:
        with Path(record[0]).open("rb") as handle:
            checksum = 0
            # iter() with a sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: handle.read(chunk_size), b""):
                checksum = crc32(chunk, checksum) & 0xffffffff
        record[6] = f'{checksum:x}'
def get_source_hashes_xxhash_256(what):
    """Compute the xxh3_64 digest of every file record, using small chunks.

    Each file named by record[0] is opened in binary mode and read in
    chunks of 128*256 bytes. A single xxhash.xxh3_64 hasher is created once
    and reset before each file. The hex digest is written to record[6].

    The records are modified in place; nothing is returned.
    """
    chunk_size = 128 * 256
    hasher = xxhash.xxh3_64()
    for record in what:
        # Reuse the one hasher object; reset() clears the previous file's state.
        hasher.reset()
        with Path(record[0]).open("rb") as handle:
            # iter() with a sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: handle.read(chunk_size), b""):
                hasher.update(chunk)
        record[6] = hasher.hexdigest()
def get_source_hashes_xxhash_4096(what):
    """Compute the xxh3_64 digest of every file record, using large chunks.

    Each file named by record[0] is opened in binary mode and read in
    chunks of 1024*4096 bytes. A single xxhash.xxh3_64 hasher is created
    once and reset before each file. The hex digest is written to record[6].

    The records are modified in place; nothing is returned.
    """
    chunk_size = 1024 * 4096
    hasher = xxhash.xxh3_64()
    for record in what:
        # Reuse the one hasher object; reset() clears the previous file's state.
        hasher.reset()
        with Path(record[0]).open("rb") as handle:
            # iter() with a sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: handle.read(chunk_size), b""):
                hasher.update(chunk)
        record[6] = hasher.hexdigest()
# ##################################################################################################
# ##############################################################################
# Chronology / Workflow:
# ##############################################################################
# ##################################################################################################
# NOTE: It is best to test this on your machine with your most used sources (e.g. SD-cards).
# Results vary greatly: when reading from RAM, 128*256-byte blocks are faster than 1024*4096-byte
# blocks, but this tends to reverse as the cards get slower.
# Collect the files once; every hash function below works on this same list
# and overwrites the hash field of each record.
source_files = search_files(["/tmp/usb1/DCIM", "/tmp/usb2/DCIM"])
print(f"Files:\t{len(source_files)}")

# Seconds to wait before each measurement, just to give the computer some
# rest between iterations.
rest_seconds = 15

# Each measurement runs its hash function exactly once over all files; the
# setup string pulls the needed names from the running script (__main__).
sleep(rest_seconds)
time_crc32c = timeit.timeit(
    stmt='get_source_hashes_CRC32C(source_files)',
    setup='from __main__ import source_files, get_source_hashes_CRC32C',
    number=1,
)
print(f"CRC32C:\t{time_crc32c}")

sleep(rest_seconds)
time_crc32 = timeit.timeit(
    stmt='get_source_hashes_CRC32(source_files)',
    setup='from __main__ import source_files, get_source_hashes_CRC32',
    number=1,
)
print(f"CRC32:\t{time_crc32}")

sleep(rest_seconds)
time_xxhash256 = timeit.timeit(
    stmt='get_source_hashes_xxhash_256(source_files)',
    setup='from __main__ import source_files, get_source_hashes_xxhash_256',
    number=1,
)
print(f"xxHash (256):\t{time_xxhash256}")

sleep(rest_seconds)
time_xxhash4096 = timeit.timeit(
    stmt='get_source_hashes_xxhash_4096(source_files)',
    setup='from __main__ import source_files, get_source_hashes_xxhash_4096',
    number=1,
)
print(f"xxHash (4096):\t{time_xxhash4096}")