fork download
  1. import os
  2. from multiprocessing import Process, cpu_count
  3. from threading import Thread
  4. from queue import Queue
  5.  
  6. # -------------------------------
  7. # CONFIG
  8. # -------------------------------
# Root directory that the main script walks recursively looking for ".txt" files.
SEARCH_DIR = "data"
# Substring looked for in every line (plain, case-sensitive `in` test).
KEYWORD = "error"
# Number of worker threads each search process starts.
THREADS_PER_PROCESS = 4
  12.  
  13.  
  14. # -------------------------------
  15. # THREAD WORKER
  16. # -------------------------------
  17. def thread_search(queue, keyword, process_id, thread_id):
  18. while True:
  19. file_path = queue.get()
  20. if file_path is None:
  21. break
  22.  
  23. try:
  24. with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
  25. for line_no, line in enumerate(f, start=1):
  26. if keyword in line:
  27. print(
  28. f"[P{process_id}-T{thread_id}] "
  29. f"{file_path}:{line_no}: {line.strip()}"
  30. )
  31. except Exception as e:
  32. print(f"[P{process_id}-T{thread_id}] Error: {e}")
  33.  
  34. queue.task_done()
  35.  
  36.  
  37. # -------------------------------
  38. # PROCESS WORKER
  39. # -------------------------------
  40. def process_search(files, keyword, process_id):
  41. queue = Queue()
  42. threads = []
  43.  
  44. for i in range(THREADS_PER_PROCESS):
  45. t = Thread(
  46. target=thread_search,
  47. args=(queue, keyword, process_id, i)
  48. )
  49. t.start()
  50. threads.append(t)
  51.  
  52. for f in files:
  53. queue.put(f)
  54.  
  55. queue.join()
  56.  
  57. for _ in threads:
  58. queue.put(None)
  59.  
  60. for t in threads:
  61. t.join()
  62.  
  63. print(f"Process {process_id} finished")
  64.  
  65.  
  66. # -------------------------------
  67. # SPLIT LIST
  68. # -------------------------------
  69. def split_list(lst, n):
  70. k = len(lst) // n
  71. r = len(lst) % n
  72. chunks = []
  73. start = 0
  74.  
  75. for i in range(n):
  76. end = start + k + (1 if i < r else 0)
  77. chunks.append(lst[start:end])
  78. start = end
  79.  
  80. return chunks
  81.  
  82.  
  83. # -------------------------------
  84. # MAIN
  85. # -------------------------------
  86. if __name__ == "__main__":
  87. # Collect all text files
  88. all_files = []
  89. for root, _, files in os.walk(SEARCH_DIR):
  90. for f in files:
  91. if f.endswith(".txt"):
  92. all_files.append(os.path.join(root, f))
  93.  
  94. if not all_files:
  95. print("No files found")
  96. exit()
  97.  
  98. num_processes = min(cpu_count(), len(all_files))
  99. print(f"Using {num_processes} processes")
  100. print(f"{THREADS_PER_PROCESS} threads per process")
  101. print(f"Searching for keyword: '{KEYWORD}'\n")
  102.  
  103. chunks = split_list(all_files, num_processes)
  104. processes = []
  105.  
  106. for i in range(num_processes):
  107. p = Process(
  108. target=process_search,
  109. args=(chunks[i], KEYWORD, i)
  110. )
  111. p.start()
  112. processes.append(p)
  113.  
  114. for p in processes:
  115. p.join()
  116.  
  117. print("\nSearch completed ✔")
# Recorded run output:
#   status: Success (0.21s, 16044KB)
#   stdin:  Standard input is empty
#   stdout: No files found