feat: stats and AI doc strings

This commit is contained in:
jstoobysmith 2024-07-15 14:52:50 -04:00
parent a17c98e922
commit d6460e62bc
19 changed files with 162 additions and 40 deletions

View file

@ -37,6 +37,37 @@ Run using
lake exe type_former_lint
```
## stats.sh
Outputs statistics for HepLean.
Run using
```
lake exe stats
```
## openAI_doc_check.lean
Uses openAI's API to check for spelling mistakes etc. in doc strings.
This code needs `https://github.com/jstoobysmith/Lean_llm_fork` to be installed.
It also needs the enviroment variable `OPENAI_API_KEY` defined using e.g.
```
export OPENAI_API_KEY=<Your-openAI-key>
```
Run on a specific file using
```
lake exe openAI_doc_check <module_name>
```
where `<module_name>` is e.g. `HepLean.SpaceTime.Basic`.
Run on a random file using
```
lake exe openAI_doc_check random
```
## lint-all.sh
Performs all linting checks on HepLean.

View file

@ -83,7 +83,8 @@ def hepLeanLintFile (path : FilePath) : IO Bool := do
let allOutput := (Array.map (fun lint ↦
(Array.map (fun (e, n, c) ↦ HepLeanErrorContext.mk e n c path)) (lint lines)))
#[doubleEmptyLineLinter, doubleSpaceLinter, substringLinter ".-/", substringLinter " )",
substringLinter "( ", substringLinter "=by", substringLinter " def "]
substringLinter "( ", substringLinter "=by", substringLinter " def ",
substringLinter "/-- We "]
let errors := allOutput.flatten
printErrors errors
return errors.size > 0

View file

@ -0,0 +1,79 @@
/-
Copyright (c) 2024 Joseph Tooby-Smith. All rights reserved.
Released under Apache 2.0 license.
Authors: Joseph Tooby-Smith
-/
import Batteries.Lean.IO.Process
import Lean
import Lean.Parser
import Lean.Data.Json
import Mathlib.Lean.CoreM
import LLM.GPT.Json
import LLM.GPT.API
/-!
# HepLean OpenAI doc check
This file uses the openAI API to check the doc strings of definitions and theorems in a
Lean 4 file.
It currently depends on `https://github.com/jstoobysmith/Lean_llm_fork` being imported,
which is currently not by default.
-/
open Lean
open LLM
open System Meta
def getDocString (constName : Array ConstantInfo) : MetaM String := do
let env ← getEnv
let mut docString := ""
for c in constName do
if ¬ Lean.Name.isInternalDetail c.name then
let doc ← Lean.findDocString? env c.name
match doc with
| some x => docString := docString ++ "- " ++ x ++ "\n"
| none => docString := docString
return docString
def header : String := "
Answer as if you an expert mathematician, physicist and software engineer.
Below I have listed the doc strings of definitions and theorems in a Lean 4 file.
Output a list of 1) spelling mistakes. 2) grammatical errors. 3) suggestions for improvement.
Note that text within `...` should be treated as code and not spellchecked. Doc strings
should be a complete sentence, or a noun phrase.
"
def main (args : List String) : IO UInt32 := do
initSearchPath (← findSysroot)
let firstArg := args.head?
match firstArg with
| some x => do
let mut imp : Import := Import.mk x.toName false
if x == "random" then
let mods : Name := `HepLean
let imps : Import := {module := mods}
let mFile ← findOLean imps.module
unless (← mFile.pathExists) do
throw <| IO.userError s!"object file '{mFile}' of module {imps.module} does not exist"
let (hepLeanMod, _) ← readModuleData mFile
let imports := hepLeanMod.imports
let y ← IO.rand 0 (imports.size -1)
imp := imports.get! y
let mFile ← findOLean imp.module
let filePath := (mkFilePath (imp.module.toString.split (· == '.'))).addExtension "lean"
IO.println s!"Checking: {filePath}"
let (modData, _) ← readModuleData mFile
let cons := modData.constants
let docString ← CoreM.withImportModules #[imp.module] (getDocString cons).run'
let message : LLM.Message := {role := Role.user, content := header ++ docString}
let request : GPT.Request := {messages := [message]}
let response ← LLM.GPT.chat (toString <| ToJson.toJson request)
let parsedRespone := GPT.parse response
match parsedRespone with
| .ok x => IO.println x
| .error e => IO.println e
| none => IO.println "No module provided."
return 0

View file

@ -16,6 +16,7 @@ This file concerns with statistics of HepLean.
-/
open Lean System Meta
structure FileStats where