osm parsing

This commit is contained in:
Araozu 2024-09-10 10:48:19 -05:00
commit d4a380c234
2 changed files with 180 additions and 0 deletions

3
go.mod Normal file
View File

@ -0,0 +1,3 @@
module osm_parser
go 1.23.0

177
main.go Normal file
View File

@ -0,0 +1,177 @@
package main
import (
"encoding/xml"
"errors"
"fmt"
"log"
"os"
"regexp"
)
// OsmBounds holds the four attributes decoded from the <bounds> element of an
// OSM XML document. Each attribute is parsed as a float64.
type OsmBounds struct {
	MinLat float64 `xml:"minlat,attr"` // "minlat" attribute
	MinLon float64 `xml:"minlon,attr"` // "minlon" attribute
	MaxLat float64 `xml:"maxlat,attr"` // "maxlat" attribute
	MaxLon float64 `xml:"maxlon,attr"` // "maxlon" attribute
}
// OsmNode holds the attributes decoded from a single <node> element of an OSM
// XML document. Child elements of the node are not decoded.
type OsmNode struct {
	Id  int64   `xml:"id,attr"`
	Lat float64 `xml:"lat,attr"`
	Lon float64 `xml:"lon,attr"`

	// Editing metadata carried by the element.
	Version   int64  `xml:"version,attr"`
	Timestamp string `xml:"timestamp,attr"` // kept as the raw attribute text
	Changeset int64  `xml:"changeset,attr"`
	Uid       int64  `xml:"uid,attr"`
	User      string `xml:"user,attr"`
}
// OsmMember is one <member> entry of a relation: a reference to another
// element, identified by its type and id, together with the role it plays.
type OsmMember struct {
	Type string `xml:"type,attr"` // "type" attribute of the referenced element
	Ref  int64  `xml:"ref,attr"`  // id of the referenced element
	Role string `xml:"role,attr"` // "role" attribute, kept verbatim
}
// OsmTag is a single key/value pair decoded from a <tag> element.
type OsmTag struct {
	K string `xml:"k,attr"` // tag key, from the "k" attribute
	V string `xml:"v,attr"` // tag value, from the "v" attribute
}
// OsmRelation holds a <relation> element of an OSM XML document: its own
// attributes plus every <member> and <tag> child it contains.
type OsmRelation struct {
	Id int64 `xml:"id,attr"`

	// Editing metadata carried by the element.
	//
	// NOTE(review): OsmNode decodes the same "version" attribute as int64;
	// confirm whether the narrower int32 here is intentional.
	Version   int32  `xml:"version,attr"`
	Timestamp string `xml:"timestamp,attr"` // kept as the raw attribute text
	Changeset int64  `xml:"changeset,attr"`
	Uid       int64  `xml:"uid,attr"`
	User      string `xml:"user,attr"`

	// Child elements, in document order.
	Members []OsmMember `xml:"member"`
	Tags    []OsmTag    `xml:"tag"`
}
// OsmDocument is the decoding target for a whole OSM XML file. Only the child
// elements listed here (<note>, <bounds>, <node> and <relation>) are mapped to
// fields.
type OsmDocument struct {
	Note   string    `xml:"note"`   // text content of the <note> element
	Bounds OsmBounds `xml:"bounds"` // the <bounds> element

	Nodes     []OsmNode     `xml:"node"`     // every <node> child
	Relations []OsmRelation `xml:"relation"` // every <relation> child
}
// CombiData is the summary produced for each route found in the OSM data.
type CombiData struct {
	// Company is the name of the operator of the route.
	// E.g.: "C7 - AqpMasivo"
	Company string

	// Name is the full name of the route.
	// E.g.: "Combi B: Villa Santa Rosa -> Terminal Terrestre"
	Name string

	// ParsedName is a single, identifiable name for the route: the Name
	// field stripped of any extra info.
	// E.g.: "B"
	ParsedName string
}
// main reads the OSM export stored in aqp_map.xml, looks up the SIT relation,
// resolves the relations it lists as members and logs, for each of them, the
// operator together with the route identifier extracted by parseRoute.
//
// Every failure is fatal and is reported through log.Fatalf: an unreadable
// file, malformed XML, a missing SIT relation, a member without an operator
// or name tag, or a route name that parseRoute rejects.
func main() {
	log.Println("Begin processing")

	xmlData, err := os.ReadFile("aqp_map.xml")
	if err != nil {
		log.Fatalf("reading aqp_map.xml: %v", err)
	}

	var osmDocument OsmDocument
	if err := xml.Unmarshal(xmlData, &osmDocument); err != nil {
		log.Fatalf("parsing aqp_map.xml: %v", err)
	}
	log.Println("XML unmarshal finished")

	// Index the relations by id once, so the lookups below are map accesses
	// instead of a scan of the whole document per member. The pointers refer
	// to the elements of the backing slice, not to loop-variable copies.
	relationsByID := make(map[int64]*OsmRelation, len(osmDocument.Relations))
	for i := range osmDocument.Relations {
		relation := &osmDocument.Relations[i]
		relationsByID[relation.Id] = relation
	}

	// The relation with this id hosts the SIT data.
	const sitID int64 = 17642638
	sitRelation, found := relationsByID[sitID]
	if !found {
		log.Fatalf("SIT relation with id %d not found!", sitID)
	}

	// Resolve the members of the SIT relation, keeping their original order.
	sitMembers := make([]*OsmRelation, 0, len(sitRelation.Members))
	for _, member := range sitRelation.Members {
		// OSM ids are only unique per element type, so a node or way member
		// may share its numeric id with an unrelated relation.
		if member.Type != "relation" {
			continue
		}
		if relation, ok := relationsByID[member.Ref]; ok {
			sitMembers = append(sitMembers, relation)
		}
	}

	// Transform sitMembers into CombiData.
	combis := make([]CombiData, 0, len(sitMembers))
	for _, member := range sitMembers {
		var operatorTag, nameTag *OsmTag
		for i := range member.Tags {
			tag := &member.Tags[i]
			switch tag.K {
			case "operator":
				operatorTag = tag
			case "name":
				nameTag = tag
			}
		}

		if operatorTag == nil {
			log.Fatalf("Found a SIT member without an operator tag, with id %d\n", member.Id)
		}
		if nameTag == nil {
			log.Fatalf("Found a SIT member without a name tag, with id %d\n", member.Id)
		}

		parsedName, err := parseRoute(nameTag.V)
		if err != nil {
			// member.Id is an int64, so it must be formatted with %d.
			log.Fatalf("SIT member id: %d: %v", member.Id, err)
		}

		combis = append(combis, CombiData{
			Company:    operatorTag.V,
			Name:       nameTag.V,
			ParsedName: parsedName,
		})
	}

	// Results are only printed once every member has been converted.
	for _, combi := range combis {
		log.Printf("%s: `%s`\n", combi.Company, combi.ParsedName)
	}
}
// errRouteNoMatch is wrapped by the error parseRoute returns when a route name
// follows none of the known formats, so callers can detect it with errors.Is.
var errRouteNoMatch = errors.New("did not match any pattern")

// routePatterns lists the known route name formats, most specific first.
// They are compiled once at package initialisation instead of on every call.
//
// The capture group is lazy so that the identifier ends at the first " :"
// separator, even when the description that follows contains another one.
var routePatterns = []*regexp.Regexp{
	regexp.MustCompile(`Combi (.+?) :.+`),  // Combi xx : ____
	regexp.MustCompile(`Custer (.+?) :.+`), // Custer xx : ____
	regexp.MustCompile(`(.+?) :.+`),        // xx : ____
}

// parseRoute extracts the short route identifier from a full route name.
//
// The patterns in routePatterns are tried in order and the first one that
// matches wins; its capture group (the "xx" part) is returned. For example
// "Combi B : Villa Santa Rosa -> Terminal Terrestre" yields "B".
//
// When no pattern matches, the returned string is empty and the error wraps
// errRouteNoMatch. Its text is "Route <routeName> did not match any pattern".
func parseRoute(routeName string) (string, error) {
	for _, pattern := range routePatterns {
		if match := pattern.FindStringSubmatch(routeName); match != nil {
			return match[1], nil
		}
	}
	return "", fmt.Errorf("Route %s %w", routeName, errRouteNoMatch)
}